From 0756fadda6074d56d45d8f40268f1342947bb217 Mon Sep 17 00:00:00 2001
From: Nilesh Patra <nilesh@debian.org>
Date: Fri, 15 Oct 2021 00:58:59 +0530
Subject: [PATCH] New upstream version 0.17.0+ds

---
 .github/workflows/ci.yaml                     |  152 ++
 .github/workflows/release.yaml                |  115 ++
 .gitignore                                    |    3 +
 .travis.yml => .travis.disabled.yml           |   17 +
 AUTHORS                                       |    9 +
 INSTALL                                       |    2 +-
 MANIFEST.in                                   |   50 +-
 NEWS                                          |    8 +-
 README.rst                                    |    2 +-
 bcftools/HMM.c                                |    2 +-
 bcftools/HMM.c.pysam.c                        |    2 +-
 bcftools/HMM.h                                |    2 +-
 bcftools/LICENSE                              |   27 +-
 bcftools/README                               |   22 +
 bcftools/abuf.c                               |  713 +++++++
 bcftools/abuf.c.pysam.c                       |  715 +++++++
 bcftools/abuf.h                               |   78 +
 bcftools/bam2bcf.c                            |  397 +++-
 bcftools/bam2bcf.c.pysam.c                    |  397 +++-
 bcftools/bam2bcf.h                            |   39 +-
 bcftools/bam2bcf_indel.c                      | 1110 ++++++++---
 bcftools/bam2bcf_indel.c.pysam.c              | 1110 ++++++++---
 bcftools/bcftools.h                           |   67 +-
 bcftools/bcftools.pysam.c                     |   20 +
 bcftools/bcftools.pysam.h                     |   13 +
 bcftools/bin.c                                |    1 +
 bcftools/bin.c.pysam.c                        |    1 +
 bcftools/call.h                               |   36 +-
 bcftools/ccall.c                              |    5 +-
 bcftools/ccall.c.pysam.c                      |    5 +-
 bcftools/consensus.c                          |  512 +++--
 bcftools/consensus.c.pysam.c                  |  514 +++--
 bcftools/convert.c                            |   63 +-
 bcftools/convert.c.pysam.c                    |   69 +-
 bcftools/csq.c                                |  203 +-
 bcftools/csq.c.pysam.c                        |  203 +-
 bcftools/dist.c                               |  124 ++
 bcftools/dist.c.pysam.c                       |  126 ++
 bcftools/dist.h                               |   98 +
 bcftools/em.c                                 |    2 +-
 bcftools/em.c.pysam.c                         |    2 +-
 bcftools/extsort.c                            |  250 +++
 bcftools/extsort.c.pysam.c                    |  252 +++
 bcftools/extsort.h                            |   56 +
 bcftools/filter.c                             |  747 +++++--
 bcftools/filter.c.pysam.c                     |  749 +++++--
 bcftools/filter.h                             |   14 +-
 bcftools/hclust.c                             |    1 +
 bcftools/hclust.c.pysam.c                     |    1 +
 bcftools/htslib-1.10.2/LICENSE                |   69 -
 bcftools/htslib-1.10.2/README                 |    5 -
 bcftools/main.c                               |    8 +-
 bcftools/main.c.pysam.c                       |    8 +-
 bcftools/mcall.c                              |  764 ++++---
 bcftools/mcall.c.pysam.c                      |  764 ++++---
 bcftools/mpileup.c                            |  536 ++++-
 bcftools/mpileup.c.pysam.c                    |  566 ++++--
 bcftools/ploidy.h                             |    2 +-
 bcftools/prob1.c                              |    2 +-
 bcftools/prob1.c.pysam.c                      |    2 +-
 bcftools/prob1.h                              |    2 +-
 bcftools/rbuf.h                               |    2 +-
 bcftools/regidx.c                             |    2 +-
 bcftools/regidx.c.pysam.c                     |    2 +-
 bcftools/regidx.h                             |    2 +-
 bcftools/reheader.c                           |  100 +-
 bcftools/reheader.c.pysam.c                   |  102 +-
 bcftools/smpl_ilist.c                         |    2 +-
 bcftools/smpl_ilist.c.pysam.c                 |    2 +-
 bcftools/str_finder.c                         |  270 +++
 bcftools/str_finder.c.pysam.c                 |  272 +++
 bcftools/str_finder.h                         |   64 +
 bcftools/utlist.h                             |  761 +++++++
 bcftools/vcfannotate.c                        |  795 ++++++--
 bcftools/vcfannotate.c.pysam.c                |  797 ++++++--
 bcftools/vcfbuf.c                             |  214 +-
 bcftools/vcfbuf.c.pysam.c                     |  214 +-
 bcftools/vcfbuf.h                             |   44 +-
 bcftools/vcfcall.c                            |  106 +-
 bcftools/vcfcall.c.pysam.c                    |  114 +-
 bcftools/vcfcnv.c                             |    1 +
 bcftools/vcfcnv.c.pysam.c                     |    3 +-
 bcftools/vcfconcat.c                          |   13 +-
 bcftools/vcfconcat.c.pysam.c                  |   15 +-
 bcftools/vcfconvert.c                         |   42 +-
 bcftools/vcfconvert.c.pysam.c                 |   44 +-
 bcftools/vcffilter.c                          |   66 +-
 bcftools/vcffilter.c.pysam.c                  |   68 +-
 bcftools/vcfgtcheck.c                         | 1613 +++++++++------
 bcftools/vcfgtcheck.c.pysam.c                 | 1615 +++++++++------
 bcftools/vcfindex.c                           |  155 +-
 bcftools/vcfindex.c.pysam.c                   |  157 +-
 bcftools/vcfisec.c                            |   11 +-
 bcftools/vcfisec.c.pysam.c                    |   13 +-
 bcftools/vcfmerge.c                           |  638 +++++-
 bcftools/vcfmerge.c.pysam.c                   |  644 +++++-
 bcftools/vcfnorm.c                            |  479 +++--
 bcftools/vcfnorm.c.pysam.c                    |  481 +++--
 bcftools/vcfplugin.c                          |   93 +-
 bcftools/vcfplugin.c.pysam.c                  |   95 +-
 bcftools/vcfquery.c                           |   13 +-
 bcftools/vcfquery.c.pysam.c                   |   15 +-
 bcftools/vcfroh.c                             |   54 +-
 bcftools/vcfroh.c.pysam.c                     |   62 +-
 bcftools/vcfsom.c                             |   16 +-
 bcftools/vcfsom.c.pysam.c                     |   18 +-
 bcftools/vcfsort.c                            |   42 +-
 bcftools/vcfsort.c.pysam.c                    |   46 +-
 bcftools/vcfstats.c                           |  177 +-
 bcftools/vcfstats.c.pysam.c                   |  185 +-
 bcftools/vcfview.c                            |   20 +-
 bcftools/vcfview.c.pysam.c                    |   30 +-
 bcftools/vcmp.c                               |    2 +-
 bcftools/vcmp.c.pysam.c                       |    2 +-
 bcftools/vcmp.h                               |    2 +-
 bcftools/version.c                            |   14 +-
 bcftools/version.c.pysam.c                    |   18 +-
 bcftools/version.sh                           |   25 +-
 cy_build.py                                   |    6 +-
 devtools/import.py                            |   29 +-
 devtools/install-CGAT-tools.sh                |    5 +-
 devtools/run_tests_travis.sh                  |    7 +-
 doc/api.rst                                   |   30 +-
 doc/benchmarking.rst                          |    2 +
 doc/conf.py                                   |   16 +-
 doc/developer.rst                             |   13 +-
 doc/faq.rst                                   |   13 +-
 doc/glossary.rst                              |   13 +
 doc/index.rst                                 |   21 +-
 doc/installation.rst                          |    2 +-
 doc/release.rst                               |   70 +-
 doc/usage.rst                                 |    3 +-
 import/pysam.c                                |   20 +
 import/pysam.h                                |   13 +
 pysam.py                                      |    1 -
 pysam/__init__.py                             |    4 +-
 pysam/libcalignedsegment.pxd                  |    6 +-
 pysam/libcalignedsegment.pyx                  |   64 +-
 pysam/libcalignmentfile.pxd                   |   20 +-
 pysam/libcalignmentfile.pyx                   |   71 +-
 pysam/libcbcf.pyx                             |   39 +-
 pysam/libcbcftools.pxd                        |    2 +-
 pysam/libcfaidx.pyx                           |    4 +-
 pysam/libchtslib.pxd                          |   23 +-
 pysam/libchtslib.pyx                          |    6 +-
 pysam/libcsamtools.pxd                        |    2 +-
 pysam/libctabix.pyx                           |   60 +-
 pysam/libcutils.pxd                           |   16 +-
 pysam/libcutils.pyx                           |   70 +-
 pysam/samtools.py                             |    4 +
 pysam/version.h                               |    6 +-
 pysam/version.py                              |    8 +-
 samtools/LICENSE                              |    2 +-
 samtools/README                               |   35 +-
 samtools/amplicon_stats.c                     | 1754 ++++++++++++++++
 samtools/amplicon_stats.c.pysam.c             | 1756 +++++++++++++++++
 samtools/bam.c                                |   20 +-
 samtools/bam.c.pysam.c                        |   20 +-
 samtools/bam.h                                |    4 +-
 samtools/bam2bcf_indel.c                      |    4 +
 samtools/bam2bcf_indel.c.pysam.c              |    4 +
 samtools/bam2depth.c                          | 1160 ++++++++---
 samtools/bam2depth.c.pysam.c                  | 1160 ++++++++---
 samtools/bam_addrprg.c                        |   42 +-
 samtools/bam_addrprg.c.pysam.c                |   42 +-
 samtools/bam_ampliconclip.c                   | 1079 ++++++++++
 samtools/bam_ampliconclip.c.pysam.c           | 1081 ++++++++++
 samtools/bam_ampliconclip.h                   |   54 +
 samtools/bam_aux.c                            |    6 +-
 samtools/bam_aux.c.pysam.c                    |    6 +-
 samtools/bam_cat.c                            |   24 +-
 samtools/bam_cat.c.pysam.c                    |   24 +-
 samtools/bam_color.c                          |   24 +-
 samtools/bam_color.c.pysam.c                  |   24 +-
 samtools/bam_fastq.c                          |  950 ++++-----
 samtools/bam_fastq.c.pysam.c                  |  950 ++++-----
 samtools/bam_flags.c                          |   66 +-
 samtools/bam_flags.c.pysam.c                  |   66 +-
 samtools/bam_import.c                         |  487 +++++
 samtools/bam_import.c.pysam.c                 |  489 +++++
 samtools/bam_index.c.pysam.c                  |    2 +-
 samtools/bam_markdup.c                        |  675 +++++--
 samtools/bam_markdup.c.pysam.c                |  675 +++++--
 samtools/bam_mate.c                           |   12 +-
 samtools/bam_mate.c.pysam.c                   |   12 +-
 samtools/bam_md.c                             |  185 +-
 samtools/bam_md.c.pysam.c                     |  185 +-
 samtools/bam_plcmd.c                          |   66 +-
 samtools/bam_plcmd.c.pysam.c                  |   94 +-
 samtools/bam_reheader.c.pysam.c               |    4 +-
 samtools/bam_rmdupse.c.pysam.c                |    6 +-
 samtools/bam_sort.c                           |  533 ++++-
 samtools/bam_sort.c.pysam.c                   |  533 ++++-
 samtools/bam_stat.c                           |  113 +-
 samtools/bam_stat.c.pysam.c                   |  115 +-
 samtools/bamtk.c                              |   94 +-
 samtools/bamtk.c.pysam.c                      |   99 +-
 samtools/bedcov.c                             |  103 +-
 samtools/bedcov.c.pysam.c                     |  103 +-
 samtools/bedidx.c                             |    8 +
 samtools/bedidx.c.pysam.c                     |    8 +
 samtools/coverage.c                           |  241 ++-
 samtools/coverage.c.pysam.c                   |  241 ++-
 samtools/cut_target.c                         |   12 +-
 samtools/cut_target.c.pysam.c                 |   12 +-
 samtools/dict.c                               |   28 +-
 samtools/dict.c.pysam.c                       |   34 +-
 samtools/faidx.c                              |   53 +-
 samtools/faidx.c.pysam.c                      |   53 +-
 samtools/htslib-1.10/LICENSE                  |   69 -
 samtools/htslib-1.10/README                   |    5 -
 samtools/padding.c                            |   61 +-
 samtools/padding.c.pysam.c                    |   61 +-
 samtools/phase.c                              |    9 +-
 samtools/phase.c.pysam.c                      |    9 +-
 samtools/sam_view.c                           |  452 +++--
 samtools/sam_view.c.pysam.c                   |  452 +++--
 samtools/samtools.pysam.c                     |   20 +
 samtools/samtools.pysam.h                     |   13 +
 samtools/stats.c                              |   82 +-
 samtools/stats.c.pysam.c                      |   84 +-
 samtools/stats_isize.c.pysam.c                |    2 +-
 samtools/tmp_file.h                           |    2 +-
 samtools/version.sh                           |    2 +-
 setup.py                                      |  130 +-
 tests/AlignedSegment_test.py                  |   13 +-
 tests/AlignmentFileHeader_test.py             |    6 +-
 tests/AlignmentFilePileup_test.py             |    6 +-
 tests/AlignmentFile_test.py                   |   53 +-
 tests/StreamFiledescriptors_test.py           |    6 +-
 tests/TestUtils.py                            |   13 +
 tests/VariantFile_test.py                     |   21 +-
 tests/VariantRecord_test.py                   |    6 +-
 tests/cbcf_data/Makefile                      |    8 +-
 tests/compile_test.py                         |    8 +-
 tests/faidx_test.py                           |    6 +-
 ...ader.bam => 0example_no_seq_in_header.bam} |  Bin
 ... 0example_no_seq_in_header_null_bytes.bam} |  Bin
 tests/pysam_data/Makefile                     |   38 +-
 tests/pysam_data/ex1.sam.gz                   |  Bin 113194 -> 109698 bytes
 tests/refactoring.txt                         |    2 +-
 tests/samtools_test.py                        |   18 +-
 tests/tabix_data/Makefile                     |    7 +
 tests/tabix_data/example.bed.gz.tbi           |  Bin 192 -> 190 bytes
 tests/tabix_data/example.gff3.gz.tbi          |  Bin 1457 -> 1454 bytes
 tests/tabix_data/example.gtf.gz.tbi           |  Bin 196 -> 196 bytes
 tests/tabix_data/example.vcf.gz.tbi           |  Bin 182 -> 180 bytes
 .../tabix_data/example_badcomments.bed.gz.tbi |  Bin 194 -> 194 bytes
 .../tabix_data/example_badcomments.gtf.gz.tbi |  Bin 198 -> 198 bytes
 .../tabix_data/example_badcomments.vcf.gz.tbi |  Bin 186 -> 184 bytes
 tests/tabix_data/example_comments.bed.gz.tbi  |  Bin 194 -> 194 bytes
 tests/tabix_data/example_comments.gtf.gz.tbi  |  Bin 198 -> 198 bytes
 tests/tabix_data/example_comments.vcf.gz.tbi  |  Bin 186 -> 184 bytes
 tests/tabix_test.py                           |    8 +-
 tests/tabixproxies_test.py                    |    6 +-
 tests/test_samtools_python.py                 |    6 +-
 256 files changed, 31299 insertions(+), 9372 deletions(-)
 create mode 100644 .github/workflows/ci.yaml
 create mode 100644 .github/workflows/release.yaml
 rename .travis.yml => .travis.disabled.yml (81%)
 create mode 100644 bcftools/abuf.c
 create mode 100644 bcftools/abuf.c.pysam.c
 create mode 100644 bcftools/abuf.h
 create mode 100644 bcftools/dist.c
 create mode 100644 bcftools/dist.c.pysam.c
 create mode 100644 bcftools/dist.h
 create mode 100644 bcftools/extsort.c
 create mode 100644 bcftools/extsort.c.pysam.c
 create mode 100644 bcftools/extsort.h
 delete mode 100644 bcftools/htslib-1.10.2/LICENSE
 delete mode 100644 bcftools/htslib-1.10.2/README
 create mode 100644 bcftools/str_finder.c
 create mode 100644 bcftools/str_finder.c.pysam.c
 create mode 100644 bcftools/str_finder.h
 create mode 100644 bcftools/utlist.h
 delete mode 100644 pysam.py
 create mode 100644 samtools/amplicon_stats.c
 create mode 100644 samtools/amplicon_stats.c.pysam.c
 create mode 100644 samtools/bam_ampliconclip.c
 create mode 100644 samtools/bam_ampliconclip.c.pysam.c
 create mode 100644 samtools/bam_ampliconclip.h
 create mode 100644 samtools/bam_import.c
 create mode 100644 samtools/bam_import.c.pysam.c
 delete mode 100644 samtools/htslib-1.10/LICENSE
 delete mode 100644 samtools/htslib-1.10/README
 rename tests/pysam_data/{example_no_seq_in_header.bam => 0example_no_seq_in_header.bam} (100%)
 rename tests/pysam_data/{example_no_seq_in_header_null_bytes.bam => 0example_no_seq_in_header_null_bytes.bam} (100%)
 create mode 100644 tests/tabix_data/Makefile

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..4075f1c
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,152 @@
+name: CI
+
+# on: [push, pull_request]
+on: [pull_request]
+
+jobs:
+  direct:
+    runs-on: ${{ matrix.os }}-latest
+    strategy:
+      matrix:
+        os: [ubuntu, macos]
+        python-version: [2.7, 3.6, 3.7, 3.8, 3.9]
+        exclude:
+          # Run only the latest 2.x and 3.x on macOS
+          - os: macos
+            python-version: 3.6
+          - os: macos
+            python-version: 3.7
+          - os: macos
+            python-version: 3.8
+
+    steps:
+      - name: Checkout pysam
+        uses: actions/checkout@v2
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install prerequisite Python libraries
+        run:  pip install cython pytest pytest-pep8
+
+      - name: Install build prerequisites
+        if:   runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -q --no-install-recommends --no-install-suggests libcurl4-openssl-dev
+
+      - name: Build (directly from checkout)
+        run:  python setup.py build
+
+      - name: Install test prerequisites
+        run: |
+          case $RUNNER_OS in
+          Linux)
+              sudo apt-get install -q --no-install-recommends --no-install-suggests samtools bcftools tabix
+              ;;
+          macOS)
+              brew install -q samtools bcftools
+              ;;
+          esac
+
+      - name: Run tests
+        run: |
+          export PYTHONPATH=$(echo $GITHUB_WORKSPACE/build/lib.*)
+          export REF_PATH=':'
+          pytest
+
+
+  sdist:
+    runs-on: ${{ matrix.os }}-latest
+    strategy:
+      matrix:
+        os: [ubuntu, macos]
+        python-version: [3.9]
+
+    steps:
+      - name: Checkout pysam
+        uses: actions/checkout@v2
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install prerequisite Python libraries
+        run:  pip install cython pytest pytest-pep8
+
+      - name: Install build prerequisites
+        if:   runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -q --no-install-recommends --no-install-suggests libcurl4-openssl-dev
+
+      - name: Create source distribution
+        run:  python setup.py sdist --owner=root --group=root
+
+      - name: Build (via sdist tarball)
+        run:  pip install --verbose --no-deps --no-binary=':all:' pysam-*.tar.gz
+        working-directory: dist
+
+      - name: Install test prerequisites
+        run: |
+          case $RUNNER_OS in
+          Linux)
+              sudo apt-get install -q --no-install-recommends --no-install-suggests samtools bcftools tabix
+              ;;
+          macOS)
+              brew install -q samtools bcftools
+              ;;
+          esac
+
+      - name: Run tests
+        run:  REF_PATH=':' pytest
+
+      - name: Upload sdist tarball
+        if:   runner.os == 'Linux'
+        uses: actions/upload-artifact@v2
+        with:
+          name: sdist
+          path: dist/pysam-*.tar.gz
+          retention-days: 14
+
+
+  conda:
+    timeout-minutes: 20
+    runs-on: ${{ matrix.os }}-latest
+    strategy:
+      matrix:
+        os: [ubuntu]
+        python-version: [3.7]
+    defaults:
+      run:
+        shell: bash -l {0}  # needed for conda activation
+    env:
+      HTSLIB_CONFIGURE_OPTIONS: "--disable-libcurl"
+
+    steps:
+      - name: Checkout pysam
+        uses: actions/checkout@v2
+
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          channel-priority: strict
+          activate-environment: testenv
+          auto-activate-base: false
+          use-only-tar-bz2: true
+
+      - name: Set up Conda and Python ${{ matrix.python-version }}
+        run: |
+          conda config --add channels bioconda --add channels conda-forge
+          conda install python=${{ matrix.python-version }} cython
+
+      - name: Build (directly from checkout)
+        run:  python setup.py install
+
+      - name: Install test prerequisites via Conda
+        run:  conda install "samtools>=1.11" "bcftools>=1.11" "htslib>=1.11" pytest
+
+      - name: Run tests
+        run:  REF_PATH=':' pytest
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 0000000..bbc954f
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,115 @@
+name: Publish pysam wheels to PyPI and TestPyPI
+
+on:
+  push:
+    branches:
+      - v[0-9]+.[0-9]+.x
+    tags:
+      - v*
+  release:
+    types:
+      - published
+
+jobs:
+  build_wheels:
+    name: Build wheels on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-20.04, macos-10.15]  # windows-2019, 
+        
+    steps:
+      - name: Checkout pysam
+        uses: actions/checkout@v2
+
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.8'
+        
+      - name: Install prerequisite Python libraries
+        run:  |
+          python -m pip install --upgrade pip        
+          pip install cython pytest pytest-pep8
+
+      - name: Build wheels for linux
+        if:   runner.os == 'Linux'
+        uses: pypa/cibuildwheel@v2.1.2
+        env:
+          CIBW_BUILD: cp36-* cp37-* cp38-* cp39-*
+          CIBW_BEFORE_BUILD: yum install -y libcurl-devel zlib-devel bzip2-devel xz-devel && pip install cython
+          CIBW_MANYLINUX_X86_64_IMAGE: manylinux1
+          CIBW_MANYLINUX_I686_IMAGE: manylinux1
+
+      - name: Build wheels for macos
+        if:   runner.os != 'Linux'
+        uses: pypa/cibuildwheel@v2.1.2
+        env:
+          CIBW_BUILD: cp36-* cp37-* cp38-* cp39-*
+          CIBW_BEFORE_BUILD: pip install cython
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          path: ./wheelhouse/*.whl
+
+  build_sdist:
+
+    runs-on: ${{ matrix.os }}-latest
+    strategy:
+      matrix:
+        os: [ubuntu, macos]
+        python-version: [3.9]
+
+    steps:
+      - name: Checkout pysam
+        uses: actions/checkout@v2
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install prerequisite Python libraries
+        run:  pip install cython pytest pytest-pep8
+
+      - name: Install build prerequisites
+        if:   runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -q --no-install-recommends --no-install-suggests libcurl4-openssl-dev
+
+      - name: Create source distribution
+        run:  python setup.py sdist
+
+      - uses: actions/upload-artifact@v2
+        with:
+          path: dist/*.tar.gz
+
+  upload_pypi:
+
+    needs: [build_wheels, build_sdist]
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Get Artifacts
+        uses: actions/download-artifact@v2
+        with:
+          name: artifact
+          path: dist
+
+      - name: Publish distribution to Test PyPI
+        if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
+        uses: pypa/gh-action-pypi-publish@master
+        with:
+          user: __token__
+          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
+          repository_url: https://test.pypi.org/legacy/
+
+      - name: Publish distribution to PyPI
+        if: github.event_name == 'release' && github.event.action == 'published'
+        uses: pypa/gh-action-pypi-publish@master
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
+
diff --git a/.gitignore b/.gitignore
index b07a532..6ec2d26 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,11 +17,14 @@ tests/cbcf_data
 tests/tabix_data
 
 samtools/config.h
+samtools/samtools_config_vars.h
 bcftools/config.h
 htslib/config.status
 htslib/config.h
 htslib/config.log
 htslib/config.mk
+htslib/config_vars.h
+htslib/htscodecs.mk
 htslib/htslib.pc.tmp
 htslib/htslib-uninstalled.pc
 pysam/config.py
diff --git a/.travis.yml b/.travis.disabled.yml
similarity index 81%
rename from .travis.yml
rename to .travis.disabled.yml
index 47ce194..5b7bcc8 100644
--- a/.travis.yml
+++ b/.travis.disabled.yml
@@ -39,6 +39,16 @@ _cibw_linux: &cibw_linux
     - docker
   <<: *cibw_common
 
+_cibw_linux_aarch64: &cibw_linux_aarch64
+  stage: deploy
+  os: linux
+  arch: arm64
+  language: python
+  python: '3.9'
+  services:
+    - docker
+  <<: *cibw_common
+
 matrix:
   include:
     - stage: deploy
@@ -74,6 +84,13 @@ matrix:
         - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
         - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}'
         - CIBW_TEST_COMMAND='python -c "import pysam"'
+    - <<: *cibw_linux_aarch64
+      env:
+        - CIBW_BUILD="*_aarch64"
+        - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && python -m pip install -r requirements.txt"
+        - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
+        - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}'
+        - CIBW_TEST_COMMAND='python -c "import pysam"'
     - stage: deploy
       os: osx
       language: generic
diff --git a/AUTHORS b/AUTHORS
index 4b00536..4e9c5eb 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,8 +1,17 @@
+Many people have contributed to pysam. The list of github contributors
+is the best place to get a full list of authors and their contributions.
+The list and summary below are out of date and represent the earlier
+stages of the project.
+
 List of contributors:
 
 Andreas Heger, Tildon Grant Belgard, Florian Finkernagel, Leo
 Goodstadt, Martin Goodson all contributed code to pysam.
 
+John Marshall has been looking after pysam and its community for
+several years, as well as making many code contributions and improving
+the engineering of pysam.
+
 Kevin B. Jacobs implemented a Cython wrapper for the VCF/BCF
 reader/writer in htslib.
 
diff --git a/INSTALL b/INSTALL
index 9636125..5016dcc 100644
--- a/INSTALL
+++ b/INSTALL
@@ -47,7 +47,7 @@ features. If these fail, for example due to missing library
 dependencies (`libcurl`, `libcrypto`), it will fall back to
 conservative defaults.
 
-Options can be passed to the configure script explicitely by
+Options can be passed to the configure script explicitly by
 setting the environment variable `HTSLIB_CONFIGURE_OPTIONS`.
 For example::
 
diff --git a/MANIFEST.in b/MANIFEST.in
index aaacb22..25e9a1a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -16,45 +16,39 @@ include pysam/libc*.pyx
 include pysam/libc*.c
 include pysam/*.c
 include pysam/*.h
+exclude pysam/config.py
+
+include win32/*.[ch]
 
 # exclude tests from pypi tar-ball - they
 # require additional data
 prune tests/
 
 # samtools
-include samtools/configure
-include samtools/config.mk.in
-include samtools/config.h.in
-include samtools/*.h
-include samtools/*.c
-exclude samtools/config.h
-include samtools/*/*.h
+include samtools/LICENSE samtools/README samtools/lz4/LICENSE
+recursive-include samtools *.[ch]
+include samtools/version.sh
+exclude samtools/*config*.h
 
 # bcftools
-include bcftools/*.h
-include bcftools/*.c
-exclude bcftools/config.h
+include bcftools/LICENSE bcftools/README
+include bcftools/*.[ch]
+include bcftools/version.sh
+exclude bcftools/*config*.h
 
 # htslib
-include htslib/*.c
-include htslib/*.h
-include htslib/INSTALL
-include htslib/NEWS
-exclude htslib/config.h
-include htslib/Makefile
-include htslib/htslib_vars.mk
-include htslib/configure
-include htslib/config.mk.in
-include htslib/config.h.in
-include htslib/htslib.pc.in
-include htslib/htslib/*.h
-include htslib/cram/*.c
-include htslib/cram/*.h
-include htslib/os/*.c
-include htslib/os/*.h
+include htslib/LICENSE htslib/README
+recursive-include htslib *.[ch]
+exclude htslib/*config*.h
+
+include htslib/configure.ac htslib/m4/*.m4 htslib/*.in
+include htslib/configure htslib/version.sh
+include htslib/Makefile htslib/*.mk
+exclude htslib/config.mk htslib/htscodecs.mk
+
 include cy_build.py
-include pysam.py
 include requirements.txt
 
 # documentation
-include doc/*
+include doc/*.py doc/*.rst
+include doc/Makefile doc/make.bat
diff --git a/NEWS b/NEWS
index 49ce485..75d9249 100644
--- a/NEWS
+++ b/NEWS
@@ -209,7 +209,7 @@ Release 0.11.2
 ==============
 
 This release wraps htslib/samtools/bcfools versions 1.4.1 in response
-to a security fix in these libraries. Additionaly the following
+to a security fix in these libraries. Additionally the following
 issues have been fixed:
 
 * [#452] add GFF3 support for tabix parsers
@@ -330,7 +330,7 @@ Overview
 --------
 
 The 0.9.0 release upgrades htslib to htslib 1.3 and numerous other
-enchancements and bugfixes. See below for a detailed list.
+enhancements and bugfixes. See below for a detailed list.
 
 `Htslib 1.3 <https://github.com/samtools/htslib/releases/tag/1.3>`_
 comes with additional capabilities for remote file access which depend
@@ -373,7 +373,7 @@ Detailed release notes
      and code bloat.
    * run configure for the builtin htslib library in order to detect
      optional libraries such as libcurl. Configure behaviour can be
-     controlled by setting the environmet variable
+     controlled by setting the environment variable
      HTSLIB_CONFIGURE_OPTIONS.
 * get_reference_sequence() now returns the reference sequence and not
   something looking like it. This bug had effects on
@@ -576,7 +576,7 @@ Other changes:
 
 Backwards incompatible changes
 
-* Empty cigarstring now returns None (intstead of '')
+* Empty cigarstring now returns None (instead of '')
 * Empty cigar now returns None (instead of [])
 * When using the extension classes in cython modules, AlignedRead
   needs to be substituted with AlignedSegment.
diff --git a/README.rst b/README.rst
index 4efa827..368984a 100644
--- a/README.rst
+++ b/README.rst
@@ -25,7 +25,7 @@ as it resolves non-python dependencies and uses pre-configured
 compilation options. Especially for OS X this will potentially save a
 lot of trouble.
 
-The current version of pysam wraps 3rd-party code from htslib-1.10.2, samtools-1.10, and bcftools-1.10.2.
+The current version of pysam wraps 3rd-party code from htslib-1.13, samtools-1.13, and bcftools-1.13.
 
 Pysam is available through `pypi
 <https://pypi.python.org/pypi/pysam>`_. To install, type::
diff --git a/bcftools/HMM.c b/bcftools/HMM.c
index 70ad8d6..c2d302f 100644
--- a/bcftools/HMM.c
+++ b/bcftools/HMM.c
@@ -1,6 +1,6 @@
 /* The MIT License
 
-   Copyright (c) 2014-2015 Genome Research Ltd.
+   Copyright (c) 2014-2017 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/HMM.c.pysam.c b/bcftools/HMM.c.pysam.c
index 2280c0d..d039367 100644
--- a/bcftools/HMM.c.pysam.c
+++ b/bcftools/HMM.c.pysam.c
@@ -2,7 +2,7 @@
 
 /* The MIT License
 
-   Copyright (c) 2014-2015 Genome Research Ltd.
+   Copyright (c) 2014-2017 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/HMM.h b/bcftools/HMM.h
index 70c9cb8..3a6cab3 100644
--- a/bcftools/HMM.h
+++ b/bcftools/HMM.h
@@ -1,6 +1,6 @@
 /* The MIT License
 
-   Copyright (c) 2014-2015 Genome Research Ltd.
+   Copyright (c) 2014-2016 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/LICENSE b/bcftools/LICENSE
index 75aeb6c..f223b09 100644
--- a/bcftools/LICENSE
+++ b/bcftools/LICENSE
@@ -9,7 +9,7 @@ the INSTALL document), the use of this software is governed by the GPL license.
 
 The MIT/Expat License
 
-Copyright (C) 2012-2014 Genome Research Ltd.
+Copyright (C) 2012-2021 Genome Research Ltd.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -746,3 +746,28 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
+
+-----------------------------------------------------------------------------
+
+LICENSE for utlist.h
+
+Copyright (c) 2007-2014, Troy D. Hanson   http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/bcftools/README b/bcftools/README
index 5cb1bbd..fff0cb7 100644
--- a/bcftools/README
+++ b/bcftools/README
@@ -3,3 +3,25 @@ SAMtools) and manipulating VCF and BCF files.  The program is intended
 to replace the Perl-based tools from vcftools.
 
 See INSTALL for building and installation instructions.
+
+Please cite this paper when using BCFtools for your publications:
+
+Twelve years of SAMtools and BCFtools
+Petr Danecek, James K Bonfield, Jennifer Liddle, John Marshall, Valeriu Ohan, Martin O Pollard, Andrew Whitwham, Thomas Keane, Shane A McCarthy, Robert M Davies, Heng Li
+GigaScience, Volume 10, Issue 2, February 2021, giab008, https://doi.org/10.1093/gigascience/giab008
+
+@article{10.1093/gigascience/giab008,
+    author = {Danecek, Petr and Bonfield, James K and Liddle, Jennifer and Marshall, John and Ohan, Valeriu and Pollard, Martin O and Whitwham, Andrew and Keane, Thomas and McCarthy, Shane A and Davies, Robert M and Li, Heng},
+    title = "{Twelve years of SAMtools and BCFtools}",
+    journal = {GigaScience},
+    volume = {10},
+    number = {2},
+    year = {2021},
+    month = {02},
+    abstract = "{SAMtools and BCFtools are widely used programs for processing and analysing high-throughput sequencing data. They include tools for file format conversion and manipulation, sorting, querying, statistics, variant calling, and effect analysis amongst other methods. The first version appeared online 12 years ago and has been maintained and further developed ever since, with many new features and improvements added over the years. The SAMtools and BCFtools packages represent a unique collection of tools that have been used in numerous other software projects and countless genomic pipelines. Both SAMtools and BCFtools are freely available on GitHub under the permissive MIT licence, free for both non-commercial and commercial use. Both packages have been installed $>$1 million times via Bioconda. The source code and documentation are available from https://www.htslib.org.}",
+    issn = {2047-217X},
+    doi = {10.1093/gigascience/giab008},
+    url = {https://doi.org/10.1093/gigascience/giab008},
+    note = {giab008},
+    eprint = {https://academic.oup.com/gigascience/article-pdf/10/2/giab008/36332246/giab008.pdf},
+}
diff --git a/bcftools/abuf.c b/bcftools/abuf.c
new file mode 100644
index 0000000..5e45e9e
--- /dev/null
+++ b/bcftools/abuf.c
@@ -0,0 +1,713 @@
+/* The MIT License
+
+   Copyright (c) 2021 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3@sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <assert.h>
+#include <strings.h>
+#include <htslib/vcf.h>
+#include <ctype.h>
+#include "bcftools.h"
+#include "abuf.h"
+#include "rbuf.h"
+
+typedef enum
+{
+    M_FIRST, M_SUM
+}
+merge_rule_t;
+
+typedef struct
+{
+    kstring_t ref, alt;
+    int ial;        // the index of the original ALT allele, 1-based
+    int beg, end;   // 0-based inclusive offsets to ref,alt
+}
+atom_t;
+
+typedef struct
+{
+    bcf1_t *rec;
+    int nori, nout;     // number of ALTs in the input, and VCF rows on output
+    uint8_t *tbl;       // nori columns, nout rows; indicates allele contribution to output rows, see "The atomization works as follows" below
+    uint8_t *overlaps;  // is the star allele needed for this variant?
+    atom_t **atoms;
+    int matoms, mtbl, moverlaps;
+    char *info_tag;
+}
+split_t;
+
+struct _abuf_t
+{
+    abuf_opt_t mode;
+    split_t split;
+    atom_t *atoms;
+    int natoms, matoms;
+    const bcf_hdr_t *hdr;
+    bcf_hdr_t *out_hdr;
+    bcf1_t **vcf;       // dimensions stored in rbuf
+    rbuf_t rbuf;
+
+    kstring_t tmps;
+    void *tmp, *tmp2;
+    int32_t *gt, *tmpi;
+    int ngt, mgt, ntmpi, mtmpi, mtmp, mtmp2;
+    int star_allele;
+};
+
+abuf_t *abuf_init(const bcf_hdr_t *hdr, abuf_opt_t mode)    // create a zero-initialized buffer bound to hdr; the caller releases it with abuf_destroy()
+{
+    if ( mode!=SPLIT ) error("todo\n");     // any mode other than SPLIT is reported through error()
+    abuf_t *buf = (abuf_t*) calloc(1,sizeof(abuf_t));   // NOTE(review): the calloc result is not checked
+    buf->hdr  = hdr;
+    buf->out_hdr = (bcf_hdr_t*) hdr;        // output header defaults to the input header; abuf_set(BCF_HDR) replaces it
+    buf->mode = mode;
+    buf->star_allele = 1;                   // star alleles are on by default; abuf_set(STAR_ALLELE) changes it
+    rbuf_init(&buf->rbuf, 0);
+    return buf;
+}
+
+void abuf_destroy(abuf_t *buf)      // free all memory held by buf and buf itself; buf must not be NULL
+{
+    int i;
+    for (i=0; i<buf->matoms; i++)   // walk all allocated slots (matoms), not only the natoms currently in use
+    {
+        free(buf->atoms[i].ref.s);
+        free(buf->atoms[i].alt.s);
+    }
+    free(buf->atoms);
+    free(buf->split.atoms);         // array of pointers into buf->atoms, the atoms themselves were freed above
+    free(buf->split.overlaps);
+    free(buf->split.tbl);
+    for (i=0; i<buf->rbuf.m; i++)
+        if ( buf->vcf[i] ) bcf_destroy(buf->vcf[i]);    // slots may be empty, hence the NULL check
+    free(buf->vcf);
+    free(buf->gt);
+    free(buf->tmpi);
+    free(buf->tmp);
+    free(buf->tmp2);
+    free(buf->tmps.s);
+    free(buf);                      // split.info_tag and the headers are not freed here
+}
+
+void abuf_set(abuf_t *buf, abuf_opt_t key, void *value)     // set option key; value points to the new setting (bcf_hdr_t*, char* or int depending on key)
+{
+    if ( key==BCF_HDR ) { buf->out_hdr = *((bcf_hdr_t**)value); return; }   // header used when filling the output records
+    if ( key==INFO_TAG )
+    {
+        buf->split.info_tag = *((char**)value);     // the pointer is stored as is, the string is not copied
+        bcf_hdr_printf(buf->out_hdr,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Original variant. Format: CHR|POS|REF|ALT|USED_ALT_IDX\">",buf->split.info_tag); 
+        return;
+    }
+    if ( key==STAR_ALLELE ) { buf->star_allele = *((int*)value); return; }  // zero makes _has_star_allele() always return 0
+}   // any other key is silently ignored
+
+/*
+    Split alleles into primitives, e.g.
+        CC>TT  becomes  C>T,C>T
+        GCGT>GTGA  becomes C>T,T>A
+
+    There is no sequence alignment, just trimming and greedy matching
+    from the left side.
+*/
+static void _atomize_allele(abuf_t *buf, bcf1_t *rec, int ial)
+{
+    // Trim identical sequence from right
+    char *ref = rec->d.allele[0];
+    char *alt = rec->d.allele[ial];
+    int rlen = strlen(ref);
+    int alen = strlen(alt);
+    while ( rlen>1 && alen>1 && ref[rlen-1]==alt[alen-1] ) rlen--, alen--;
+    int Mlen = rlen > alen ? rlen : alen;
+
+    atom_t *atom = NULL; 
+    int i;
+    for (i=0; i<Mlen; i++)
+    {
+        char refb = i<rlen ? ref[i] : '-';
+        char altb = i<alen ? alt[i] : '-';
+        if ( refb!=altb )
+        {
+            if ( refb=='-' || altb=='-' )
+            {
+                assert(atom);
+                if ( altb!='-' ) kputc(altb, &atom->alt);
+                if ( refb!='-' ) { kputc(refb, &atom->ref); atom->end++; }
+            }
+            else
+            {
+                buf->natoms++;
+                hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
+                atom = &buf->atoms[buf->natoms-1];
+                atom->ref.l = 0;
+                atom->alt.l = 0;
+                kputc(refb, &atom->ref);
+                kputc(altb, &atom->alt);
+                atom->beg = atom->end = i;
+                atom->ial = ial;
+            }
+            continue;
+        }
+        if ( i+1>=rlen || i+1>=alen )   // is the next base a deletion?
+        {
+            buf->natoms++;
+            hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
+            atom = &buf->atoms[buf->natoms-1];
+            atom->ref.l = 0;
+            atom->alt.l = 0;
+            kputc(refb, &atom->ref);
+            kputc(altb, &atom->alt);
+            atom->beg = atom->end = i;
+            atom->ial = ial;
+        }
+    }
+}
+static int _atoms_inconsistent(const atom_t *a, const atom_t *b)    // three-way comparison; returns 0 only when beg, REF and ALT all agree
+{
+    if ( a->beg < b->beg ) return -1;       // order by start offset first
+    if ( a->beg > b->beg ) return 1;
+    int rcmp = strcasecmp(a->ref.s,b->ref.s);   // then by REF, ignoring case
+    if ( rcmp ) return rcmp;
+    return strcasecmp(a->alt.s,b->alt.s);       // and finally by ALT, ignoring case
+}
+/*
+    For reproducibility of tests on different platforms, we need to guarantee the same order of identical
+    atoms originating from different source ALTs.  Even though they are consistent, different values can be
+    picked for VCF annotations as currently the values from the one that comes first are used.
+*/
+static int _cmp_atoms(const void *aptr, const void *bptr)   // qsort() comparator over atom_t elements
+{
+    const atom_t *a = (const atom_t*) aptr;
+    const atom_t *b = (const atom_t*) bptr;
+    int rcmp = _atoms_inconsistent(a,b);    // primary key: beg, REF, ALT
+    if ( rcmp ) return rcmp;
+    if ( a->ial < b->ial ) return -1;       // tie-break identical atoms by the index of their source ALT
+    if ( a->ial > b->ial ) return 1;
+    return 0;
+}
+static void _split_table_init(abuf_t *buf, bcf1_t *rec, int natoms)
+{
+    buf->split.rec  = rec;
+    buf->split.nori = rec->n_allele - 1;
+    buf->split.nout = 0;
+    hts_expand(uint8_t,buf->split.nori*natoms,buf->split.mtbl,buf->split.tbl);
+    hts_expand(atom_t*,natoms,buf->split.matoms,buf->split.atoms);
+    hts_expand(uint8_t,natoms,buf->split.moverlaps,buf->split.overlaps);
+    memset(buf->split.overlaps,0,sizeof(*buf->split.overlaps)*natoms);
+}
+static void _split_table_new(abuf_t *buf, atom_t *atom)
+{
+    int i, iout = buf->split.nout++;
+    buf->split.atoms[iout] = atom;
+    uint8_t *ptr = buf->split.tbl + iout*buf->split.nori;
+    for (i=0; i<buf->split.nori; i++) ptr[i] = 0;
+    ptr[atom->ial-1] = 1;
+}
+static void _split_table_overlap(abuf_t *buf, int iout, atom_t *atom)   // register that atom (from source ALT atom->ial) overlaps output row iout
+{
+    uint8_t *ptr = buf->split.tbl + iout*buf->split.nori;   // start of row iout; each row has nori columns
+    ptr[atom->ial-1] = _atoms_inconsistent(atom,buf->split.atoms[iout]) ? 2 : 1;    // 1: identical to the row's atom, 2: overlapping but different
+    buf->split.overlaps[iout] = 1;      // set in both cases; read back by _has_star_allele()
+}
+#if 0
+static void _split_table_print(abuf_t *buf)
+{
+    int i,j;
+    for (i=0; i<buf->split.nout; i++)
+    {
+        atom_t *atom = buf->split.atoms[i];
+        uint8_t *ptr = buf->split.tbl + i*buf->split.nori;
+        fprintf(stderr,"%d\t%s\t%s",(int)buf->split.rec->pos+1+atom->beg,atom->ref.s,atom->alt.s);
+        for (j=0; j<buf->split.nori; j++) fprintf(stderr,"\t%d",(int)ptr[j]);
+        fprintf(stderr,"\n");
+    }
+}
+static void _split_table_print_atoms(abuf_t *buf)
+{
+    int i;
+    for (i=0; i<buf->natoms; i++)
+    {
+        atom_t *atom = &buf->atoms[i];
+        fprintf(stderr,"atom%d %p: ialt=%d %s>%s %d-%d\n",i,atom,atom->ial,atom->ref.s,atom->alt.s,atom->beg,atom->end);
+    }
+}
+#endif
+static inline uint8_t _has_star_allele(abuf_t *buf, int iout)   // non-zero if output row iout was flagged as overlapping and star alleles are enabled
+{
+    if ( !buf->star_allele ) return 0;      // star alleles were switched off
+    return buf->split.overlaps[iout];
+}
+static inline int _split_table_get_ial(abuf_t *buf, int irow, int ial)  // look up the table code (0, 1 or 2) of original allele ial in output row irow
+{
+    if ( !ial ) return ial;     // the reference allele always maps to 0
+    return buf->split.tbl[irow*buf->split.nori + ial - 1];  // table columns hold ALTs only, hence ial-1
+}
+static void _split_table_set_chrom_qual(abuf_t *buf)
+{
+    int iout,j;
+    bcf1_t *rec = buf->split.rec;
+    for (iout=0; iout<buf->split.nout; iout++)
+    {
+        rbuf_expand0(&buf->rbuf, bcf1_t*, buf->rbuf.n+1, buf->vcf);
+        j = rbuf_append(&buf->rbuf);
+        if ( !buf->vcf[j] ) buf->vcf[j] = bcf_init1();
+        bcf1_t *out = buf->vcf[j];
+        bcf_clear1(out);
+
+        atom_t *atom = buf->split.atoms[iout];
+        out->rid = rec->rid;
+        out->pos = rec->pos + atom->beg;
+        bcf_update_id(buf->out_hdr, out, rec->d.id);
+
+        const char *als[3];
+        als[0] = atom->ref.s;
+        als[1] = atom->alt.s;
+        als[2] = "*";
+        int nals = _has_star_allele(buf,iout) ? 3 : 2;
+        bcf_update_alleles(buf->out_hdr, out, als, nals);
+
+        if ( bcf_float_is_missing(rec->qual) )
+            bcf_float_set_missing(out->qual);
+        else
+            out->qual = rec->qual;
+
+        bcf_update_filter(buf->out_hdr, out, rec->d.flt, rec->d.n_flt);
+    }
+}
+static void _split_table_set_info(abuf_t *buf, bcf_info_t *info, merge_rule_t mode)    // copy one INFO tag of the source record to every output record; mode selects M_FIRST or M_SUM for Number=R integers
+{
+    const char *tag = bcf_hdr_int2id(buf->hdr,BCF_DT_ID,info->key);
+    int type = bcf_hdr_id2type(buf->hdr,BCF_HL_INFO,info->key);
+    int len  = bcf_hdr_id2length(buf->hdr,BCF_HL_INFO,info->key);
+    if ( len==BCF_VL_G ) return;                                                // todo: Number=G INFO tags
+    if ( type==BCF_HT_STR && len!=BCF_VL_FIXED && len!=BCF_VL_VAR ) return;     // todo: Number=A,R,G for strings
+    if ( type==BCF_HT_LONG ) return;                                            // todo: 64bit integers
+
+    bcf1_t *rec = buf->split.rec;
+    int mtmp = ( type==BCF_HT_INT || type==BCF_HT_REAL ) ? buf->mtmp/4 : buf->mtmp;     // buf->mtmp is kept in bytes, numeric getters count 4-byte items
+    int nval = bcf_get_info_values(buf->hdr,rec,tag,&buf->tmp,&mtmp,type);
+    if ( type==BCF_HT_INT || type==BCF_HT_REAL ) buf->mtmp = mtmp*4;
+
+    // Skip tags that could not be read (nval<0, e.g. marked for removal or a type clash) or that have an incorrect
+    // number of values. Note this check does not consider all values missing and will remove annotations that don't pass.
+    if ( nval<0 || (len==BCF_VL_A && nval != rec->n_allele - 1) || (len==BCF_VL_R && nval != rec->n_allele) ) return;
+
+    if ( buf->mtmp2 < buf->mtmp )
+    {
+        buf->tmp2  = realloc(buf->tmp2, buf->mtmp);
+        if ( !buf->tmp2 ) error("Failed to alloc %d bytes\n", buf->mtmp);
+        buf->mtmp2 = buf->mtmp;
+    }
+
+    int32_t missing = bcf_int32_missing;
+    void *missing_ptr = (void*)&missing;    // 4-byte missing value, reinterpreted as float for real-valued tags
+    if ( type==BCF_HT_REAL ) bcf_float_set_missing(*((float*)missing_ptr));
+
+    int iout,i;
+    for (iout=0; iout<buf->split.nout; iout++)
+    {
+        bcf1_t *out = buf->vcf[rbuf_kth(&buf->rbuf,iout)];
+        int star_allele = _has_star_allele(buf,iout);
+        int ret = 0;
+        if ( len==BCF_VL_FIXED || len==BCF_VL_VAR )
+            ret = bcf_update_info(buf->out_hdr, out, tag, type==BCF_HT_FLAG ? NULL : buf->tmp, nval, type);
+        else if ( len==BCF_VL_A )
+        {
+            int iori = buf->split.atoms[iout]->ial - 1;     // 0-based index of the source ALT
+            assert( iori<nval );
+            memcpy(buf->tmp2,buf->tmp+4*iori,4);
+            if ( star_allele )
+                memcpy(buf->tmp2+4,missing_ptr,4);          // the star allele gets a missing value
+            ret = bcf_update_info(buf->out_hdr, out, tag, buf->tmp2, 1 + star_allele, type);
+        }
+        else if ( len==BCF_VL_R )
+        {
+            memcpy(buf->tmp2,buf->tmp,4);   // REF contributes to all records
+            int iori = buf->split.atoms[iout]->ial;
+            assert( iori<nval && iori<=buf->split.nori );
+            memcpy(buf->tmp2+4,buf->tmp+4*iori,4);
+            if ( type==BCF_HT_INT && mode==M_SUM )
+            {
+                uint8_t *tbl = buf->split.tbl + iout*buf->split.nori;
+                for (i=iori; i<buf->split.nori; i++)        // add values of later ALTs that produced the same atom
+                {
+                    if ( tbl[i]==1 ) ((int32_t*)buf->tmp2)[1] += ((int32_t*)buf->tmp)[i+1];
+                }
+            }
+            if ( star_allele )
+                memcpy(buf->tmp2+8,missing_ptr,4);
+            ret = bcf_update_info(buf->out_hdr, out, tag, buf->tmp2, 2 + star_allele, type);
+        }
+        if ( ret!=0 ) error("An error occurred while updating INFO/%s\n",tag);
+    }
+}
+static void _split_table_set_history(abuf_t *buf)   // annotate each output record with INFO/<info_tag> = CHR|POS|REF|ALT|USED_ALT_IDX of the source record
+{
+    int i,j;
+    bcf1_t *rec = buf->split.rec;
+    buf->tmps.l = 0;
+    ksprintf(&buf->tmps,"%s|%"PRIhts_pos"|%s|",bcf_seqname(buf->hdr,rec),rec->pos+1,rec->d.allele[0]);
+    for (i=1; i<rec->n_allele; i++)
+    {
+        kputs(rec->d.allele[i],&buf->tmps);
+        // ALTs are separated by commas, the last one is followed by the field separator
+        kputc(i+1<rec->n_allele ? ',' : '|',&buf->tmps);
+    }
+    int len = buf->tmps.l;      // length of the "CHR|POS|REF|ALT|" prefix shared by all output records
+    // the 1-based indexes of the contributing source ALTs are appended after the prefix for each output row
+
+    for (i=0; i<buf->split.nout; i++)
+    {
+        buf->tmps.l = len;      // rewind to the end of the shared prefix
+        bcf1_t *out = buf->vcf[rbuf_kth(&buf->rbuf,i)];
+        uint8_t *ptr = buf->split.tbl + i*buf->split.nori;
+        for (j=0; j<buf->split.nori; j++)
+        {
+            if ( ptr[j]!=1 ) continue;      // list only the ALTs that match this row's atom
+            kputw(j+1,&buf->tmps);
+            kputc(',',&buf->tmps);
+        }
+        buf->tmps.s[--buf->tmps.l] = 0;     // replace the trailing comma with the string terminator
+        if ( (bcf_update_info_string(buf->out_hdr, out, buf->split.info_tag, buf->tmps.s))!=0 )
+            error("An error occurred while updating INFO/%s\n",buf->split.info_tag);
+    }
+}
+static void _split_table_set_gt(abuf_t *buf)
+{
+    int nsmpl = bcf_hdr_nsamples(buf->hdr);
+    if ( !nsmpl ) return;
+
+    bcf1_t *rec = buf->split.rec;
+    buf->ngt = bcf_get_genotypes(buf->hdr, rec, &buf->gt, &buf->mgt);
+    if ( buf->ngt<=0 ) return;
+    else
+        hts_expand(int32_t,buf->ngt,buf->mtmpi,buf->tmpi);
+
+    int iout,i,j;
+    for (iout=0; iout<buf->split.nout; iout++)
+    {
+        bcf1_t *out = buf->vcf[rbuf_kth(&buf->rbuf,iout)];
+        int star_allele = _has_star_allele(buf,iout);
+        int max_ploidy = buf->ngt/nsmpl;
+        int32_t *src = buf->gt, *dst = buf->tmpi;
+        for (i=0; i<nsmpl; i++)
+        {
+            for (j=0; j<max_ploidy; j++)
+            {
+                if ( src[j]==bcf_int32_vector_end || bcf_gt_is_missing(src[j]) )
+                {
+                    dst[j] = src[j];
+                    continue;
+                }
+                int iori = bcf_gt_allele(src[j]);
+                if ( iori<0 || iori>=rec->n_allele )
+                    error("Out-of-bounds genotypes at %s:%"PRIhts_pos"\n",bcf_seqname(buf->hdr,rec),rec->pos+1);
+                int ial = _split_table_get_ial(buf,iout,iori);
+                if ( ial==2 && !star_allele )
+                    dst[j] = bcf_gt_missing;
+                else
+                    dst[j] = bcf_gt_is_phased(src[j]) ? bcf_gt_phased(ial) : bcf_gt_unphased(ial);
+            }
+            src += max_ploidy;
+            dst += max_ploidy;
+        }
+        bcf_update_genotypes(buf->out_hdr,out,buf->tmpi,buf->ngt);
+    }
+}
+static void _split_table_set_format(abuf_t *buf, bcf_fmt_t *fmt, merge_rule_t mode)    // copy one FORMAT tag of the source record to every output record; mode selects M_FIRST or M_SUM for Number=R integers
+{
+    int nsmpl = bcf_hdr_nsamples(buf->hdr);
+    if ( !nsmpl ) return;
+
+    const char *tag = bcf_hdr_int2id(buf->hdr,BCF_DT_ID,fmt->id);
+    if ( tag[0]=='G' && tag[1]=='T' && !tag[2] )        // FORMAT/GT
+    {
+        _split_table_set_gt(buf);
+        return;
+    }
+
+    int type = bcf_hdr_id2type(buf->hdr,BCF_HL_FMT,fmt->id);
+    int len  = bcf_hdr_id2length(buf->hdr,BCF_HL_FMT,fmt->id);
+    if ( type==BCF_HT_STR && len!=BCF_VL_FIXED && len!=BCF_VL_VAR ) return;     // todo: Number=A,R,G for strings
+    if ( type==BCF_HT_LONG ) return;                                            // todo: 64bit integers
+
+    const int num_size = 4;
+    assert( num_size==sizeof(int32_t) && num_size==sizeof(float) );
+    int32_t missing = bcf_int32_missing;
+    void *missing_ptr = (void*)&missing;    // 4-byte missing value, reinterpreted as float for real-valued tags
+    if ( type==BCF_HT_REAL ) bcf_float_set_missing(*((float*)missing_ptr));
+
+    bcf1_t *rec = buf->split.rec;
+    int mtmp = ( type==BCF_HT_INT || type==BCF_HT_REAL ) ? buf->mtmp/num_size : buf->mtmp;  // number of items
+    int nval = bcf_get_format_values(buf->hdr,rec,tag,&buf->tmp,&mtmp,type);
+    if ( type==BCF_HT_INT || type==BCF_HT_REAL ) buf->mtmp = mtmp*num_size;                 // number of bytes
+
+    if ( len==BCF_VL_G && nval!=nsmpl*rec->n_allele && nval!=nsmpl*rec->n_allele*(rec->n_allele+1)/2 ) return;      // not haploid nor diploid
+
+    // Skip tags that could not be read (nval<0, e.g. marked for removal or a type clash) or that have an incorrect
+    // number of values. Note this check does not consider all values missing and will remove annotations that don't pass.
+    if ( nval<0 || (len==BCF_VL_A && nval != nsmpl*(rec->n_allele - 1)) || (len==BCF_VL_R && nval != nsmpl*rec->n_allele) ) return;
+
+    // Increase buffer size to accommodate star allele
+    int nval1 = nval / nsmpl;
+    mtmp = buf->mtmp;
+    if ( (len==BCF_VL_A || len==BCF_VL_R) && mtmp < num_size*nsmpl*(nval1+1) ) mtmp = num_size*nsmpl*(nval1+1); // +1 for the possibility of the star allele
+    else if ( len==BCF_VL_G && mtmp < num_size*nsmpl*(nval1+3) ) mtmp = num_size*nsmpl*(nval1+3);
+
+    if ( buf->mtmp2 < mtmp )
+    {
+        buf->tmp2  = realloc(buf->tmp2, mtmp);
+        if ( !buf->tmp2 ) error("Failed to alloc %d bytes\n", mtmp);
+        buf->mtmp2 = mtmp;
+    }
+
+    int iout, i, j;
+    for (iout=0; iout<buf->split.nout; iout++)
+    {
+        int star_allele = _has_star_allele(buf,iout);
+        bcf1_t *out = buf->vcf[rbuf_kth(&buf->rbuf,iout)];
+        int ret = 0;
+        if ( len==BCF_VL_FIXED || len==BCF_VL_VAR )
+            ret = bcf_update_format(buf->out_hdr, out, tag, buf->tmp, nval, type);
+        else if ( len==BCF_VL_A )
+        {
+            int iori = buf->split.atoms[iout]->ial - 1;     // 0-based index of the source ALT
+            assert( iori<nval );
+            for (i=0; i<nsmpl; i++)
+            {
+                void *src = buf->tmp  + nval1*num_size*i;
+                void *dst = buf->tmp2 + num_size*i*(star_allele+1);
+                memcpy(dst,src+iori*num_size,num_size);
+                if ( star_allele )
+                    memcpy(dst+num_size,missing_ptr,num_size);
+            }
+            ret = bcf_update_format(buf->out_hdr, out, tag, buf->tmp2, nsmpl*(star_allele+1), type);
+        }
+        else if ( len==BCF_VL_R )
+        {
+            int iori = buf->split.atoms[iout]->ial;
+            assert( iori<=nval );
+            for (i=0; i<nsmpl; i++)
+            {
+                void *src = buf->tmp  + nval1*num_size*i;
+                void *dst = buf->tmp2 + num_size*i*(star_allele+2);
+                memcpy(dst,src,num_size);
+                memcpy(dst+num_size,src+iori*num_size,num_size);
+
+                if ( type==BCF_HT_INT && mode==M_SUM )
+                {
+                    uint8_t *tbl = buf->split.tbl + iout*buf->split.nori;
+                    for (j=iori; j<buf->split.nori; j++)
+                        if ( tbl[j]==1 ) ((int32_t*)dst)[1] += ((int32_t*)src)[j+1];
+                }
+                if ( star_allele )
+                    memcpy(dst+num_size*2,missing_ptr,num_size);
+            }
+            ret = bcf_update_format(buf->out_hdr, out, tag, buf->tmp2, nsmpl*(star_allele+2), type);
+        }
+        else if ( len==BCF_VL_G )
+        {
+            int iori = buf->split.atoms[iout]->ial;
+            int i01  = bcf_alleles2gt(0,iori);
+            int i11  = bcf_alleles2gt(iori,iori);
+            assert( iori<nval );
+            #define BRANCH(type_t, is_missing, is_vector_end, set_missing, set_vector_end) { \
+                for (i=0; i<nsmpl; i++) \
+                { \
+                    type_t *src = (type_t*)buf->tmp + i*nval1; \
+                    type_t *dst = (type_t*)buf->tmp2 + i*3*(1+star_allele); \
+                    int n=0; /* determine ploidy of this genotype */ \
+                    while ( n<nval1 && !(is_vector_end) ) { n++; src++; } \
+                    src = (type_t*)buf->tmp + i*nval1; \
+                    memcpy(dst++,src,sizeof(type_t)); /* element size, not sizeof of the int variable "type" */ \
+                    int nmiss = 0, nend = 0; \
+                    if ( n==rec->n_allele ) /* haploid */ \
+                    { \
+                        memcpy(dst++,src+iori,sizeof(type_t)); \
+                        if ( star_allele ) { nmiss = 1; nend = 3; } \
+                        else nend = 1; \
+                    } \
+                    else if ( n==nval1 ) \
+                    { \
+                        memcpy(dst++,src+i01,sizeof(type_t)); \
+                        memcpy(dst++,src+i11,sizeof(type_t)); \
+                        if ( star_allele ) nmiss = 3; \
+                    } \
+                    else if ( n==1 && is_missing ) \
+                    { \
+                        if ( star_allele ) nend = 5; \
+                        else nend = 2; \
+                    } \
+                    else  \
+                        error("Incorrect number of values at %s:%"PRIhts_pos" .. tag=FORMAT/%s Number=G nAlleles=%d nValues=%d, %d-th sample\n", \
+                                bcf_seqname(buf->hdr,rec),rec->pos+1,tag,rec->n_allele,n,i+1); \
+                    for (j=0; j<nmiss; j++) { set_missing; dst++; } \
+                    for (j=0; j<nend; j++) { set_vector_end; dst++; } \
+                } \
+            }
+            switch (type)
+            {
+                case BCF_HT_INT:  BRANCH(int32_t, *src==bcf_int32_missing, *src==bcf_int32_vector_end, *dst=bcf_int32_missing, *dst=bcf_int32_vector_end); break;
+                case BCF_HT_REAL: BRANCH(float, bcf_float_is_missing(*src), bcf_float_is_vector_end(*src), bcf_float_set_missing(*dst), bcf_float_set_vector_end(*dst)); break;
+                default: error("Unexpected case: %d\n", type);
+            }
+            #undef BRANCH
+            ret = bcf_update_format(buf->out_hdr, out, tag, buf->tmp2, 3*(1+star_allele)*nsmpl, type);
+        }
+        if ( ret!=0 ) error("An error occurred while updating FORMAT/%s\n",tag);
+    }
+}
+static inline int _is_acgtn(char *seq)  // returns 1 if seq holds only A,C,G,T,N in either case (also for an empty string), 0 otherwise
+{
+    while ( *seq )
+    {
+        char c = toupper((unsigned char)*seq);  // cast first: passing a negative char value to toupper() is undefined behaviour
+        if ( c!='A' && c!='C' && c!='G' && c!='T' && c!='N' ) return 0;
+        seq++;
+    }
+    return 1;
+}
+/*
+    The atomization works as follows:
+    - Atomize each alternate allele separately by leaving out sequence identical to the reference. No
+      alignment is performed, just greedy trimming of the end, then from left. This operation returns
+      a list of atoms (atom_t) which carry fragments of REF,ALT and their positions as 0-based offsets
+      to the original REF allele
+    - Sort atoms by POS, REF and ALT. Each unique atom (POS+REF+ALT) forms a new VCF record, each
+      with a single ALT.
+    - For each new VCF record determine how to translate the original allele index (iori) to this new
+      record:
+        - 1: the original allele matches the atom
+        - 0: the original allele does not overlap this atom or the overlapping part matches the REF
+             allele
+        - 2 (or equivalently "."): there is a mismatch between the original allele and the atom
+      The mapping is encoded in a table with columns corresponding to the original ALTs and rows
+      to the new POS+ALTs (atoms). The table is initialized to 0, then we set 1's for matching
+      atoms and 2's for overlapping mismatching atoms.
+
+    Note that different ALT alleles can result in the same atom (the same output line) and this code
+    does not know how to reconcile possibly conflicting VCF annotations. This could be improved
+    and merge logic provided, similarly to `merge -l`. For example, the allelic depths (AD) should
+    be summed for the same atomized output allele. However, this level of complexity is not addressed
+    in this initial draft. Higher priority for now is to provide the inverse "join" operation.
+
+    Update 2021-04-09:
+        Tags QS,AD are now automatically incremented as they should be, for both INFO and FORMAT.
+        Note that the code will fail on missing values (todo) and it needs to be generalized and
+        made customizable.
+*/
+void _abuf_split(abuf_t *buf, bcf1_t *rec)
+{
+    int i,j;
+    if ( rec->n_allele < 2 )
+    {
+        rbuf_expand0(&buf->rbuf, bcf1_t*, buf->rbuf.n+1, buf->vcf);
+        int j = rbuf_append(&buf->rbuf);
+        if ( buf->vcf[j] ) bcf_destroy(buf->vcf[j]);
+        buf->vcf[j] = bcf_dup(rec);
+        return;
+    }
+    for (i=1; i<rec->n_allele; i++)
+    {
+        if ( _is_acgtn(rec->d.allele[i]) ) continue;
+        rbuf_expand0(&buf->rbuf, bcf1_t*, buf->rbuf.n+1, buf->vcf);
+        int j = rbuf_append(&buf->rbuf);
+        if ( buf->vcf[j] ) bcf_destroy(buf->vcf[j]);
+        buf->vcf[j] = bcf_dup(rec);
+        return;
+    }
+
+    buf->natoms = 0;
+    for (i=1; i<rec->n_allele; i++) _atomize_allele(buf,rec,i);
+    qsort(buf->atoms,buf->natoms,sizeof(*buf->atoms),_cmp_atoms);
+    _split_table_init(buf,rec,buf->natoms);
+    for (i=0; i<buf->natoms; i++)
+    {
+        if ( i && !_atoms_inconsistent(&buf->atoms[i-1],&buf->atoms[i]) ) continue;
+        _split_table_new(buf, &buf->atoms[i]);  // add a new unique output atom
+    }
+    for (i=0; i<buf->natoms; i++)
+    {
+        // Looping over sorted list of all atoms with possible duplicates from different source ALT alleles
+        atom_t *atom = &buf->atoms[i];
+        for (j=0; j<buf->split.nout; j++)
+        {
+            atom_t *out = buf->split.atoms[j];
+            if ( atom == out ) continue;            // table already set to 1
+            if ( atom->beg > out->end ) continue;   // cannot overlap this output atom
+            if ( atom->end < out->beg ) break;      // this atom is ahead of all subsequent output records
+            _split_table_overlap(buf, j, atom);
+        }
+    }
+    assert( !buf->rbuf.n ); // all records should be flushed first in the SPLIT mode
+
+    // Create the output records, transferring all annotations:
+    // CHROM-QUAL
+    _split_table_set_chrom_qual(buf);
+
+    // INFO
+    for (i=0; i<rec->n_info; i++)
+    {
+        // this implementation of merging rules is temporary: generalize and made customizable through the API
+        merge_rule_t mode = M_FIRST;
+        const char *tag = bcf_hdr_int2id(buf->hdr,BCF_DT_ID,rec->d.info[i].key);
+        if ( !strcmp(tag,"QS") || !strcmp(tag,"AD") ) mode = M_SUM;
+
+        _split_table_set_info(buf, &rec->d.info[i], mode);
+    }
+
+    // Set INFO tag showing the original record
+    if ( buf->split.info_tag )
+        _split_table_set_history(buf);
+
+    // FORMAT
+    for (i=0; i<rec->n_fmt; i++)
+    {
+        // this implementation of merging rules is temporary: generalize and made customizable through the API
+        merge_rule_t mode = M_FIRST;
+        const char *tag = bcf_hdr_int2id(buf->hdr,BCF_DT_ID,rec->d.fmt[i].id);
+        if ( !strcmp(tag,"QS") || !strcmp(tag,"AD") ) mode = M_SUM;
+
+        _split_table_set_format(buf, &rec->d.fmt[i], mode);
+    }
+}
+
+void abuf_push(abuf_t *buf, bcf1_t *rec)    // fully unpack rec and, in the SPLIT mode, hand it to _abuf_split() which fills the output buffer
+{
+    bcf_unpack(rec, BCF_UN_ALL);
+    if ( buf->mode==SPLIT ) _abuf_split(buf,rec);
+}
+
+bcf1_t *abuf_flush(abuf_t *buf, int flush_all)   // returns a buffered record, or NULL when the buffer is empty
+{
+    int i;
+
+    if ( buf->rbuf.n==0 ) return NULL;
+    if ( flush_all ) goto ret;      // currently without effect: both paths continue at the label right below
+
+ret:
+    i = rbuf_shift(&buf->rbuf);     // index of the slot taken out of the ring buffer
+    return buf->vcf[i];             // the slot keeps the pointer, so the record is still released by abuf_destroy()
+}
+
diff --git a/bcftools/abuf.c.pysam.c b/bcftools/abuf.c.pysam.c
new file mode 100644
index 0000000..811ef10
--- /dev/null
+++ b/bcftools/abuf.c.pysam.c
@@ -0,0 +1,715 @@
+#include "bcftools.pysam.h"
+
+/* The MIT License
+
+   Copyright (c) 2021 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3@sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <assert.h>
+#include <strings.h>
+#include <htslib/vcf.h>
+#include <ctype.h>
+#include "bcftools.h"
+#include "abuf.h"
+#include "rbuf.h"
+
+typedef enum
+{
+    M_FIRST, M_SUM  // M_FIRST: copy the value of the atom's source ALT; M_SUM: also add later ALTs mapped to the same atom (integer Number=R only)
+}
+merge_rule_t;
+
+typedef struct
+{
+    kstring_t ref, alt;     // REF and ALT sequence of this atom
+    int ial;        // the index of the original ALT allele, 1-based
+    int beg, end;   // 0-based inclusive offsets into the original REF allele
+}
+atom_t;
+
+typedef struct
+{
+    bcf1_t *rec;        // the input record being split, set by _split_table_init
+    int nori, nout;     // number of ALTs in the input, and VCF rows on output
+    uint8_t *tbl;       // nori columns, nout rows; indicates allele contribution to output rows, see "The atomization works as follows" below
+    uint8_t *overlaps;  // is the star allele needed for this variant?
+    atom_t **atoms;     // the atom behind each output row; the pointers refer to elements of abuf_t.atoms
+    int matoms, mtbl, moverlaps;    // allocated sizes of atoms, tbl and overlaps, maintained by hts_expand
+    char *info_tag;     // name of the optional INFO tag with the original variant; set by abuf_set, not freed by abuf_destroy
+}
+split_t;
+
+struct _abuf_t
+{
+    abuf_opt_t mode;        // mode of operation passed to abuf_init
+    split_t split;          // state of the record currently being split
+    atom_t *atoms;          // atoms of all ALTs of the current record, sorted with _cmp_atoms
+    int natoms, matoms;     // number of atoms in use and allocated
+    const bcf_hdr_t *hdr;   // header used for reading the input records
+    bcf_hdr_t *out_hdr;     // header used for updating the output records, initialized to hdr
+    bcf1_t **vcf;       // dimensions stored in rbuf
+    rbuf_t rbuf;            // ring buffer bookkeeping for vcf
+
+    kstring_t tmps;         // scratch string used by _split_table_set_history
+    void *tmp, *tmp2;       // scratch buffers for INFO/FORMAT values: values read (tmp) and values written (tmp2)
+    int32_t *gt, *tmpi;     // genotypes of the input record (gt) and the translated output genotypes (tmpi)
+    int ngt, mgt, ntmpi, mtmpi, mtmp, mtmp2;    // n*: values in use, m*: allocated sizes; mtmp and mtmp2 are kept in bytes
+    int star_allele;        // 1: add the star allele to overlapping rows (default), 0: set such genotypes to missing
+};
+
+abuf_t *abuf_init(const bcf_hdr_t *hdr, abuf_opt_t mode)
+{
+    if ( mode!=SPLIT ) error("todo\n");     // only the SPLIT mode is implemented
+    abuf_t *buf = (abuf_t*) calloc(1,sizeof(abuf_t));
+    if ( !buf ) error("Failed to alloc %zu bytes\n", sizeof(abuf_t));   // do not dereference NULL when out of memory
+    buf->hdr  = hdr; buf->mode = mode;
+    buf->out_hdr = (bcf_hdr_t*) hdr;        // the output header defaults to the input one, see abuf_set(BCF_HDR)
+    buf->star_allele = 1;                   // the star allele is used by default
+    rbuf_init(&buf->rbuf, 0);
+    return buf;
+}
+
+void abuf_destroy(abuf_t *buf)
+{
+    int i;
+    for (i=0; i<buf->matoms; i++)       // all allocated atoms, not only natoms; unused slots are expected to be zeroed by hts_expand0
+    {
+        free(buf->atoms[i].ref.s);
+        free(buf->atoms[i].alt.s);
+    }
+    free(buf->atoms);
+    free(buf->split.atoms);             // array of pointers into buf->atoms, the atoms themselves were freed above
+    free(buf->split.overlaps);
+    free(buf->split.tbl);
+    for (i=0; i<buf->rbuf.m; i++)       // presumably rbuf.m is the allocated capacity of vcf - confirm in rbuf.h
+        if ( buf->vcf[i] ) bcf_destroy(buf->vcf[i]);
+    free(buf->vcf);
+    free(buf->gt);
+    free(buf->tmpi);
+    free(buf->tmp);
+    free(buf->tmp2);
+    free(buf->tmps.s);
+    free(buf);                          // note: split.info_tag and the headers are not freed here
+}
+
+void abuf_set(abuf_t *buf, abuf_opt_t key, void *value)
+{   // value points to a variable whose type depends on key; keys not listed below are silently ignored
+    if ( key==BCF_HDR ) { buf->out_hdr = *((bcf_hdr_t**)value); return; }   // value: bcf_hdr_t**
+    if ( key==INFO_TAG )
+    {
+        buf->split.info_tag = *((char**)value);     // value: char**; the pointer is stored, the string is not copied
+        bcf_hdr_printf(buf->out_hdr,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Original variant. Format: CHR|POS|REF|ALT|USED_ALT_IDX\">",buf->split.info_tag); 
+        return;
+    }
+    if ( key==STAR_ALLELE ) { buf->star_allele = *((int*)value); return; }  // value: int*
+}
+
+/*
+    Split alleles into primitives, e.g.
+        CC>TT  becomes  C>T,C>T
+        GCGT>GTGA  becomes C>T,T>A
+
+    There is no sequence alignment, just trimming and greedy matching
+    from left side.
+*/
+static void _atomize_allele(abuf_t *buf, bcf1_t *rec, int ial)
+{
+    // Trim identical sequence from right
+    char *ref = rec->d.allele[0];
+    char *alt = rec->d.allele[ial];
+    int rlen = strlen(ref);
+    int alen = strlen(alt);
+    while ( rlen>1 && alen>1 && ref[rlen-1]==alt[alen-1] ) rlen--, alen--;     // always keep at least one base of each allele
+    int Mlen = rlen > alen ? rlen : alen;
+
+    atom_t *atom = NULL;        // the atom most recently created; indel bases are appended to it
+    int i;
+    for (i=0; i<Mlen; i++)
+    {
+        char refb = i<rlen ? ref[i] : '-';      // '-' marks a position past the end of the trimmed allele
+        char altb = i<alen ? alt[i] : '-';
+        if ( refb!=altb )
+        {
+            if ( refb=='-' || altb=='-' )       // past the end of the shorter allele: extend the current atom into an indel
+            {
+                assert(atom);
+                if ( altb!='-' ) kputc(altb, &atom->alt);
+                if ( refb!='-' ) { kputc(refb, &atom->ref); atom->end++; }     // only REF bases extend the span on the reference
+            }
+            else
+            {
+                // a mismatching pair of bases starts a new single-base atom
+                buf->natoms++;
+                hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
+                atom = &buf->atoms[buf->natoms-1];
+                atom->ref.l = 0;                // reuse the string buffers left from previous records
+                atom->alt.l = 0;
+                kputc(refb, &atom->ref);
+                kputc(altb, &atom->alt);
+                atom->beg = atom->end = i;
+                atom->ial = ial;
+            }
+            continue;
+        }
+        if ( i+1>=rlen || i+1>=alen )   // is the next base a deletion?
+        {
+            // the matching base is the last one of the shorter allele: it becomes the anchor base of an indel atom
+            buf->natoms++;
+            hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
+            atom = &buf->atoms[buf->natoms-1];
+            atom->ref.l = 0;
+            atom->alt.l = 0;
+            kputc(refb, &atom->ref);
+            kputc(altb, &atom->alt);
+            atom->beg = atom->end = i;
+            atom->ial = ial;
+        }
+    }
+}
+static int _atoms_inconsistent(const atom_t *a, const atom_t *b)
+{   // Returns 0 when both atoms have the same start, REF and ALT; otherwise a negative/positive value usable for sorting
+    if ( a->beg < b->beg ) return -1;               // order by the start offset first
+    if ( a->beg > b->beg ) return 1;
+    int rcmp = strcasecmp(a->ref.s,b->ref.s);       // then by REF, ignoring case
+    if ( rcmp ) return rcmp;
+    return strcasecmp(a->alt.s,b->alt.s);           // and finally by ALT, ignoring case
+}
+/*
+    For reproducibility of tests on different platforms, we need to guarantee the same order of identical
+    atoms originating from different source ALTs.  Even though they are consistent, different values can be
+    picked for VCF annotations as currently the values from the one that comes first are used.
+*/
+static int _cmp_atoms(const void *aptr, const void *bptr)
+{   // qsort comparator: position, REF, ALT (via _atoms_inconsistent), ties broken by the source ALT index
+    const atom_t *a = (const atom_t*) aptr;
+    const atom_t *b = (const atom_t*) bptr;
+    int rcmp = _atoms_inconsistent(a,b);
+    if ( rcmp ) return rcmp;
+    if ( a->ial < b->ial ) return -1;       // identical atoms: the one from the lower ALT index comes first
+    if ( a->ial > b->ial ) return 1;
+    return 0;
+}
+static void _split_table_init(abuf_t *buf, bcf1_t *rec, int natoms)
+{   // Reset the split table for a new record and size its arrays for the worst case of natoms output rows
+    buf->split.rec  = rec;
+    buf->split.nori = rec->n_allele - 1;    // number of ALT alleles in the input record
+    buf->split.nout = 0;                    // rows are added one by one in _split_table_new
+    hts_expand(uint8_t,buf->split.nori*natoms,buf->split.mtbl,buf->split.tbl);
+    hts_expand(atom_t*,natoms,buf->split.matoms,buf->split.atoms);
+    hts_expand(uint8_t,natoms,buf->split.moverlaps,buf->split.overlaps);
+    memset(buf->split.overlaps,0,sizeof(*buf->split.overlaps)*natoms);  // no overlaps known yet; tbl rows are zeroed in _split_table_new
+}
+static void _split_table_new(abuf_t *buf, atom_t *atom)
+{   // Append a new output row for this atom: all columns 0 except the atom's source ALT, which is set to 1
+    int i, iout = buf->split.nout++;
+    buf->split.atoms[iout] = atom;
+    uint8_t *ptr = buf->split.tbl + iout*buf->split.nori;      // start of the row iout
+    for (i=0; i<buf->split.nori; i++) ptr[i] = 0;
+    ptr[atom->ial-1] = 1;                                       // ial is 1-based, columns are 0-based
+}
+static void _split_table_overlap(abuf_t *buf, int iout, atom_t *atom)
+{   // Record that atom overlaps the output row iout: 1 if it is identical to the row's atom, 2 if it differs
+    uint8_t *ptr = buf->split.tbl + iout*buf->split.nori;
+    ptr[atom->ial-1] = _atoms_inconsistent(atom,buf->split.atoms[iout]) ? 2 : 1;
+    buf->split.overlaps[iout] = 1;      // set for any overlap, identical atoms included; read by _has_star_allele
+}
+#if 0   // debugging helpers, compiled out
+static void _split_table_print(abuf_t *buf)
+{   // Print one line per output row: 1-based position, REF, ALT and the table columns
+    int i,j;
+    for (i=0; i<buf->split.nout; i++)
+    {
+        atom_t *atom = buf->split.atoms[i];
+        uint8_t *ptr = buf->split.tbl + i*buf->split.nori;
+        fprintf(bcftools_stderr,"%d\t%s\t%s",(int)buf->split.rec->pos+1+atom->beg,atom->ref.s,atom->alt.s);
+        for (j=0; j<buf->split.nori; j++) fprintf(bcftools_stderr,"\t%d",(int)ptr[j]);
+        fprintf(bcftools_stderr,"\n");
+    }
+}
+static void _split_table_print_atoms(abuf_t *buf)
+{   // Print all atoms of the current record, duplicates from different ALTs included
+    int i;
+    for (i=0; i<buf->natoms; i++)
+    {
+        atom_t *atom = &buf->atoms[i];
+        fprintf(bcftools_stderr,"atom%d %p: ialt=%d %s>%s %d-%d\n",i,atom,atom->ial,atom->ref.s,atom->alt.s,atom->beg,atom->end);
+    }
+}
+#endif
+static inline uint8_t _has_star_allele(abuf_t *buf, int iout)
+{   // Returns 1 if the output row iout gets the star allele as an extra ALT, 0 otherwise
+    if ( !buf->star_allele ) return 0;      // the star allele was switched off with abuf_set(STAR_ALLELE)
+    return buf->split.overlaps[iout];       // set in _split_table_overlap
+}
+static inline int _split_table_get_ial(abuf_t *buf, int irow, int ial)
+{   // Translate the allele index ial of the input record to the allele index in the output row irow
+    if ( !ial ) return ial;                                     // REF stays REF
+    return buf->split.tbl[irow*buf->split.nori + ial - 1];      // table value 0, 1 or 2; columns are 0-based ALTs
+}
+static void _split_table_set_chrom_qual(abuf_t *buf)
+{   // Append one output record per table row to the ring buffer and fill its CHROM, POS, ID, REF, ALT, QUAL and FILTER
+    int iout,j;
+    bcf1_t *rec = buf->split.rec;
+    for (iout=0; iout<buf->split.nout; iout++)
+    {
+        rbuf_expand0(&buf->rbuf, bcf1_t*, buf->rbuf.n+1, buf->vcf);     // make room for one more record
+        j = rbuf_append(&buf->rbuf);
+        if ( !buf->vcf[j] ) buf->vcf[j] = bcf_init1();                  // records are allocated once and then reused
+        bcf1_t *out = buf->vcf[j];
+        bcf_clear1(out);
+
+        atom_t *atom = buf->split.atoms[iout];
+        out->rid = rec->rid;
+        out->pos = rec->pos + atom->beg;        // atom->beg is an offset from the start of the original REF
+        bcf_update_id(buf->out_hdr, out, rec->d.id);
+
+        const char *als[3];
+        als[0] = atom->ref.s;
+        als[1] = atom->alt.s;
+        als[2] = "*";                           // passed on only when nals is 3
+        int nals = _has_star_allele(buf,iout) ? 3 : 2;
+        bcf_update_alleles(buf->out_hdr, out, als, nals);
+
+        if ( bcf_float_is_missing(rec->qual) )
+            bcf_float_set_missing(out->qual);
+        else
+            out->qual = rec->qual;              // every output row inherits the QUAL of the input record
+
+        bcf_update_filter(buf->out_hdr, out, rec->d.flt, rec->d.n_flt);
+    }
+}
+static void _split_table_set_info(abuf_t *buf, bcf_info_t *info, merge_rule_t mode)
+{   // Copy one INFO tag of the input record to all output rows, subsetting Number=A and Number=R values to the row's ALT
+    const char *tag = bcf_hdr_int2id(buf->hdr,BCF_DT_ID,info->key);
+    int type = bcf_hdr_id2type(buf->hdr,BCF_HL_INFO,info->key);
+    int len  = bcf_hdr_id2length(buf->hdr,BCF_HL_INFO,info->key);
+    if ( len==BCF_VL_G ) return;                                                // todo: Number=G INFO tags
+    if ( type==BCF_HT_STR && len!=BCF_VL_FIXED && len!=BCF_VL_VAR ) return;     // todo: Number=A,R,G for strings
+    if ( type==BCF_HT_LONG ) return;                                            // todo: 64bit integers
+
+    bcf1_t *rec = buf->split.rec;
+    int mtmp = ( type==BCF_HT_INT || type==BCF_HT_REAL ) ? buf->mtmp/4 : buf->mtmp;     // buf->mtmp is in bytes, numeric types are passed as a number of 4-byte items
+    int nval = bcf_get_info_values(buf->hdr,rec,tag,&buf->tmp,&mtmp,type);
+    if ( type==BCF_HT_INT || type==BCF_HT_REAL ) buf->mtmp = mtmp*4;
+
+    // Check for incorrect number of values. Note this check does not consider all values missing
+    // and will remove annotations that don't pass.
+    if ( (len==BCF_VL_A && nval != rec->n_allele - 1) || (len==BCF_VL_R && nval != rec->n_allele) ) return;
+
+    if ( buf->mtmp2 < buf->mtmp )       // keep tmp2 at least as big as tmp
+    {
+        buf->tmp2  = realloc(buf->tmp2, buf->mtmp);
+        if ( !buf->tmp2 ) error("Failed to alloc %d bytes\n", buf->mtmp);
+        buf->mtmp2 = buf->mtmp;
+    }
+
+    int32_t missing = bcf_int32_missing;
+    void *missing_ptr = (void*)&missing;    // 4-byte missing value, integer by default
+    if ( type==BCF_HT_REAL ) bcf_float_set_missing(*((float*)missing_ptr));     // overwritten with the float missing value
+
+    int iout,i;
+    for (iout=0; iout<buf->split.nout; iout++)
+    {
+        bcf1_t *out = buf->vcf[rbuf_kth(&buf->rbuf,iout)];
+        int star_allele = _has_star_allele(buf,iout);
+        int ret = 0;
+        if ( len==BCF_VL_FIXED || len==BCF_VL_VAR )     // not allele-specific: copied unchanged to every row
+            ret = bcf_update_info(buf->out_hdr, out, tag, type==BCF_HT_FLAG ? NULL : buf->tmp, nval, type);
+        else if ( len==BCF_VL_A )
+        {
+            int iori = buf->split.atoms[iout]->ial - 1;     // 0-based index of the source ALT
+            assert( iori<nval );
+            memcpy(buf->tmp2,buf->tmp+4*iori,4);
+            if ( star_allele )
+                memcpy(buf->tmp2+4,missing_ptr,4);          // the star allele gets a missing value
+            ret = bcf_update_info(buf->out_hdr, out, tag, buf->tmp2, 1 + star_allele, type);
+        }
+        else if ( len==BCF_VL_R )
+        {
+            memcpy(buf->tmp2,buf->tmp,4);   // REF contributes to all records
+            int iori = buf->split.atoms[iout]->ial;         // index into the Number=R values, 0 is REF
+            assert( iori<nval && iori<=buf->split.nori );
+            memcpy(buf->tmp2+4,buf->tmp+4*iori,4);
+            if ( type==BCF_HT_INT && mode==M_SUM )          // add the values of later ALTs which produce the same atom
+            {
+                uint8_t *tbl = buf->split.tbl + iout*buf->split.nori;
+                for (i=iori; i<buf->split.nori; i++)        // column i is the ALT with Number=R index i+1
+                {
+                    if ( tbl[i]==1 ) ((int32_t*)buf->tmp2)[1] += ((int32_t*)buf->tmp)[i+1];
+                }
+            }
+            if ( star_allele )
+                memcpy(buf->tmp2+8,missing_ptr,4);
+            ret = bcf_update_info(buf->out_hdr, out, tag, buf->tmp2, 2 + star_allele, type);
+        }
+        if ( ret!=0 ) error("An error occurred while updating INFO/%s\n",tag);
+    }
+}
+static void _split_table_set_history(abuf_t *buf)
+{   // Annotate every output row with INFO/<info_tag> = CHR|POS|REF|ALT|USED_ALT_IDX describing the original record
+    int i,j;
+    bcf1_t *rec = buf->split.rec;
+    buf->tmps.l = 0;
+    ksprintf(&buf->tmps,"%s|%"PRIhts_pos"|%s|",bcf_seqname(buf->hdr,rec),rec->pos+1,rec->d.allele[0]);
+    for (i=1; i<rec->n_allele; i++)
+    {
+        kputs(rec->d.allele[i],&buf->tmps);
+        // always append a separator; the one after the last ALT is overwritten with '|' below
+        kputc(',',&buf->tmps);
+    }
+    int len = buf->tmps.l;                  // length of the CHR|POS|REF|ALT| prefix shared by all rows
+    buf->tmps.s[buf->tmps.l-1] = '|';
+
+    for (i=0; i<buf->split.nout; i++)
+    {
+        buf->tmps.l = len;                  // rewind to the end of the shared prefix
+        bcf1_t *out = buf->vcf[rbuf_kth(&buf->rbuf,i)];
+        uint8_t *ptr = buf->split.tbl + i*buf->split.nori;
+        for (j=0; j<buf->split.nori; j++)
+        {
+            if ( ptr[j]!=1 ) continue;      // list only the original ALTs which match the atom of this row
+            kputw(j+1,&buf->tmps);
+            kputc(',',&buf->tmps);
+        }
+        buf->tmps.s[--buf->tmps.l] = 0;     // drop the trailing comma
+        if ( (bcf_update_info_string(buf->out_hdr, out, buf->split.info_tag, buf->tmps.s))!=0 )
+            error("An error occurred while updating INFO/%s\n",buf->split.info_tag);
+    }
+}
+static void _split_table_set_gt(abuf_t *buf)
+{   // Translate the genotypes of the input record to the allele numbering of each output row
+    int nsmpl = bcf_hdr_nsamples(buf->hdr);
+    if ( !nsmpl ) return;
+
+    bcf1_t *rec = buf->split.rec;
+    buf->ngt = bcf_get_genotypes(buf->hdr, rec, &buf->gt, &buf->mgt);
+    if ( buf->ngt<=0 ) return;      // no genotypes could be read
+    else
+        hts_expand(int32_t,buf->ngt,buf->mtmpi,buf->tmpi);
+
+    int iout,i,j;
+    for (iout=0; iout<buf->split.nout; iout++)
+    {
+        bcf1_t *out = buf->vcf[rbuf_kth(&buf->rbuf,iout)];
+        int star_allele = _has_star_allele(buf,iout);
+        int max_ploidy = buf->ngt/nsmpl;        // number of GT values stored per sample
+        int32_t *src = buf->gt, *dst = buf->tmpi;
+        for (i=0; i<nsmpl; i++)
+        {
+            for (j=0; j<max_ploidy; j++)
+            {
+                if ( src[j]==bcf_int32_vector_end || bcf_gt_is_missing(src[j]) )
+                {
+                    dst[j] = src[j];            // padding and missing alleles are copied unchanged
+                    continue;
+                }
+                int iori = bcf_gt_allele(src[j]);
+                if ( iori<0 || iori>=rec->n_allele )
+                    error("Out-of-bounds genotypes at %s:%"PRIhts_pos"\n",bcf_seqname(buf->hdr,rec),rec->pos+1);
+                int ial = _split_table_get_ial(buf,iout,iori);      // 0: REF, 1: the row's ALT, 2: overlapping mismatch
+                if ( ial==2 && !star_allele )
+                    dst[j] = bcf_gt_missing;    // no star allele in this row, the allele cannot be expressed
+                else
+                    dst[j] = bcf_gt_is_phased(src[j]) ? bcf_gt_phased(ial) : bcf_gt_unphased(ial);     // the phase is preserved
+            }
+            src += max_ploidy;
+            dst += max_ploidy;
+        }
+        bcf_update_genotypes(buf->out_hdr,out,buf->tmpi,buf->ngt);
+    }
+}
+static void _split_table_set_format(abuf_t *buf, bcf_fmt_t *fmt, merge_rule_t mode)
+{   // Copy one FORMAT tag of the input record to all output rows, subsetting Number=A,R,G values to the row's ALT
+    int nsmpl = bcf_hdr_nsamples(buf->hdr);
+    if ( !nsmpl ) return;
+
+    const char *tag = bcf_hdr_int2id(buf->hdr,BCF_DT_ID,fmt->id);
+    if ( tag[0]=='G' && tag[1]=='T' && !tag[2] )        // FORMAT/GT
+    {
+        _split_table_set_gt(buf);
+        return;
+    }
+
+    int type = bcf_hdr_id2type(buf->hdr,BCF_HL_FMT,fmt->id);
+    int len  = bcf_hdr_id2length(buf->hdr,BCF_HL_FMT,fmt->id);
+    if ( type==BCF_HT_STR && len!=BCF_VL_FIXED && len!=BCF_VL_VAR ) return;     // todo: Number=A,R,G for strings
+    if ( type==BCF_HT_LONG ) return;                                            // todo: 64bit integers
+
+    const int num_size = 4;     // size in bytes of one numeric value, integer or float
+    assert( num_size==sizeof(int32_t) && num_size==sizeof(float) );
+    int32_t missing = bcf_int32_missing;
+    void *missing_ptr = (void*)&missing;    // 4-byte missing value, integer by default
+    if ( type==BCF_HT_REAL ) bcf_float_set_missing(*((float*)missing_ptr));     // overwritten with the float missing value
+
+    bcf1_t *rec = buf->split.rec;
+    int mtmp = ( type==BCF_HT_INT || type==BCF_HT_REAL ) ? buf->mtmp/num_size : buf->mtmp;  // number of items
+    int nval = bcf_get_format_values(buf->hdr,rec,tag,&buf->tmp,&mtmp,type);
+    if ( type==BCF_HT_INT || type==BCF_HT_REAL ) buf->mtmp = mtmp*num_size;                 // number of bytes
+
+    if ( len==BCF_VL_G && nval!=nsmpl*rec->n_allele && nval!=nsmpl*rec->n_allele*(rec->n_allele+1)/2 ) return;      // not haploid nor diploid
+
+    // Check for incorrect number of values. Note this check does not consider all values missing
+    // and will remove annotations that don't pass.
+    if ( (len==BCF_VL_A && nval != nsmpl*(rec->n_allele - 1)) || (len==BCF_VL_R && nval != nsmpl*rec->n_allele) ) return;
+
+    // Increase buffer size to accommodate star allele
+    int nval1 = nval / nsmpl;   // number of values per sample
+    mtmp = buf->mtmp;
+    if ( (len==BCF_VL_A || len==BCF_VL_R) && mtmp < num_size*nsmpl*(nval1+1) ) mtmp = num_size*nsmpl*(nval1+1); // +1 for the possibility of the star allele
+    else if ( len==BCF_VL_G && mtmp < num_size*nsmpl*(nval1+3) ) mtmp = num_size*nsmpl*(nval1+3);
+
+    if ( buf->mtmp2 < mtmp )
+    {
+        buf->tmp2  = realloc(buf->tmp2, mtmp);
+        if ( !buf->tmp2 ) error("Failed to alloc %d bytes\n", mtmp);
+        buf->mtmp2 = mtmp;
+    }
+
+    int iout, i, j;
+    for (iout=0; iout<buf->split.nout; iout++)
+    {
+        int star_allele = _has_star_allele(buf,iout);
+        bcf1_t *out = buf->vcf[rbuf_kth(&buf->rbuf,iout)];
+        int ret = 0;
+        if ( len==BCF_VL_FIXED || len==BCF_VL_VAR )     // not allele-specific: copied unchanged to every row
+            ret = bcf_update_format(buf->out_hdr, out, tag, buf->tmp, nval, type);
+        else if ( len==BCF_VL_A )
+        {
+            int iori = buf->split.atoms[iout]->ial - 1;     // 0-based index of the source ALT
+            assert( iori<nval );
+            for (i=0; i<nsmpl; i++)
+            {
+                void *src = buf->tmp  + nval1*num_size*i;
+                void *dst = buf->tmp2 + num_size*i*(star_allele+1);
+                memcpy(dst,src+iori*num_size,num_size);
+                if ( star_allele )
+                    memcpy(dst+num_size,missing_ptr,num_size);      // the star allele gets a missing value
+            }
+            ret = bcf_update_format(buf->out_hdr, out, tag, buf->tmp2, nsmpl*(star_allele+1), type);
+        }
+        else if ( len==BCF_VL_R )
+        {
+            int iori = buf->split.atoms[iout]->ial;         // index into the Number=R values, 0 is REF
+            assert( iori<=nval );
+            for (i=0; i<nsmpl; i++)
+            {
+                void *src = buf->tmp  + nval1*num_size*i;
+                void *dst = buf->tmp2 + num_size*i*(star_allele+2);
+                memcpy(dst,src,num_size);                   // REF contributes to all records
+                memcpy(dst+num_size,src+iori*num_size,num_size);
+
+                if ( type==BCF_HT_INT && mode==M_SUM )      // add the values of later ALTs which produce the same atom
+                {
+                    uint8_t *tbl = buf->split.tbl + iout*buf->split.nori;
+                    for (j=iori; j<buf->split.nori; j++)
+                        if ( tbl[j]==1 ) ((int32_t*)dst)[1] += ((int32_t*)src)[j+1];
+                }
+                if ( star_allele )
+                    memcpy(dst+num_size*2,missing_ptr,num_size);
+            }
+            ret = bcf_update_format(buf->out_hdr, out, tag, buf->tmp2, nsmpl*(star_allele+2), type);
+        }
+        else if ( len==BCF_VL_G )
+        {
+            int iori = buf->split.atoms[iout]->ial;
+            int i01  = bcf_alleles2gt(0,iori);      // index of the REF/ALT genotype value in the input
+            int i11  = bcf_alleles2gt(iori,iori);   // index of the ALT/ALT genotype value in the input
+            assert( iori<nval );
+            #define BRANCH(type_t, is_missing, is_vector_end, set_missing, set_vector_end) { \
+                for (i=0; i<nsmpl; i++) \
+                { \
+                    type_t *src = (type_t*)buf->tmp + i*nval1; \
+                    type_t *dst = (type_t*)buf->tmp2 + i*3*(1+star_allele); \
+                    int n=0; /* determine ploidy of this genotype */ \
+                    while ( n<nval1 && !(is_vector_end) ) { n++; src++; } \
+                    src = (type_t*)buf->tmp + i*nval1; \
+                    memcpy(dst++,src,sizeof(type_t)); \
+                    int nmiss = 0, nend = 0; \
+                    if ( n==rec->n_allele ) /* haploid */ \
+                    { \
+                        memcpy(dst++,src+iori,sizeof(type_t)); \
+                        if ( star_allele ) { nmiss = 1; nend = 3; } \
+                        else nend = 1; \
+                    } \
+                    else if ( n==nval1 ) \
+                    { \
+                        memcpy(dst++,src+i01,sizeof(type_t)); \
+                        memcpy(dst++,src+i11,sizeof(type_t)); \
+                        if ( star_allele ) nmiss = 3; \
+                    } \
+                    else if ( n==1 && is_missing ) \
+                    { \
+                        if ( star_allele ) nend = 5; \
+                        else nend = 2; \
+                    } \
+                    else  \
+                        error("Incorrect number of values at %s:%"PRIhts_pos" .. tag=FORMAT/%s Number=G nAlleles=%d nValues=%d, %d-th sample\n", \
+                                bcf_seqname(buf->hdr,rec),rec->pos+1,tag,rec->n_allele,n,i+1); \
+                    for (j=0; j<nmiss; j++) { set_missing; dst++; } \
+                    for (j=0; j<nend; j++) { set_vector_end; dst++; } \
+                } \
+            }
+            switch (type)
+            {
+                case BCF_HT_INT:  BRANCH(int32_t, *src==bcf_int32_missing, *src==bcf_int32_vector_end, *dst=bcf_int32_missing, *dst=bcf_int32_vector_end); break;
+                case BCF_HT_REAL: BRANCH(float, bcf_float_is_missing(*src), bcf_float_is_vector_end(*src), bcf_float_set_missing(*dst), bcf_float_set_vector_end(*dst)); break;
+                default: error("Unexpected case: %d\n", type);
+            }
+            #undef BRANCH
+            ret = bcf_update_format(buf->out_hdr, out, tag, buf->tmp2, 3*(1+star_allele)*nsmpl, type);     // always 3 (or 6 with the star allele) values per sample
+        }
+        if ( ret!=0 ) error("An error occurred while updating FORMAT/%s\n",tag);
+    }
+}
+static inline int _is_acgtn(char *seq)
+{   // Returns 1 if seq consists only of the letters A,C,G,T,N in any case (also for an empty string), 0 otherwise
+    while ( *seq )
+    {
+        char c = toupper((unsigned char)*seq);  // toupper() is undefined for negative values other than EOF
+        if ( c!='A' && c!='C' && c!='G' && c!='T' && c!='N' ) return 0;
+        seq++;
+    }
+    return 1;
+}
+/*
+    The atomization works as follows:
+    - Atomize each alternate allele separately by leaving out sequence identical to the reference. No
+      alignment is performed, just greedy trimming of the end, then from left. This operation returns
+      a list of atoms (atom_t) which carry fragments of REF,ALT and their positions as 0-based offsets
+      to the original REF allele
+    - Sort atoms by POS, REF and ALT. Each unique atom (POS+REF+ALT) forms a new VCF record, each
+      with a single ALT.
+    - For each new VCF record determine how to translate the original allele index (iori) to this new
+      record:
+        - 1: the original allele matches the atom
+        - 0: the original allele does not overlap this atom or the overlapping part matches the REF
+             allele
+        - 2 (or equivalently "."): there is a mismatch between the original allele and the atom
+      The mapping is encoded in a table with columns corresponding to the original ALTs and rows
+      to the new POS+ALTs (atoms). The table is initialized to 0, then we set 1's for matching
+      atoms and 2's for overlapping mismatching atoms.
+
+    Note that different ALT alleles can result in the same atom (the same output line) and this code
+    does not know how to reconcile possibly conflicting VCF annotations. This could be improved
+    and merge logic provided, similarly to `merge -l`. For example, the allelic depths (AD) should
+    be summed for the same atomized output allele. However, this level of complexity is not addressed
+    in this initial draft. Higher priority for now is to provide the inverse "join" operation.
+
+    Update 2021-04-09:
+        Tags QS,AD are now automatically incremented as they should be, for both INFO and FORMAT.
+        Note that the code will fail on missing values (todo) and it needs to be generalized and
+        made customizable.
+*/
+void _abuf_split(abuf_t *buf, bcf1_t *rec)
+{   // Atomize rec into one output record per unique atom, or buffer a copy of rec when it cannot be atomized
+    int i,j;
+    if ( rec->n_allele < 2 )    // no ALT allele: pass a copy of the record through
+    {
+        rbuf_expand0(&buf->rbuf, bcf1_t*, buf->rbuf.n+1, buf->vcf);
+        int j = rbuf_append(&buf->rbuf);
+        if ( buf->vcf[j] ) bcf_destroy(buf->vcf[j]);
+        buf->vcf[j] = bcf_dup(rec);
+        return;
+    }
+    for (i=1; i<rec->n_allele; i++)
+    {
+        if ( _is_acgtn(rec->d.allele[i]) ) continue;
+        // an ALT with characters other than ACGTN: pass a copy of the whole record through
+        rbuf_expand0(&buf->rbuf, bcf1_t*, buf->rbuf.n+1, buf->vcf);
+        int j = rbuf_append(&buf->rbuf);
+        if ( buf->vcf[j] ) bcf_destroy(buf->vcf[j]);
+        buf->vcf[j] = bcf_dup(rec);
+        return;
+    }
+
+    buf->natoms = 0;
+    for (i=1; i<rec->n_allele; i++) _atomize_allele(buf,rec,i);
+    qsort(buf->atoms,buf->natoms,sizeof(*buf->atoms),_cmp_atoms);
+    _split_table_init(buf,rec,buf->natoms);
+    for (i=0; i<buf->natoms; i++)
+    {
+        if ( i && !_atoms_inconsistent(&buf->atoms[i-1],&buf->atoms[i]) ) continue;    // same atom as the previous one in the sorted list
+        _split_table_new(buf, &buf->atoms[i]);  // add a new unique output atom
+    }
+    for (i=0; i<buf->natoms; i++)
+    {
+        // Looping over sorted list of all atoms with possible duplicates from different source ALT alleles
+        atom_t *atom = &buf->atoms[i];
+        for (j=0; j<buf->split.nout; j++)
+        {
+            atom_t *out = buf->split.atoms[j];
+            if ( atom == out ) continue;            // table already set to 1
+            if ( atom->beg > out->end ) continue;   // cannot overlap this output atom
+            if ( atom->end < out->beg ) break;      // this atom is ahead of all subsequent output records
+            _split_table_overlap(buf, j, atom);
+        }
+    }
+    assert( !buf->rbuf.n ); // all records should be flushed first in the SPLIT mode
+
+    // Create the output records, transferring all annotations:
+    // CHROM-QUAL
+    _split_table_set_chrom_qual(buf);
+
+    // INFO
+    for (i=0; i<rec->n_info; i++)
+    {
+        // this implementation of merging rules is temporary: to be generalized and made customizable through the API
+        merge_rule_t mode = M_FIRST;
+        const char *tag = bcf_hdr_int2id(buf->hdr,BCF_DT_ID,rec->d.info[i].key);
+        if ( !strcmp(tag,"QS") || !strcmp(tag,"AD") ) mode = M_SUM;
+
+        _split_table_set_info(buf, &rec->d.info[i], mode);
+    }
+
+    // Set INFO tag showing the original record
+    if ( buf->split.info_tag )
+        _split_table_set_history(buf);
+
+    // FORMAT
+    for (i=0; i<rec->n_fmt; i++)
+    {
+        // this implementation of merging rules is temporary: to be generalized and made customizable through the API
+        merge_rule_t mode = M_FIRST;
+        const char *tag = bcf_hdr_int2id(buf->hdr,BCF_DT_ID,rec->d.fmt[i].id);
+        if ( !strcmp(tag,"QS") || !strcmp(tag,"AD") ) mode = M_SUM;
+
+        _split_table_set_format(buf, &rec->d.fmt[i], mode);
+    }
+}
+
+void abuf_push(abuf_t *buf, bcf1_t *rec)
+{
+    bcf_unpack(rec, BCF_UN_ALL);    // unpack all fields of rec before they are read by the split code
+    if ( buf->mode==SPLIT ) _abuf_split(buf,rec);   // SPLIT is the only mode handled; any other mode does nothing here
+}
+
+bcf1_t *abuf_flush(abuf_t *buf, int flush_all)
+{
+    int i;
+
+    if ( buf->rbuf.n==0 ) return NULL;      // nothing is buffered
+    if ( flush_all ) goto ret;              // currently a no-op: both paths continue at the label right below
+
+ret:
+    i = rbuf_shift(&buf->rbuf);             // rbuf_shift (rbuf.h) yields the index of the record to hand out
+    return buf->vcf[i];                     // the record stays in buf->vcf and is freed by abuf_destroy
+}
+
diff --git a/bcftools/abuf.h b/bcftools/abuf.h
new file mode 100644
index 0000000..5fc1e00
--- /dev/null
+++ b/bcftools/abuf.h
@@ -0,0 +1,78 @@
+/* The MIT License
+
+   Copyright (c) 2021 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3@sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+/*
+    Atomize/deatomize complex variants
+*/
+
+#ifndef __ABUF_H__
+#define __ABUF_H__
+
+#include <htslib/vcf.h>
+
+typedef struct _abuf_t abuf_t;
+
+// Modes of operation
+typedef enum
+{
+    NONE,
+
+    // mode of operation, to be passed to abuf_init
+    SPLIT,
+    JOIN,           // not implemented yet: abuf_init() accepts only SPLIT
+
+    BCF_HDR,        // should the records be annotated, a writable bcf header is required
+    INFO_TAG,       // set BCF_HDR first
+    STAR_ALLELE     // 1: use STAR allele (the default), 0: set overlaps to missing
+}
+abuf_opt_t;
+
+#define abuf_set_opt(buf,type,key,value) { type tmp = value; abuf_set(buf, key, (void*)&tmp); }
+void abuf_set(abuf_t *buf, abuf_opt_t key, void *value);    // value: pointer to a variable of the option's type (bcf_hdr_t*, char* or int)
+
+/*
+ *  abuf_init() - init buffer
+ *  @hdr: header of the input records;  @mode: mode of operation (only SPLIT is implemented)
+ */
+abuf_t *abuf_init(const bcf_hdr_t *hdr, abuf_opt_t mode);
+void abuf_destroy(abuf_t *buf);
+
+/*
+ *  abuf_push() - Push a new site for analysis
+ */
+void abuf_push(abuf_t *buf, bcf1_t *rec);
+
+/*
+ *  abuf_flush() - Return next buffered record
+ *  @flush_all: Set to 1 if no more overlapping records are coming (e.g. end of chromosome or end of file),
+ *              the buffer can be emptied.
+ *  return:     The next atomized/deatomized VCF record or NULL if no record is ready. The returned
+ *              structure will be cleaned by abuf.
+ */
+bcf1_t *abuf_flush(abuf_t *buf, int flush_all);
+
+#endif
+
diff --git a/bcftools/bam2bcf.c b/bcftools/bam2bcf.c
index d080917..336e2f6 100644
--- a/bcftools/bam2bcf.c
+++ b/bcftools/bam2bcf.c
@@ -1,7 +1,7 @@
 /*  bam2bcf.c -- variant calling.
 
     Copyright (C) 2010-2012 Broad Institute.
-    Copyright (C) 2012-2014 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -40,7 +40,8 @@ extern  void ks_introsort_uint32_t(size_t n, uint32_t a[]);
 
 #define CAP_DIST 25
 
-bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
+bcf_callaux_t *bcf_call_init(double theta, int min_baseQ, int max_baseQ,
+                             int delta_baseQ)
 {
     bcf_callaux_t *bca;
     if (theta <= 0.) theta = CALL_DEFTHETA;
@@ -48,6 +49,8 @@ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
     bca->capQ = 60;
     bca->openQ = 40; bca->extQ = 20; bca->tandemQ = 100;
     bca->min_baseQ = min_baseQ;
+    bca->max_baseQ = max_baseQ;
+    bca->delta_baseQ = delta_baseQ;
     bca->e = errmod_init(1. - theta);
     bca->min_frac = 0.002;
     bca->min_support = 1;
@@ -55,9 +58,13 @@ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
     bca->npos = 100;
     bca->ref_pos = (int*) malloc(bca->npos*sizeof(int));
     bca->alt_pos = (int*) malloc(bca->npos*sizeof(int));
+    bca->iref_pos= (int*) malloc(bca->npos*sizeof(int));
+    bca->ialt_pos= (int*) malloc(bca->npos*sizeof(int));
     bca->nqual = 60;
     bca->ref_mq  = (int*) malloc(bca->nqual*sizeof(int));
     bca->alt_mq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->iref_mq = (int*) malloc(bca->nqual*sizeof(int));
+    bca->ialt_mq = (int*) malloc(bca->nqual*sizeof(int));
     bca->ref_bq  = (int*) malloc(bca->nqual*sizeof(int));
     bca->alt_bq  = (int*) malloc(bca->nqual*sizeof(int));
     bca->fwd_mqs = (int*) malloc(bca->nqual*sizeof(int));
@@ -69,47 +76,68 @@ void bcf_call_destroy(bcf_callaux_t *bca)
 {
     if (bca == 0) return;
     errmod_destroy(bca->e);
-    if (bca->npos) { free(bca->ref_pos); free(bca->alt_pos); bca->npos = 0; }
-    free(bca->ref_mq); free(bca->alt_mq); free(bca->ref_bq); free(bca->alt_bq);
+    if (bca->npos) {
+        free(bca->ref_pos);  free(bca->alt_pos);
+        free(bca->iref_pos); free(bca->ialt_pos);
+        bca->npos = 0;
+    }
+    free(bca->ref_mq); free(bca->alt_mq);
+    free(bca->iref_mq); free(bca->ialt_mq);
+    free(bca->ref_bq); free(bca->alt_bq);
     free(bca->fwd_mqs); free(bca->rev_mqs);
     bca->nqual = 0;
     free(bca->bases); free(bca->inscns); free(bca);
 }
 
 // position in the sequence with respect to the aligned part of the read
-static int get_position(const bam_pileup1_t *p, int *len)
-{
-    int icig, n_tot_bases = 0, iread = 0, edist = p->qpos + 1;
-    for (icig=0; icig<p->b->core.n_cigar; icig++)
-    {
-        int cig  = bam_get_cigar(p->b)[icig] & BAM_CIGAR_MASK;
-        int ncig = bam_get_cigar(p->b)[icig] >> BAM_CIGAR_SHIFT;
-        if ( cig==BAM_CMATCH || cig==BAM_CEQUAL || cig==BAM_CDIFF )
-        {
-            n_tot_bases += ncig;
-            iread += ncig;
-            continue;
-        }
-        if ( cig==BAM_CINS )
-        {
-            n_tot_bases += ncig;
-            iread += ncig;
+static int get_position(const bam_pileup1_t *p, int *len,
+                        int *sc_len, int *sc_dist) {
+    int i, j, edist = p->qpos + 1;
+    int sc_left = 0, sc_right = 0;
+    int sc_left_dist = -1, sc_right_dist = -1;
+
+    // left end
+    for (i = 0; i < p->b->core.n_cigar; i++) {
+        int cig  = bam_get_cigar(p->b)[i] & BAM_CIGAR_MASK;
+        if (cig == BAM_CHARD_CLIP)
             continue;
-        }
-        if ( cig==BAM_CSOFT_CLIP )
-        {
-            iread += ncig;
-            if ( iread<=p->qpos ) edist -= ncig;
+        else if (cig == BAM_CSOFT_CLIP)
+            sc_left += bam_get_cigar(p->b)[i] >> BAM_CIGAR_SHIFT;
+        else
+            break;
+    }
+    if (sc_left)
+        sc_left_dist = p->qpos+1 - sc_left;
+    edist -= sc_left;
+
+    // right end
+    for (j = p->b->core.n_cigar-1; j >= i; j--) {
+        int cig  = bam_get_cigar(p->b)[j] & BAM_CIGAR_MASK;
+        if (cig == BAM_CHARD_CLIP)
             continue;
+        else if (cig == BAM_CSOFT_CLIP)
+            sc_right += bam_get_cigar(p->b)[j] >> BAM_CIGAR_SHIFT;
+        else
+            break;
+    }
+    if (sc_right)
+        sc_right_dist = p->b->core.l_qseq - sc_right - p->qpos;
+
+    // Distance to nearest soft-clips and length of that clip.
+    if (sc_left_dist >= 0) {
+        if (sc_right_dist < 0 || sc_left_dist < sc_right_dist) {
+            *sc_len  = sc_left;
+            *sc_dist = sc_left_dist;
         }
-        if ( cig==BAM_CDEL ) continue;
-        if ( cig==BAM_CHARD_CLIP ) continue;
-        if ( cig==BAM_CPAD ) continue;
-        if ( cig==BAM_CREF_SKIP ) continue;
-        fprintf(stderr,"todo: cigar %d\n", cig);
-        assert(0);
-    }
-    *len = n_tot_bases;
+    } else if (sc_right_dist >= 0) {
+        *sc_len  = sc_right;
+        *sc_dist = sc_right_dist;
+    } else {
+        *sc_len  = 0;
+        *sc_dist = 0;
+    }
+
+    *len = p->b->core.l_qseq - sc_left - sc_right;
     return edist;
 }
 
@@ -117,8 +145,12 @@ void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call)
 {
     memset(bca->ref_pos,0,sizeof(int)*bca->npos);
     memset(bca->alt_pos,0,sizeof(int)*bca->npos);
+    memset(bca->iref_pos,0,sizeof(int)*bca->npos);
+    memset(bca->ialt_pos,0,sizeof(int)*bca->npos);
     memset(bca->ref_mq,0,sizeof(int)*bca->nqual);
     memset(bca->alt_mq,0,sizeof(int)*bca->nqual);
+    memset(bca->iref_mq,0,sizeof(int)*bca->nqual);
+    memset(bca->ialt_mq,0,sizeof(int)*bca->nqual);
     memset(bca->ref_bq,0,sizeof(int)*bca->nqual);
     memset(bca->alt_bq,0,sizeof(int)*bca->nqual);
     memset(bca->fwd_mqs,0,sizeof(int)*bca->nqual);
@@ -126,13 +158,18 @@ void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call)
     if ( call->ADF ) memset(call->ADF,0,sizeof(int32_t)*(call->n+1)*B2B_MAX_ALLELES);
     if ( call->ADR ) memset(call->ADR,0,sizeof(int32_t)*(call->n+1)*B2B_MAX_ALLELES);
     if ( call->SCR ) memset(call->SCR,0,sizeof(*call->SCR)*(call->n+1));
+    memset(call->QS,0,sizeof(*call->QS)*call->n*B2B_MAX_ALLELES);
+    memset(bca->ref_scl,  0, 100*sizeof(int));
+    memset(bca->alt_scl,  0, 100*sizeof(int));
+    memset(bca->iref_scl, 0, 100*sizeof(int));
+    memset(bca->ialt_scl, 0, 100*sizeof(int));
 }
 
 /*
     Notes:
-    - Called from bam_plcmd.c by mpileup. Amongst other things, sets the bcf_callret1_t.qsum frequencies
-        which are carried over via bcf_call_combine and bcf_call2bcf to the output BCF as the QS annotation.
-        Later it's used for multiallelic calling by bcftools -m
+    - Called from bam_plcmd.c by mpileup. Amongst other things, sets the bcf_callret1_t.QS frequencies
+        which are carried over via bcf_call_combine and bcf_call2bcf to the output BCF as the INFO/QS and FMT/QS annotations.
+        Later it's used for multiallelic calling by `call -m`, `call -mG` and `+trio-dnm`.
     - ref_base is the 4-bit representation of the reference base. It is negative if we are looking at an indel.
  */
 /*
@@ -150,7 +187,6 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
     // clean from previous run
     r->ori_depth = 0;
     r->mq0 = 0;
-    memset(r->qsum,0,sizeof(float)*4);
     memset(r->anno,0,sizeof(double)*16);
     memset(r->p,0,sizeof(float)*25);
     r->SCR = 0;
@@ -166,30 +202,65 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
         kroundup32(bca->max_bases);
         bca->bases = (uint16_t*)realloc(bca->bases, 2 * bca->max_bases);
     }
+
     // fill the bases array
+    double nqual_over_60 = bca->nqual / 60.0;
+    int ADR_ref_missed[4] = {0};
+    int ADF_ref_missed[4] = {0};
     for (i = n = 0; i < _n; ++i) {
         const bam_pileup1_t *p = pl + i;
         int q, b, mapQ, baseQ, is_diff, min_dist, seqQ;
+        if ( bca->fmt_flag&(B2B_INFO_SCR|B2B_FMT_SCR) && PLP_HAS_SOFT_CLIP(p->cd.i) ) r->SCR++;
         if (p->is_refskip || (p->b->core.flag&BAM_FUNMAP)) continue;
         if (p->is_del && !is_indel) continue;
         ++ori_depth;
         if (is_indel)
         {
-            b     = p->aux>>16&0x3f;
-            baseQ = q = p->aux&0xff;
-            // This read is not counted as indel. Instead of skipping it, treat it as ref. It is
-            // still only an approximation, but gives more accurate AD counts and calls correctly
-            // hets instead of alt-homs in some cases (see test/mpileup/indel-AD.1.sam)
-            if ( q < bca->min_baseQ ) b = 0, q = (int)bam_get_qual(p->b)[p->qpos];
-            seqQ  = p->aux>>8&0xff;
+            b = p->aux>>16&0x3f;
+            seqQ = q = (p->aux & 0xff); // mp2 + builtin indel-bias
+            if (q < bca->min_baseQ)
+            {
+                if (!p->indel && b < 4)
+                {
+                    if (bam_is_rev(p->b))
+                        ADR_ref_missed[b]++;
+                    else
+                        ADF_ref_missed[b]++;
+                }
+                continue;
+            }
+            if (p->indel == 0 && (q < _n/2 || _n > 20)) {
+                // high quality indel calls without p->indel set aren't
+                // particularly indicative of being a good REF match either,
+                // at least not in low coverage.  So require solid coverage
+                // before we start utilising such quals.
+                b = 0;
+                q = (int)bam_get_qual(p->b)[p->qpos];
+                seqQ = (3*seqQ + 2*q)/8;
+            }
+            if (_n > 20 && seqQ > 40) seqQ = 40;
+            baseQ  = p->aux>>8&0xff;
+
             is_diff = (b != 0);
         }
         else
         {
             b = bam_seqi(bam_get_seq(p->b), p->qpos); // base
             b = seq_nt16_int[b? b : ref_base]; // b is the 2-bit base
-            baseQ = q = (int)bam_get_qual(p->b)[p->qpos];
+
+            // Lowest of this and the neighbours' quality values + delta_baseQ
+            uint8_t *qual = bam_get_qual(p->b);
+            q = qual[p->qpos];
+            if (p->qpos > 0 &&
+                q > qual[p->qpos-1]+bca->delta_baseQ)
+                q = qual[p->qpos-1]+bca->delta_baseQ;
+            if (p->qpos+1 < p->b->core.l_qseq &&
+                q > qual[p->qpos+1]+bca->delta_baseQ)
+                q = qual[p->qpos+1]+bca->delta_baseQ;
+
             if (q < bca->min_baseQ) continue;
+            if (q > bca->max_baseQ) q = bca->max_baseQ;
+            baseQ = q;
             seqQ  = 99;
             is_diff = (ref4 < 4 && b == ref4)? 0 : 1;
         }
@@ -201,11 +272,10 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
         if (q > 63) q = 63;
         if (q < 4) q = 4;       // MQ=0 reads count as BQ=4
         bca->bases[n++] = q<<5 | (int)bam_is_rev(p->b)<<4 | b;
-        if ( bca->fmt_flag&(B2B_INFO_SCR|B2B_FMT_SCR) && PLP_HAS_SOFT_CLIP(p->cd.i) ) r->SCR++;
         // collect annotations
         if (b < 4)
         {
-            r->qsum[b] += q;
+            r->QS[b] += q;
             if ( r->ADF )
             {
                 if ( bam_is_rev(p->b) )
@@ -228,29 +298,65 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
         // collect for bias tests
         if ( baseQ > 59 ) baseQ = 59;
         if ( mapQ > 59 ) mapQ = 59;
-        int len, epos = 0;
-        if ( bca->fmt_flag & (B2B_INFO_RPB|B2B_INFO_VDB) )
+        int len, epos = 0, sc_len = 0, sc_dist = 0;
+        if ( bca->fmt_flag & (B2B_INFO_RPB|B2B_INFO_VDB|B2B_INFO_SCB) )
         {
-            int pos = get_position(p, &len);
+            int pos = get_position(p, &len, &sc_len, &sc_dist);
             epos = (double)pos/(len+1) * bca->npos;
+
+            if (sc_len) {
+                sc_len = 15.0*sc_len / sc_dist;
+                if (sc_len > 99) sc_len = 99;
+            }
         }
-        int ibq  = baseQ/60. * bca->nqual;
-        int imq  = mapQ/60. * bca->nqual;
-        if ( bam_is_rev(p->b) ) bca->rev_mqs[imq]++;
-        else bca->fwd_mqs[imq]++;
+
+        int imq  = mapQ * nqual_over_60;
+        int ibq  = baseQ * nqual_over_60;
+
+        if ( bam_is_rev(p->b) )
+            bca->rev_mqs[imq]++;
+        else
+            bca->fwd_mqs[imq]++;
+
         if ( bam_seqi(bam_get_seq(p->b),p->qpos) == ref_base )
         {
             bca->ref_pos[epos]++;
             bca->ref_bq[ibq]++;
             bca->ref_mq[imq]++;
+            bca->ref_scl[sc_len]++;
         }
         else
         {
             bca->alt_pos[epos]++;
             bca->alt_bq[ibq]++;
             bca->alt_mq[imq]++;
+            bca->alt_scl[sc_len]++;
         }
     }
+
+    // Compensate for AD not being counted on low quality REF indel matches.
+    if ( r->ADF && bca->ambig_reads==B2B_INC_AD0 )
+    {
+        for (i=0; i<4; i++) // verify: are the counters ever non-zero for i!=0?
+        {
+            r->ADR[i] += ADR_ref_missed[i];
+            r->ADF[i] += ADF_ref_missed[i];
+        }
+    }
+    else if ( r->ADF && bca->ambig_reads==B2B_INC_AD )
+    {
+        int dp = 0, dp_ambig = 0;
+        for (i=0; i<4; i++) dp += r->ADR[i];
+        for (i=0; i<4; i++) dp_ambig += ADR_ref_missed[i];
+        if ( dp )
+            for (i=0; i<4; i++) r->ADR[i] += lroundf((float)dp_ambig * r->ADR[i]/dp);
+        dp = 0, dp_ambig = 0;
+        for (i=0; i<4; i++) dp += r->ADF[i];
+        for (i=0; i<4; i++) dp_ambig += ADF_ref_missed[i];
+        if ( dp )
+            for (i=0; i<4; i++) r->ADF[i] += lroundf((float)dp_ambig * r->ADF[i]/dp);
+    }
+
     r->ori_depth = ori_depth;
     // glfgen
     errmod_cal(bca->e, n, 5, bca->bases, r->p); // calculate PL of each genotype
@@ -437,7 +543,7 @@ double calc_mwu_bias_cdf(int *a, int *b, int n)
     return pval>1 ? 1 : pval;
 }
 
-double calc_mwu_bias(int *a, int *b, int n)
+double calc_mwu_bias(int *a, int *b, int n, int left)
 {
     int na = 0, nb = 0, i;
     double U = 0, ties = 0;
@@ -461,6 +567,7 @@ double calc_mwu_bias(int *a, int *b, int n)
     if ( na==1 || nb==1 ) return 1.0;       // Flat probability, all U values are equally likely
 
     double mean = ((double)na*nb)*0.5;
+    if (left && U > mean) return 1; // for MQB which is asymmetrical
     if ( na==2 || nb==2 )
     {
         // Linear approximation
@@ -483,6 +590,85 @@ double calc_mwu_bias(int *a, int *b, int n)
     return mann_whitney_1947(na,nb,U) * sqrt(2*M_PI*var2);
 }
 
+// A Z-score capable version of the above function; a[] and b[] are
+// arrays of n per-bin counts.  See "Normal approximation and tie correction"
+// at https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test
+//
+// do_Z != 0: returns the Z score, the number of standard deviations above or
+// below the mean with 0 being equality of the two distributions and +ve/-ve
+// from there.  This is a more robust score to filter on.
+// do_Z == 0: returns a score derived from U; HUGE_VAL if left_only && U > m.
+// HUGE_VAL is also returned when na+nb <= 1 or the variance is not positive.
+double calc_mwu_biasZ(int *a, int *b, int n, int left_only, int do_Z) {
+    int i;
+    int64_t t;   // running sum of the tie-adjustment terms
+
+    // Optimisation: detect an all-zero b[] so the pairwise counting is skipped
+    for (i = 0; i < n; i++)
+        if (b[i])
+            break;
+    int b_empty = (i == n);  // no non-zero b[i] was found
+
+    // Count equal (e) and less-than (l) pairings; greater-than is not tracked.
+    int e = 0, l = 0, na = 0, nb = 0;
+    if (b_empty) {
+        for (t = 0, i = n-1; i >= 0; i--) {
+            na += a[i];
+            t += (a[i]*a[i]-1)*a[i];  // adjustment score for ties
+        }
+    } else {
+        for (t = 0, i = n-1; i >= 0; i--) {
+            // Combinations of a[i] and b[j] for i==j
+            e += a[i]*b[i];
+
+            // nb is running total of b[i+1]..b[n-1].
+            // Therefore a[i]*nb is the number of combinations of a[i] and b[j]
+            // for all i < j.
+            l += a[i]*nb;    // a<b
+
+            na += a[i];
+            nb += b[i];
+            int p = a[i]+b[i];
+            t += (p*p-1)*p;  // adjustment score for ties
+        }
+    }
+
+    if (na+nb <= 1)  // at most one observation in total: nothing to compare
+        return HUGE_VAL;
+
+    double U, m;
+    U = l + e*0.5; // Mann-Whitney U score
+    m = na*nb / 2.0;  // mean that U is measured against
+
+    // With ties adjustment
+    double var2 = (na*nb)/12.0 * ((na+nb+1) - t/(double)((na+nb)*(na+nb-1)));
+    // var = na*nb*(na+nb+1)/12.0; // simpler; minus tie adjustment
+    if (var2 <= 0)  // e.g. when either side has no counts (na*nb == 0)
+        return HUGE_VAL;
+
+    if (do_Z) {
+        // S.D. normalised Z-score
+        //Z = (U - m - (U-m >= 0 ? 0.5 : -0.5)) / sd; // gatk method?
+        return (U - m) / sqrt(var2);
+    }
+
+    // Else U score, which can be asymmetric for some data types.
+    if (left_only && U > m)
+        return HUGE_VAL; // one-sided, +ve bias is OK, -ve is not.
+
+    if (na >= 8 || nb >= 8) {
+        // Normal approximation, very good for na>=8 && nb>=8 and
+        // reasonable if na<8 or nb<8
+        return exp(-0.5*(U-m)*(U-m)/var2);
+    }
+
+    // Exact calculation
+    if (na==1 || nb == 1)
+        return mann_whitney_1947_(na, nb, U) * sqrt(2*M_PI*var2);
+    else
+        return mann_whitney_1947(na, nb, U) * sqrt(2*M_PI*var2);
+}
+
 static inline double logsumexp2(double a, double b)
 {
     if ( a>b )
@@ -558,7 +744,7 @@ void calc_SegBias(const bcf_callret1_t *bcr, bcf_call_t *call)
 int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call)
 {
     int ref4, i, j;
-    float qsum[5] = {0,0,0,0,0};
+    float qsum[B2B_MAX_ALLELES] = {0,0,0,0,0};
     if (ref_base >= 0) {
         call->ori_ref = ref4 = seq_nt16_int[ref_base];
         if (ref4 > 4) ref4 = 4;
@@ -569,9 +755,9 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
     for (i = 0; i < n; ++i)
     {
         float sum = 0;
-        for (j = 0; j < 4; ++j) sum += calls[i].qsum[j];
+        for (j = 0; j < 4; ++j) sum += calls[i].QS[j];
         if ( sum )
-            for (j = 0; j < 4; j++) qsum[j] += calls[i].qsum[j] / sum;
+            for (j = 0; j < 4; j++) qsum[j] += (float)calls[i].QS[j] / sum;
     }
 
     // sort qsum in ascending order (insertion sort)
@@ -583,7 +769,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
 
     // Set the reference allele and alternative allele(s)
     for (i=0; i<5; i++) call->a[i] = -1;
-    for (i=0; i<5; i++) call->qsum[i] = 0;
+    for (i=0; i<B2B_MAX_ALLELES; i++) call->qsum[i] = 0;
     call->unseen = -1;
     call->a[0] = ref4;
     for (i=3, j=1; i>=0; i--)   // i: alleles sorted by QS; j, a[j]: output allele ordering
@@ -695,6 +881,21 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
                 adf += B2B_MAX_ALLELES;
             }
         }
+        if ( bca->fmt_flag & B2B_FMT_QS )
+        {
+            assert( call->n_alleles<=B2B_MAX_ALLELES );   // this is always true for SNPs and so far for indels as well
+
+            // reorder QS to match the allele ordering at this site
+            int32_t tmp[B2B_MAX_ALLELES];
+            int32_t *qs = call->QS, *qs_out = call->QS;
+            for (i=0; i<n; i++)
+            {
+                for (j=0; j<call->n_alleles; j++) tmp[j] = qs[ call->a[j] ];
+                for (j=0; j<call->n_alleles; j++) qs_out[j] = tmp[j] < BCF_MAX_BT_INT32 ? tmp[j] : BCF_MAX_BT_INT32;
+                qs_out += call->n_alleles;
+                qs += B2B_MAX_ALLELES;
+            }
+        }
 
 //      if (ref_base < 0) fprintf(stderr, "%d,%d,%f,%d\n", call->n_alleles, x, sum_min, call->unseen);
         call->shift = (int)(sum_min + .499);
@@ -717,11 +918,43 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
     // calc_chisq_bias("XMQ", call->bcf_hdr->id[BCF_DT_CTG][call->tid].key, call->pos, bca->ref_mq, bca->alt_mq, bca->nqual);
     // calc_chisq_bias("XBQ", call->bcf_hdr->id[BCF_DT_CTG][call->tid].key, call->pos, bca->ref_bq, bca->alt_bq, bca->nqual);
 
-    if ( bca->fmt_flag & B2B_INFO_RPB )
-        call->mwu_pos = calc_mwu_bias(bca->ref_pos, bca->alt_pos, bca->npos);
-    call->mwu_mq  = calc_mwu_bias(bca->ref_mq,  bca->alt_mq,  bca->nqual);
-    call->mwu_bq  = calc_mwu_bias(bca->ref_bq,  bca->alt_bq,  bca->nqual);
-    call->mwu_mqs = calc_mwu_bias(bca->fwd_mqs, bca->rev_mqs, bca->nqual);
+    if (bca->fmt_flag & B2B_INFO_ZSCORE) {
+        // U z-normalised as +/- number of standard deviations from mean.
+        if (call->ori_ref < 0) {
+            if (bca->fmt_flag & B2B_INFO_RPB)
+                call->mwu_pos = calc_mwu_biasZ(bca->iref_pos, bca->ialt_pos,
+                                               bca->npos, 0, 1);
+            call->mwu_mq  = calc_mwu_biasZ(bca->iref_mq,  bca->ialt_mq,
+                                           bca->nqual,1,1);
+            if ( bca->fmt_flag & B2B_INFO_SCB )
+                call->mwu_sc  = calc_mwu_biasZ(bca->iref_scl, bca->ialt_scl,
+                                               100, 0,1);
+        } else {
+            if (bca->fmt_flag & B2B_INFO_RPB)
+                call->mwu_pos = calc_mwu_biasZ(bca->ref_pos, bca->alt_pos,
+                                               bca->npos, 0, 1);
+            call->mwu_mq  = calc_mwu_biasZ(bca->ref_mq,  bca->alt_mq,
+                                           bca->nqual,1,1);
+            call->mwu_bq  = calc_mwu_biasZ(bca->ref_bq,  bca->alt_bq,
+                                           bca->nqual,0,1);
+            call->mwu_mqs = calc_mwu_biasZ(bca->fwd_mqs, bca->rev_mqs,
+                                           bca->nqual,0,1);
+            if ( bca->fmt_flag & B2B_INFO_SCB )
+                call->mwu_sc  = calc_mwu_biasZ(bca->ref_scl, bca->alt_scl,
+                                               100, 0,1);
+        }
+    } else {
+        // Old method; U as probability between 0 and 1
+        if ( bca->fmt_flag & B2B_INFO_RPB )
+            call->mwu_pos = calc_mwu_biasZ(bca->ref_pos, bca->alt_pos,
+                                           bca->npos, 0, 0);
+        call->mwu_mq  = calc_mwu_biasZ(bca->ref_mq,  bca->alt_mq,
+                                       bca->nqual, 1, 0);
+        call->mwu_bq  = calc_mwu_biasZ(bca->ref_bq,  bca->alt_bq,
+                                       bca->nqual, 0, 0);
+        call->mwu_mqs = calc_mwu_biasZ(bca->fwd_mqs, bca->rev_mqs,
+                                       bca->nqual, 0, 0);
+    }
 
 #if CDF_MWU_TESTS
     // CDF version of MWU tests is not calculated by default
@@ -732,7 +965,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
     call->mwu_mqs_cdf = calc_mwu_bias_cdf(bca->fwd_mqs, bca->rev_mqs, bca->nqual);
 #endif
 
-    if ( bca->fmt_flag & B2B_INFO_VDB ) 
+    if ( bca->fmt_flag & B2B_INFO_VDB )
         call->vdb = calc_vdb(bca->alt_pos, bca->npos);
 
     return 0;
@@ -819,10 +1052,32 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
 
     if ( bc->vdb != HUGE_VAL )      bcf_update_info_float(hdr, rec, "VDB", &bc->vdb, 1);
     if ( bc->seg_bias != HUGE_VAL ) bcf_update_info_float(hdr, rec, "SGB", &bc->seg_bias, 1);
-    if ( bc->mwu_pos != HUGE_VAL )  bcf_update_info_float(hdr, rec, "RPB", &bc->mwu_pos, 1);
-    if ( bc->mwu_mq != HUGE_VAL )   bcf_update_info_float(hdr, rec, "MQB", &bc->mwu_mq, 1);
-    if ( bc->mwu_mqs != HUGE_VAL )  bcf_update_info_float(hdr, rec, "MQSB", &bc->mwu_mqs, 1);
-    if ( bc->mwu_bq != HUGE_VAL )   bcf_update_info_float(hdr, rec, "BQB", &bc->mwu_bq, 1);
+
+    if (bca->fmt_flag & B2B_INFO_ZSCORE) {
+        if ( bc->mwu_pos != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "RPBZ", &bc->mwu_pos, 1);
+        if ( bc->mwu_mq != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "MQBZ", &bc->mwu_mq, 1);
+        if ( bc->mwu_mqs != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "MQSBZ", &bc->mwu_mqs, 1);
+        if ( bc->mwu_bq != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "BQBZ", &bc->mwu_bq, 1);
+        if ( bc->mwu_sc != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "SCBZ", &bc->mwu_sc, 1);
+    } else {
+        if ( bc->mwu_pos != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "RPB", &bc->mwu_pos, 1);
+        if ( bc->mwu_mq != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "MQB", &bc->mwu_mq, 1);
+        if ( bc->mwu_mqs != HUGE_VAL )
+             bcf_update_info_float(hdr, rec, "MQSB", &bc->mwu_mqs, 1);
+        if ( bc->mwu_bq != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "BQB", &bc->mwu_bq, 1);
+    }
+
+    if ( bc->strand_bias != HUGE_VAL )
+        bcf_update_info_float(hdr, rec, "FS", &bc->strand_bias, 1);
+
 #if CDF_MWU_TESTS
     if ( bc->mwu_pos_cdf != HUGE_VAL )  bcf_update_info_float(hdr, rec, "RPB2", &bc->mwu_pos_cdf, 1);
     if ( bc->mwu_mq_cdf != HUGE_VAL )   bcf_update_info_float(hdr, rec, "MQB2", &bc->mwu_mq_cdf, 1);
@@ -884,6 +1139,8 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
     }
     if ( fmt_flag&B2B_FMT_SCR )
         bcf_update_format_int32(hdr, rec, "SCR", bc->SCR+1, rec->n_sample);
+    if ( fmt_flag&B2B_FMT_QS )
+        bcf_update_format_int32(hdr, rec, "QS", bc->QS, rec->n_sample*rec->n_allele);
 
     return 0;
 }
diff --git a/bcftools/bam2bcf.c.pysam.c b/bcftools/bam2bcf.c.pysam.c
index 16a559a..001363e 100644
--- a/bcftools/bam2bcf.c.pysam.c
+++ b/bcftools/bam2bcf.c.pysam.c
@@ -3,7 +3,7 @@
 /*  bam2bcf.c -- variant calling.
 
     Copyright (C) 2010-2012 Broad Institute.
-    Copyright (C) 2012-2014 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -42,7 +42,8 @@ extern  void ks_introsort_uint32_t(size_t n, uint32_t a[]);
 
 #define CAP_DIST 25
 
-bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
+bcf_callaux_t *bcf_call_init(double theta, int min_baseQ, int max_baseQ,
+                             int delta_baseQ)
 {
     bcf_callaux_t *bca;
     if (theta <= 0.) theta = CALL_DEFTHETA;
@@ -50,6 +51,8 @@ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
     bca->capQ = 60;
     bca->openQ = 40; bca->extQ = 20; bca->tandemQ = 100;
     bca->min_baseQ = min_baseQ;
+    bca->max_baseQ = max_baseQ;
+    bca->delta_baseQ = delta_baseQ;
     bca->e = errmod_init(1. - theta);
     bca->min_frac = 0.002;
     bca->min_support = 1;
@@ -57,9 +60,13 @@ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
     bca->npos = 100;
     bca->ref_pos = (int*) malloc(bca->npos*sizeof(int));
     bca->alt_pos = (int*) malloc(bca->npos*sizeof(int));
+    bca->iref_pos= (int*) malloc(bca->npos*sizeof(int));
+    bca->ialt_pos= (int*) malloc(bca->npos*sizeof(int));
     bca->nqual = 60;
     bca->ref_mq  = (int*) malloc(bca->nqual*sizeof(int));
     bca->alt_mq  = (int*) malloc(bca->nqual*sizeof(int));
+    bca->iref_mq = (int*) malloc(bca->nqual*sizeof(int));
+    bca->ialt_mq = (int*) malloc(bca->nqual*sizeof(int));
     bca->ref_bq  = (int*) malloc(bca->nqual*sizeof(int));
     bca->alt_bq  = (int*) malloc(bca->nqual*sizeof(int));
     bca->fwd_mqs = (int*) malloc(bca->nqual*sizeof(int));
@@ -71,47 +78,68 @@ void bcf_call_destroy(bcf_callaux_t *bca)
 {
     if (bca == 0) return;
     errmod_destroy(bca->e);
-    if (bca->npos) { free(bca->ref_pos); free(bca->alt_pos); bca->npos = 0; }
-    free(bca->ref_mq); free(bca->alt_mq); free(bca->ref_bq); free(bca->alt_bq);
+    if (bca->npos) {
+        free(bca->ref_pos);  free(bca->alt_pos);
+        free(bca->iref_pos); free(bca->ialt_pos);
+        bca->npos = 0;
+    }
+    free(bca->ref_mq); free(bca->alt_mq);
+    free(bca->iref_mq); free(bca->ialt_mq);
+    free(bca->ref_bq); free(bca->alt_bq);
     free(bca->fwd_mqs); free(bca->rev_mqs);
     bca->nqual = 0;
     free(bca->bases); free(bca->inscns); free(bca);
 }
 
 // position in the sequence with respect to the aligned part of the read
-static int get_position(const bam_pileup1_t *p, int *len)
-{
-    int icig, n_tot_bases = 0, iread = 0, edist = p->qpos + 1;
-    for (icig=0; icig<p->b->core.n_cigar; icig++)
-    {
-        int cig  = bam_get_cigar(p->b)[icig] & BAM_CIGAR_MASK;
-        int ncig = bam_get_cigar(p->b)[icig] >> BAM_CIGAR_SHIFT;
-        if ( cig==BAM_CMATCH || cig==BAM_CEQUAL || cig==BAM_CDIFF )
-        {
-            n_tot_bases += ncig;
-            iread += ncig;
-            continue;
-        }
-        if ( cig==BAM_CINS )
-        {
-            n_tot_bases += ncig;
-            iread += ncig;
+static int get_position(const bam_pileup1_t *p, int *len,
+                        int *sc_len, int *sc_dist) {
+    int i, j, edist = p->qpos + 1;
+    int sc_left = 0, sc_right = 0;
+    int sc_left_dist = -1, sc_right_dist = -1;
+
+    // left end
+    for (i = 0; i < p->b->core.n_cigar; i++) {
+        int cig  = bam_get_cigar(p->b)[i] & BAM_CIGAR_MASK;
+        if (cig == BAM_CHARD_CLIP)
             continue;
-        }
-        if ( cig==BAM_CSOFT_CLIP )
-        {
-            iread += ncig;
-            if ( iread<=p->qpos ) edist -= ncig;
+        else if (cig == BAM_CSOFT_CLIP)
+            sc_left += bam_get_cigar(p->b)[i] >> BAM_CIGAR_SHIFT;
+        else
+            break;
+    }
+    if (sc_left)
+        sc_left_dist = p->qpos+1 - sc_left;
+    edist -= sc_left;
+
+    // right end
+    for (j = p->b->core.n_cigar-1; j >= i; j--) {
+        int cig  = bam_get_cigar(p->b)[j] & BAM_CIGAR_MASK;
+        if (cig == BAM_CHARD_CLIP)
             continue;
+        else if (cig == BAM_CSOFT_CLIP)
+            sc_right += bam_get_cigar(p->b)[j] >> BAM_CIGAR_SHIFT;
+        else
+            break;
+    }
+    if (sc_right)
+        sc_right_dist = p->b->core.l_qseq - sc_right - p->qpos;
+
+    // Distance to nearest soft-clips and length of that clip.
+    if (sc_left_dist >= 0) {
+        if (sc_right_dist < 0 || sc_left_dist < sc_right_dist) {
+            *sc_len  = sc_left;
+            *sc_dist = sc_left_dist;
         }
-        if ( cig==BAM_CDEL ) continue;
-        if ( cig==BAM_CHARD_CLIP ) continue;
-        if ( cig==BAM_CPAD ) continue;
-        if ( cig==BAM_CREF_SKIP ) continue;
-        fprintf(bcftools_stderr,"todo: cigar %d\n", cig);
-        assert(0);
-    }
-    *len = n_tot_bases;
+    } else if (sc_right_dist >= 0) {
+        *sc_len  = sc_right;
+        *sc_dist = sc_right_dist;
+    } else {
+        *sc_len  = 0;
+        *sc_dist = 0;
+    }
+
+    *len = p->b->core.l_qseq - sc_left - sc_right;
     return edist;
 }
 
@@ -119,8 +147,12 @@ void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call)
 {
     memset(bca->ref_pos,0,sizeof(int)*bca->npos);
     memset(bca->alt_pos,0,sizeof(int)*bca->npos);
+    memset(bca->iref_pos,0,sizeof(int)*bca->npos);
+    memset(bca->ialt_pos,0,sizeof(int)*bca->npos);
     memset(bca->ref_mq,0,sizeof(int)*bca->nqual);
     memset(bca->alt_mq,0,sizeof(int)*bca->nqual);
+    memset(bca->iref_mq,0,sizeof(int)*bca->nqual);
+    memset(bca->ialt_mq,0,sizeof(int)*bca->nqual);
     memset(bca->ref_bq,0,sizeof(int)*bca->nqual);
     memset(bca->alt_bq,0,sizeof(int)*bca->nqual);
     memset(bca->fwd_mqs,0,sizeof(int)*bca->nqual);
@@ -128,13 +160,18 @@ void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call)
     if ( call->ADF ) memset(call->ADF,0,sizeof(int32_t)*(call->n+1)*B2B_MAX_ALLELES);
     if ( call->ADR ) memset(call->ADR,0,sizeof(int32_t)*(call->n+1)*B2B_MAX_ALLELES);
     if ( call->SCR ) memset(call->SCR,0,sizeof(*call->SCR)*(call->n+1));
+    memset(call->QS,0,sizeof(*call->QS)*call->n*B2B_MAX_ALLELES);
+    memset(bca->ref_scl,  0, 100*sizeof(int));
+    memset(bca->alt_scl,  0, 100*sizeof(int));
+    memset(bca->iref_scl, 0, 100*sizeof(int));
+    memset(bca->ialt_scl, 0, 100*sizeof(int));
 }
 
 /*
     Notes:
-    - Called from bam_plcmd.c by mpileup. Amongst other things, sets the bcf_callret1_t.qsum frequencies
-        which are carried over via bcf_call_combine and bcf_call2bcf to the output BCF as the QS annotation.
-        Later it's used for multiallelic calling by bcftools -m
+    - Called from bam_plcmd.c by mpileup. Amongst other things, sets the bcf_callret1_t.QS frequencies
+        which are carried over via bcf_call_combine and bcf_call2bcf to the output BCF as the INFO/QS and FMT/QS annotations.
+        Later it's used for multiallelic calling by `call -m`, `call -mG` and `+trio-dnm`.
     - ref_base is the 4-bit representation of the reference base. It is negative if we are looking at an indel.
  */
 /*
@@ -152,7 +189,6 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
     // clean from previous run
     r->ori_depth = 0;
     r->mq0 = 0;
-    memset(r->qsum,0,sizeof(float)*4);
     memset(r->anno,0,sizeof(double)*16);
     memset(r->p,0,sizeof(float)*25);
     r->SCR = 0;
@@ -168,30 +204,65 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
         kroundup32(bca->max_bases);
         bca->bases = (uint16_t*)realloc(bca->bases, 2 * bca->max_bases);
     }
+
     // fill the bases array
+    double nqual_over_60 = bca->nqual / 60.0;
+    int ADR_ref_missed[4] = {0};
+    int ADF_ref_missed[4] = {0};
     for (i = n = 0; i < _n; ++i) {
         const bam_pileup1_t *p = pl + i;
         int q, b, mapQ, baseQ, is_diff, min_dist, seqQ;
+        if ( bca->fmt_flag&(B2B_INFO_SCR|B2B_FMT_SCR) && PLP_HAS_SOFT_CLIP(p->cd.i) ) r->SCR++;
         if (p->is_refskip || (p->b->core.flag&BAM_FUNMAP)) continue;
         if (p->is_del && !is_indel) continue;
         ++ori_depth;
         if (is_indel)
         {
-            b     = p->aux>>16&0x3f;
-            baseQ = q = p->aux&0xff;
-            // This read is not counted as indel. Instead of skipping it, treat it as ref. It is
-            // still only an approximation, but gives more accurate AD counts and calls correctly
-            // hets instead of alt-homs in some cases (see test/mpileup/indel-AD.1.sam)
-            if ( q < bca->min_baseQ ) b = 0, q = (int)bam_get_qual(p->b)[p->qpos];
-            seqQ  = p->aux>>8&0xff;
+            b = p->aux>>16&0x3f;
+            seqQ = q = (p->aux & 0xff); // mp2 + builtin indel-bias
+            if (q < bca->min_baseQ)
+            {
+                if (!p->indel && b < 4)
+                {
+                    if (bam_is_rev(p->b))
+                        ADR_ref_missed[b]++;
+                    else
+                        ADF_ref_missed[b]++;
+                }
+                continue;
+            }
+            if (p->indel == 0 && (q < _n/2 || _n > 20)) {
+                // high quality indel calls without p->indel set aren't
+                // particularly indicative of being a good REF match either,
+                // at least not in low coverage.  So require solid coverage
+                // before we start utilising such quals.
+                b = 0;
+                q = (int)bam_get_qual(p->b)[p->qpos];
+                seqQ = (3*seqQ + 2*q)/8;
+            }
+            if (_n > 20 && seqQ > 40) seqQ = 40;
+            baseQ  = p->aux>>8&0xff;
+
             is_diff = (b != 0);
         }
         else
         {
             b = bam_seqi(bam_get_seq(p->b), p->qpos); // base
             b = seq_nt16_int[b? b : ref_base]; // b is the 2-bit base
-            baseQ = q = (int)bam_get_qual(p->b)[p->qpos];
+
+            // Lowest of this and neighbour quality values
+            uint8_t *qual = bam_get_qual(p->b);
+            q = qual[p->qpos];
+            if (p->qpos > 0 &&
+                q > qual[p->qpos-1]+bca->delta_baseQ)
+                q = qual[p->qpos-1]+bca->delta_baseQ;
+            if (p->qpos+1 < p->b->core.l_qseq &&
+                q > qual[p->qpos+1]+bca->delta_baseQ)
+                q = qual[p->qpos+1]+bca->delta_baseQ;
+
             if (q < bca->min_baseQ) continue;
+            if (q > bca->max_baseQ) q = bca->max_baseQ;
+            baseQ = q;
             seqQ  = 99;
             is_diff = (ref4 < 4 && b == ref4)? 0 : 1;
         }
@@ -203,11 +274,10 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
         if (q > 63) q = 63;
         if (q < 4) q = 4;       // MQ=0 reads count as BQ=4
         bca->bases[n++] = q<<5 | (int)bam_is_rev(p->b)<<4 | b;
-        if ( bca->fmt_flag&(B2B_INFO_SCR|B2B_FMT_SCR) && PLP_HAS_SOFT_CLIP(p->cd.i) ) r->SCR++;
         // collect annotations
         if (b < 4)
         {
-            r->qsum[b] += q;
+            r->QS[b] += q;
             if ( r->ADF )
             {
                 if ( bam_is_rev(p->b) )
@@ -230,29 +300,65 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
         // collect for bias tests
         if ( baseQ > 59 ) baseQ = 59;
         if ( mapQ > 59 ) mapQ = 59;
-        int len, epos = 0;
-        if ( bca->fmt_flag & (B2B_INFO_RPB|B2B_INFO_VDB) )
+        int len, epos = 0, sc_len = 0, sc_dist = 0;
+        if ( bca->fmt_flag & (B2B_INFO_RPB|B2B_INFO_VDB|B2B_INFO_SCB) )
         {
-            int pos = get_position(p, &len);
+            int pos = get_position(p, &len, &sc_len, &sc_dist);
             epos = (double)pos/(len+1) * bca->npos;
+
+            if (sc_len) {
+                sc_len = 15.0*sc_len / sc_dist;
+                if (sc_len > 99) sc_len = 99;
+            }
         }
-        int ibq  = baseQ/60. * bca->nqual;
-        int imq  = mapQ/60. * bca->nqual;
-        if ( bam_is_rev(p->b) ) bca->rev_mqs[imq]++;
-        else bca->fwd_mqs[imq]++;
+
+        int imq  = mapQ * nqual_over_60;
+        int ibq  = baseQ * nqual_over_60;
+
+        if ( bam_is_rev(p->b) )
+            bca->rev_mqs[imq]++;
+        else
+            bca->fwd_mqs[imq]++;
+
         if ( bam_seqi(bam_get_seq(p->b),p->qpos) == ref_base )
         {
             bca->ref_pos[epos]++;
             bca->ref_bq[ibq]++;
             bca->ref_mq[imq]++;
+            bca->ref_scl[sc_len]++;
         }
         else
         {
             bca->alt_pos[epos]++;
             bca->alt_bq[ibq]++;
             bca->alt_mq[imq]++;
+            bca->alt_scl[sc_len]++;
         }
     }
+
+    // Compensate for AD not being counted on low quality REF indel matches.
+    if ( r->ADF && bca->ambig_reads==B2B_INC_AD0 )
+    {
+        for (i=0; i<4; i++) // verify: are the counters ever non-zero for i!=0?
+        {
+            r->ADR[i] += ADR_ref_missed[i];
+            r->ADF[i] += ADF_ref_missed[i];
+        }
+    }
+    else if ( r->ADF && bca->ambig_reads==B2B_INC_AD )
+    {
+        int dp = 0, dp_ambig = 0;
+        for (i=0; i<4; i++) dp += r->ADR[i];
+        for (i=0; i<4; i++) dp_ambig += ADR_ref_missed[i];
+        if ( dp )
+            for (i=0; i<4; i++) r->ADR[i] += lroundf((float)dp_ambig * r->ADR[i]/dp);
+        dp = 0, dp_ambig = 0;
+        for (i=0; i<4; i++) dp += r->ADF[i];
+        for (i=0; i<4; i++) dp_ambig += ADF_ref_missed[i];
+        if ( dp )
+            for (i=0; i<4; i++) r->ADF[i] += lroundf((float)dp_ambig * r->ADF[i]/dp);
+    }
+
     r->ori_depth = ori_depth;
     // glfgen
     errmod_cal(bca->e, n, 5, bca->bases, r->p); // calculate PL of each genotype
@@ -439,7 +545,7 @@ double calc_mwu_bias_cdf(int *a, int *b, int n)
     return pval>1 ? 1 : pval;
 }
 
-double calc_mwu_bias(int *a, int *b, int n)
+double calc_mwu_bias(int *a, int *b, int n, int left)
 {
     int na = 0, nb = 0, i;
     double U = 0, ties = 0;
@@ -463,6 +569,7 @@ double calc_mwu_bias(int *a, int *b, int n)
     if ( na==1 || nb==1 ) return 1.0;       // Flat probability, all U values are equally likely
 
     double mean = ((double)na*nb)*0.5;
+    if (left && U > mean) return 1; // for MQB which is asymmetrical
     if ( na==2 || nb==2 )
     {
         // Linear approximation
@@ -485,6 +592,85 @@ double calc_mwu_bias(int *a, int *b, int n)
     return mann_whitney_1947(na,nb,U) * sqrt(2*M_PI*var2);
 }
 
+// Mann-Whitney U test on two binned distributions; a Z-score variant of the
+// above function.  See "Normal approximation and tie correction" at
+// https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test
+//
+// a[] and b[] each hold n bins of counts.  With do_Z set the result is the
+// number of standard deviations U lies above or below its mean (0 being
+// equality of the two distributions); this is a more robust score to filter
+// on.  Otherwise it is the older U score, one-sided when left_only is set.
+// HUGE_VAL is returned whenever no score can be computed.
+double calc_mwu_biasZ(int *a, int *b, int n, int left_only, int do_Z) {
+    int i;
+    int64_t t;
+
+    // Optimisation: detect an all-zero b[] so the pair counting can be skipped
+    for (i = 0; i < n; i++)
+        if (b[i])
+            break;
+    int b_empty = (i == n);
+
+    // Count equal (e) and less-than (l) pairs; 64-bit as deep pileups overflow int.
+    int64_t e = 0, l = 0, na = 0, nb = 0;
+    if (b_empty) {
+        for (t = 0, i = n-1; i >= 0; i--) {
+            na += a[i];
+            t += ((int64_t)a[i]*a[i]-1)*a[i];  // adjustment score for ties
+        }
+    } else {
+        for (t = 0, i = n-1; i >= 0; i--) {
+            // Combinations of a[i] and b[j] for i==j
+            e += (int64_t)a[i]*b[i];
+
+            // nb is running total of b[i+1]..b[n-1].
+            // Therefore a[i]*nb is the number of combinations of a[i] and b[j]
+            // for all i < j.
+            l += a[i]*nb;    // a<b
+
+            na += a[i];
+            nb += b[i];
+            int64_t p = (int64_t)a[i]+b[i];  // p^3 exceeds INT_MAX from p ~ 1291
+            t += (p*p-1)*p;  // adjustment score for ties
+        }
+    }
+
+    if (na+nb <= 1)
+        return HUGE_VAL;
+
+    double U, m;
+    U = l + e*0.5; // Mann-Whitney U score
+    m = na*nb / 2.0;
+
+    // With ties adjustment
+    double var2 = (na*nb)/12.0 * ((na+nb+1) - t/(double)((na+nb)*(na+nb-1)));
+    // var = na*nb*(na+nb+1)/12.0; // simpler; minus tie adjustment
+    if (var2 <= 0)
+        return HUGE_VAL;
+
+    if (do_Z) {
+        // S.D. normalised Z-score
+        //Z = (U - m - (U-m >= 0 ? 0.5 : -0.5)) / sd; // gatk method?
+        return (U - m) / sqrt(var2);
+    }
+
+    // Else U score, which can be asymmetric for some data types.
+    if (left_only && U > m)
+        return HUGE_VAL; // one-sided, +ve bias is OK, -ve is not.
+
+    if (na >= 8 || nb >= 8) {
+        // Normal approximation, very good for na>=8 && nb>=8 and
+        // reasonable if na<8 or nb<8
+        return exp(-0.5*(U-m)*(U-m)/var2);
+    }
+
+    // Exact calculation; na and nb are both below 8 here so the casts are safe
+    if (na==1 || nb == 1)
+        return mann_whitney_1947_((int)na, (int)nb, U) * sqrt(2*M_PI*var2);
+    else
+        return mann_whitney_1947((int)na, (int)nb, U) * sqrt(2*M_PI*var2);
+}
+
 static inline double logsumexp2(double a, double b)
 {
     if ( a>b )
@@ -560,7 +746,7 @@ void calc_SegBias(const bcf_callret1_t *bcr, bcf_call_t *call)
 int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call)
 {
     int ref4, i, j;
-    float qsum[5] = {0,0,0,0,0};
+    float qsum[B2B_MAX_ALLELES] = {0,0,0,0,0};
     if (ref_base >= 0) {
         call->ori_ref = ref4 = seq_nt16_int[ref_base];
         if (ref4 > 4) ref4 = 4;
@@ -571,9 +757,9 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
     for (i = 0; i < n; ++i)
     {
         float sum = 0;
-        for (j = 0; j < 4; ++j) sum += calls[i].qsum[j];
+        for (j = 0; j < 4; ++j) sum += calls[i].QS[j];
         if ( sum )
-            for (j = 0; j < 4; j++) qsum[j] += calls[i].qsum[j] / sum;
+            for (j = 0; j < 4; j++) qsum[j] += (float)calls[i].QS[j] / sum;
     }
 
     // sort qsum in ascending order (insertion sort)
@@ -585,7 +771,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
 
     // Set the reference allele and alternative allele(s)
     for (i=0; i<5; i++) call->a[i] = -1;
-    for (i=0; i<5; i++) call->qsum[i] = 0;
+    for (i=0; i<B2B_MAX_ALLELES; i++) call->qsum[i] = 0;
     call->unseen = -1;
     call->a[0] = ref4;
     for (i=3, j=1; i>=0; i--)   // i: alleles sorted by QS; j, a[j]: output allele ordering
@@ -697,6 +883,21 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
                 adf += B2B_MAX_ALLELES;
             }
         }
+        if ( bca->fmt_flag & B2B_FMT_QS )
+        {
+            assert( call->n_alleles<=B2B_MAX_ALLELES );   // this is always true for SNPs and so far for indels as well
+
+            // reorder QS to match the allele ordering at this site
+            int32_t tmp[B2B_MAX_ALLELES];
+            int32_t *qs = call->QS, *qs_out = call->QS;
+            for (i=0; i<n; i++)
+            {
+                for (j=0; j<call->n_alleles; j++) tmp[j] = qs[ call->a[j] ];
+                for (j=0; j<call->n_alleles; j++) qs_out[j] = tmp[j] < BCF_MAX_BT_INT32 ? tmp[j] : BCF_MAX_BT_INT32;
+                qs_out += call->n_alleles;
+                qs += B2B_MAX_ALLELES;
+            }
+        }
 
 //      if (ref_base < 0) fprintf(bcftools_stderr, "%d,%d,%f,%d\n", call->n_alleles, x, sum_min, call->unseen);
         call->shift = (int)(sum_min + .499);
@@ -719,11 +920,43 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
     // calc_chisq_bias("XMQ", call->bcf_hdr->id[BCF_DT_CTG][call->tid].key, call->pos, bca->ref_mq, bca->alt_mq, bca->nqual);
     // calc_chisq_bias("XBQ", call->bcf_hdr->id[BCF_DT_CTG][call->tid].key, call->pos, bca->ref_bq, bca->alt_bq, bca->nqual);
 
-    if ( bca->fmt_flag & B2B_INFO_RPB )
-        call->mwu_pos = calc_mwu_bias(bca->ref_pos, bca->alt_pos, bca->npos);
-    call->mwu_mq  = calc_mwu_bias(bca->ref_mq,  bca->alt_mq,  bca->nqual);
-    call->mwu_bq  = calc_mwu_bias(bca->ref_bq,  bca->alt_bq,  bca->nqual);
-    call->mwu_mqs = calc_mwu_bias(bca->fwd_mqs, bca->rev_mqs, bca->nqual);
+    if (bca->fmt_flag & B2B_INFO_ZSCORE) {
+        // U z-normalised as +/- number of standard deviations from mean.
+        if (call->ori_ref < 0) {
+            if (bca->fmt_flag & B2B_INFO_RPB)
+                call->mwu_pos = calc_mwu_biasZ(bca->iref_pos, bca->ialt_pos,
+                                               bca->npos, 0, 1);
+            call->mwu_mq  = calc_mwu_biasZ(bca->iref_mq,  bca->ialt_mq,
+                                           bca->nqual,1,1);
+            if ( bca->fmt_flag & B2B_INFO_SCB )
+                call->mwu_sc  = calc_mwu_biasZ(bca->iref_scl, bca->ialt_scl,
+                                               100, 0,1);
+        } else {
+            if (bca->fmt_flag & B2B_INFO_RPB)
+                call->mwu_pos = calc_mwu_biasZ(bca->ref_pos, bca->alt_pos,
+                                               bca->npos, 0, 1);
+            call->mwu_mq  = calc_mwu_biasZ(bca->ref_mq,  bca->alt_mq,
+                                           bca->nqual,1,1);
+            call->mwu_bq  = calc_mwu_biasZ(bca->ref_bq,  bca->alt_bq,
+                                           bca->nqual,0,1);
+            call->mwu_mqs = calc_mwu_biasZ(bca->fwd_mqs, bca->rev_mqs,
+                                           bca->nqual,0,1);
+            if ( bca->fmt_flag & B2B_INFO_SCB )
+                call->mwu_sc  = calc_mwu_biasZ(bca->ref_scl, bca->alt_scl,
+                                               100, 0,1);
+        }
+    } else {
+        // Old method; U as probability between 0 and 1
+        if ( bca->fmt_flag & B2B_INFO_RPB )
+            call->mwu_pos = calc_mwu_biasZ(bca->ref_pos, bca->alt_pos,
+                                           bca->npos, 0, 0);
+        call->mwu_mq  = calc_mwu_biasZ(bca->ref_mq,  bca->alt_mq,
+                                       bca->nqual, 1, 0);
+        call->mwu_bq  = calc_mwu_biasZ(bca->ref_bq,  bca->alt_bq,
+                                       bca->nqual, 0, 0);
+        call->mwu_mqs = calc_mwu_biasZ(bca->fwd_mqs, bca->rev_mqs,
+                                       bca->nqual, 0, 0);
+    }
 
 #if CDF_MWU_TESTS
     // CDF version of MWU tests is not calculated by default
@@ -734,7 +967,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
     call->mwu_mqs_cdf = calc_mwu_bias_cdf(bca->fwd_mqs, bca->rev_mqs, bca->nqual);
 #endif
 
-    if ( bca->fmt_flag & B2B_INFO_VDB ) 
+    if ( bca->fmt_flag & B2B_INFO_VDB )
         call->vdb = calc_vdb(bca->alt_pos, bca->npos);
 
     return 0;
@@ -821,10 +1054,32 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
 
     if ( bc->vdb != HUGE_VAL )      bcf_update_info_float(hdr, rec, "VDB", &bc->vdb, 1);
     if ( bc->seg_bias != HUGE_VAL ) bcf_update_info_float(hdr, rec, "SGB", &bc->seg_bias, 1);
-    if ( bc->mwu_pos != HUGE_VAL )  bcf_update_info_float(hdr, rec, "RPB", &bc->mwu_pos, 1);
-    if ( bc->mwu_mq != HUGE_VAL )   bcf_update_info_float(hdr, rec, "MQB", &bc->mwu_mq, 1);
-    if ( bc->mwu_mqs != HUGE_VAL )  bcf_update_info_float(hdr, rec, "MQSB", &bc->mwu_mqs, 1);
-    if ( bc->mwu_bq != HUGE_VAL )   bcf_update_info_float(hdr, rec, "BQB", &bc->mwu_bq, 1);
+
+    if (bca->fmt_flag & B2B_INFO_ZSCORE) {
+        if ( bc->mwu_pos != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "RPBZ", &bc->mwu_pos, 1);
+        if ( bc->mwu_mq != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "MQBZ", &bc->mwu_mq, 1);
+        if ( bc->mwu_mqs != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "MQSBZ", &bc->mwu_mqs, 1);
+        if ( bc->mwu_bq != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "BQBZ", &bc->mwu_bq, 1);
+        if ( bc->mwu_sc != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "SCBZ", &bc->mwu_sc, 1);
+    } else {
+        if ( bc->mwu_pos != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "RPB", &bc->mwu_pos, 1);
+        if ( bc->mwu_mq != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "MQB", &bc->mwu_mq, 1);
+        if ( bc->mwu_mqs != HUGE_VAL )
+             bcf_update_info_float(hdr, rec, "MQSB", &bc->mwu_mqs, 1);
+        if ( bc->mwu_bq != HUGE_VAL )
+            bcf_update_info_float(hdr, rec, "BQB", &bc->mwu_bq, 1);
+    }
+
+    if ( bc->strand_bias != HUGE_VAL )
+        bcf_update_info_float(hdr, rec, "FS", &bc->strand_bias, 1);
+
 #if CDF_MWU_TESTS
     if ( bc->mwu_pos_cdf != HUGE_VAL )  bcf_update_info_float(hdr, rec, "RPB2", &bc->mwu_pos_cdf, 1);
     if ( bc->mwu_mq_cdf != HUGE_VAL )   bcf_update_info_float(hdr, rec, "MQB2", &bc->mwu_mq_cdf, 1);
@@ -886,6 +1141,8 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
     }
     if ( fmt_flag&B2B_FMT_SCR )
         bcf_update_format_int32(hdr, rec, "SCR", bc->SCR+1, rec->n_sample);
+    if ( fmt_flag&B2B_FMT_QS )
+        bcf_update_format_int32(hdr, rec, "QS", bc->QS, rec->n_sample*rec->n_allele);
 
     return 0;
 }
diff --git a/bcftools/bam2bcf.h b/bcftools/bam2bcf.h
index 2d2cf83..e8b0fb9 100644
--- a/bcftools/bam2bcf.h
+++ b/bcftools/bam2bcf.h
@@ -1,7 +1,7 @@
 /*  bam2bcf.h -- variant calling.
 
     Copyright (C) 2010-2012 Broad Institute.
-    Copyright (C) 2012-2014,2016 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -59,21 +59,36 @@ DEALINGS IN THE SOFTWARE.  */
 #define B2B_FMT_SCR     (1<<13)
 #define B2B_INFO_VDB    (1<<14)
 #define B2B_INFO_RPB    (1<<15)
+#define B2B_FMT_QS      (1<<16)
+#define B2B_INFO_SCB    (1<<17)
+#define B2B_INFO_ZSCORE (1<<30) // MWU as-is or Z-normalised
 
 #define B2B_MAX_ALLELES 5
 
+#define B2B_DROP      0
+#define B2B_INC_AD    1
+#define B2B_INC_AD0   2
+
 #define PLP_HAS_SOFT_CLIP(i) ((i)&1)
-#define PLP_SAMPLE_ID(i)     ((i)>>1)
+#define PLP_HAS_INDEL(i)     ((i)&2)
+#define PLP_SAMPLE_ID(i)     ((i)>>2)
+
+#define PLP_SET_SOFT_CLIP(i)     ((i)|=1)
+#define PLP_SET_INDEL(i)         ((i)|=2)
+#define PLP_SET_SAMPLE_ID(i,n)   ((i)|=(n)<<2)
 
 typedef struct __bcf_callaux_t {
-    int fmt_flag;
-    int capQ, min_baseQ;
+    int fmt_flag, ambig_reads;
+    int capQ, min_baseQ, max_baseQ, delta_baseQ;
     int openQ, extQ, tandemQ; // for indels
     uint32_t min_support, max_support; // for collecting indel candidates
     double min_frac; // for collecting indel candidates
     float max_frac; // for collecting indel candidates
     int per_sample_flt; // indel filtering strategy
     int *ref_pos, *alt_pos, npos, *ref_mq, *alt_mq, *ref_bq, *alt_bq, *fwd_mqs, *rev_mqs, nqual; // for bias tests
+    int *iref_pos, *ialt_pos, *iref_mq, *ialt_mq; // for indels
+    int ref_scl[100], alt_scl[100];   // soft-clip length bias; SNP
+    int iref_scl[100], ialt_scl[100]; // soft-clip length bias; INDEL
     // for internal uses
     int max_bases;
     int indel_types[4];     // indel lengths
@@ -83,14 +98,14 @@ typedef struct __bcf_callaux_t {
     uint16_t *bases;        // 5bit: unused, 6:quality, 1:is_rev, 4:2-bit base or indel allele (index to bcf_callaux_t.indel_types)
     errmod_t *e;
     void *rghash;
+    float indel_bias;  // adjusts indel score threshold; lower => call more.
 } bcf_callaux_t;
 
 // per-sample values
 typedef struct {
-    uint32_t ori_depth;
+    uint32_t ori_depth;     // ori_depth = anno[0..3] but before --min-BQ is applied
     unsigned int mq0;
-    int32_t *ADF, *ADR, SCR;
-    float qsum[4];
+    int32_t *ADF, *ADR, SCR, *QS;   // FMT/QS
     // The fields are:
     //      depth fwd   .. ref (0) and non-ref (2)
     //      depth rev   .. ref (1) and non-ref (3)
@@ -112,19 +127,20 @@ typedef struct {
     int tid, pos;
     bcf_hdr_t *bcf_hdr;
     int a[5]; // alleles: ref, alt, alt2, alt3
-    float qsum[5];  // for the QS tag
+    float qsum[B2B_MAX_ALLELES];  // INFO/QS tag
     int n, n_alleles, shift, ori_ref, unseen;
     int n_supp; // number of supporting non-reference reads
     double anno[16];
     unsigned int depth, ori_depth, mq0;
-    int32_t *PL, *DP4, *ADR, *ADF, *SCR;
+    int32_t *PL, *DP4, *ADR, *ADF, *SCR, *QS;
     uint8_t *fmt_arr;
     float vdb; // variant distance bias
-    float mwu_pos, mwu_mq, mwu_bq, mwu_mqs;
+    float mwu_pos, mwu_mq, mwu_bq, mwu_mqs, mwu_sc;
 #if CDF_MWU_TESTS
     float mwu_pos_cdf, mwu_mq_cdf, mwu_bq_cdf, mwu_mqs_cdf;
 #endif
     float seg_bias;
+    float strand_bias; // phred-scaled fisher-exact test
     kstring_t tmp;
 } bcf_call_t;
 
@@ -132,7 +148,8 @@ typedef struct {
 extern "C" {
 #endif
 
-    bcf_callaux_t *bcf_call_init(double theta, int min_baseQ);
+    bcf_callaux_t *bcf_call_init(double theta, int min_baseQ, int max_baseQ,
+                                 int delta_baseQ);
     void bcf_call_destroy(bcf_callaux_t *bca);
     int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r);
     int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call);
diff --git a/bcftools/bam2bcf_indel.c b/bcftools/bam2bcf_indel.c
index 6c367da..facb3bf 100644
--- a/bcftools/bam2bcf_indel.c
+++ b/bcftools/bam2bcf_indel.c
@@ -1,7 +1,7 @@
 /*  bam2bcf_indel.c -- indel caller.
 
     Copyright (C) 2010, 2011 Broad Institute.
-    Copyright (C) 2012-2014,2016 Genome Research Ltd.
+    Copyright (C) 2012-2014,2016-2017, 2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -26,19 +26,29 @@ DEALINGS IN THE SOFTWARE.  */
 #include <assert.h>
 #include <ctype.h>
 #include <string.h>
+#include <math.h>
 #include <htslib/hts.h>
 #include <htslib/sam.h>
 #include <htslib/khash_str2int.h>
 #include "bam2bcf.h"
+#include "str_finder.h"
 
 #include <htslib/ksort.h>
 KSORT_INIT_GENERIC(uint32_t)
 
 #define MINUS_CONST 0x10000000
-#define INDEL_WINDOW_SIZE 50
+#define INDEL_WINDOW_SIZE 110
 
+#define MAX_TYPES 64
+
+// Take a reference position tpos and convert to a query position (returned).
+// This uses the CIGAR string plus alignment c->pos to do the mapping.
+//
+// *_tpos is returned as tpos if query overlaps tpos, but for deletions
+// it'll be either the start (is_left) or end (!is_left) ref position.
 static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos)
 {
+    // x = pos in ref, y = pos in query seq
     int k, x = c->pos, y = 0, last_y = 0;
     *_tpos = c->pos;
     for (k = 0; k < c->n_cigar; ++k) {
@@ -64,6 +74,7 @@ static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos,
     *_tpos = x;
     return last_y;
 }
+
 // FIXME: check if the inserted sequence is consistent with the homopolymer run
 // l is the relative gap length and l_run is the length of the homopolymer on the reference
 static inline int est_seqQ(const bcf_callaux_t *bca, int l, int l_run)
@@ -87,21 +98,609 @@ static inline int est_indelreg(int pos, const char *ref, int l, char *ins4)
     return max_i - pos;
 }
 
+// Identify soft-clip length, position in seq, and clipped seq len for pileup p.
+static inline void get_pos(const bcf_callaux_t *bca, bam_pileup1_t *p,
+                           int *sc_len_r, int *slen_r, int *epos_r, int *end) {
+    bam1_t *b = p->b;
+    int sc_len = 0, sc_dist = -1, at_left = 1;  // sc_dist < 0: no soft-clip seen yet
+    int epos = p->qpos, slen = b->core.l_qseq;
+    int k;
+    uint32_t *cigar = bam_get_cigar(b);
+    *end = -1;  // which clip is reported: -1 none, 0 left, 1 right
+    for (k = 0; k < b->core.n_cigar; k++) {
+        int op = bam_cigar_op(cigar[k]);
+        if (op == BAM_CSOFT_CLIP) {
+            slen -= bam_cigar_oplen(cigar[k]);  // slen excludes clips at both ends
+            if (at_left) {
+                // left end: only hard-clips (if anything) have preceded this op
+                sc_len += bam_cigar_oplen(cigar[k]);
+                epos -= sc_len; // don't count SC in seq pos
+                sc_dist = epos; // distance from the left clip to this position
+                *end = 0;
+            } else {
+                // right end
+                int srlen = bam_cigar_oplen(cigar[k]);
+                int rd = b->core.l_qseq - srlen - p->qpos;  // distance to right clip
+                if (sc_dist < 0 || sc_dist > rd) {
+                    // no left clip, or closer to right end than left
+                    // FIXME: compensate for indel length too?
+                    sc_dist = rd;
+                    sc_len = srlen;
+                    *end = 1;
+                }
+            }
+        } else if (op != BAM_CHARD_CLIP) {
+            at_left = 0;  // any op other than a clip ends the left-hand region
+        }
+    }
+
+    if (p->indel > 0 && slen - (epos+p->indel) < epos)
+        epos += p->indel-1; // end of insertion, if near end of seq
+
+    // slen is now length of sequence minus soft-clips and
+    // epos is position of indel in seq minus left-clip.
+    *epos_r = (double)epos / (slen+1) * bca->npos;  // rescale into bca->npos bins
+
+    if (sc_len) {
+        // scale importance of clip by distance to closest end
+        *sc_len_r = 15.0*sc_len / (sc_dist+1);
+        if (*sc_len_r > 99) *sc_len_r = 99;  // presumably indexes a 100-bin *_scl array; confirm at caller
+    } else {
+        *sc_len_r = 0;
+    }
+
+    *slen_r = slen;
+}
+
+// Part of bcf_call_gap_prep.
+//
+// Scans the pileup to identify all the different sizes of indels
+// present.  Also resets and recomputes bca->max_support and bca->max_frac.
+//
+// Returns a calloc'd array of the distinct indel sizes (0 being REF) and fills
+// out *n_types_r, *max_rd_len_r, *ref_type_r and *N_r; NULL on error or skip.
+static int *bcf_cgp_find_types(int n, int *n_plp, bam_pileup1_t **plp,
+                               int pos, bcf_callaux_t *bca, const char *ref,
+                               int *max_rd_len_r, int *n_types_r,
+                               int *ref_type_r, int *N_r) {
+    int i, j, t, s, N, m, max_rd_len, n_types;
+    int n_alt = 0, n_tot = 0, indel_support_ok = 0;
+    uint32_t *aux;
+    int *types;
+
+    // N is the total number of reads, summed over all n pileups
+    for (s = N = 0; s < n; ++s)
+        N += n_plp[s];
+
+    bca->max_support = bca->max_frac = 0;
+    aux = (uint32_t*) calloc(N + 1, 4);  // REF entry plus at most one per read
+    if (!aux)
+        return NULL;
+
+    m = max_rd_len = 0;
+    aux[m++] = MINUS_CONST; // zero indel is always a type (REF)
+
+    // Fill out aux[] array with all the non-zero indel sizes (plus MINUS_CONST).
+    // Also tally number with indels (n_alt) and total (n_tot).
+    for (s = 0; s < n; ++s) {
+        int na = 0, nt = 0;  // for pileup s: reads with an indel, all reads
+        for (i = 0; i < n_plp[s]; ++i) {
+            const bam_pileup1_t *p = plp[s] + i;
+            ++nt;
+            if (p->indel != 0) {
+                ++na;
+                aux[m++] = MINUS_CONST + p->indel;
+            }
+
+            // FIXME: cache me in pileup struct.
+            j = bam_cigar2qlen(p->b->core.n_cigar, bam_get_cigar(p->b));
+            if (j > max_rd_len) max_rd_len = j;
+        }
+        double frac = (double)na/nt;  // 0/0 if pileup s is empty; NaN under IEEE, failing both tests
+        if ( !indel_support_ok && na >= bca->min_support
+             && frac >= bca->min_frac )
+            indel_support_ok = 1;
+        if ( na > bca->max_support && frac > 0 )
+            bca->max_support = na, bca->max_frac = frac;
+
+        n_alt += na;
+        n_tot += nt;
+    }
+
+    // Sort aux[] and count the distinct values
+    ks_introsort(uint32_t, m, aux);
+    for (i = 1, n_types = 1; i < m; ++i)
+        if (aux[i] != aux[i-1]) ++n_types;
+
+    // Taking totals makes it hard to call rare indels (IMF filter)
+    if ( !bca->per_sample_flt )
+        indel_support_ok = ( (double)n_alt / n_tot < bca->min_frac
+                             || n_alt < bca->min_support )
+            ? 0 : 1;
+    if ( n_types == 1 || !indel_support_ok ) { // then skip
+        free(aux);
+        return NULL;
+    }
+
+    // Bail out if we have far too many types of indel
+    if (n_types >= MAX_TYPES) {
+        free(aux);
+        // TODO revisit how/whether to control printing this warning
+        if (hts_verbose >= 2)
+            fprintf(stderr, "[%s] excessive INDEL alleles at position %d. "
+                    "Skip the position.\n", __func__, pos + 1);
+        return NULL;
+    }
+
+    // To prevent long stretches of N's to be mistaken for indels
+    // (sometimes thousands of bases), check the number of N's in the
+    // sequence and skip places where more than half the reference bases are Ns.
+    int nN=0, i_end = pos + (2*INDEL_WINDOW_SIZE < max_rd_len
+                            ?2*INDEL_WINDOW_SIZE : max_rd_len);
+    for (i=pos; i<i_end && ref[i]; i++)
+        nN += ref[i] == 'N';
+    if ( nN*2>(i-pos) ) {
+        free(aux);
+        return NULL;
+    }
+
+    // Finally fill out the types[] array detailing the size of insertion
+    // or deletion.
+    types = (int*)calloc(n_types, sizeof(int));
+    if (!types) {
+        free(aux);
+        return NULL;
+    }
+    t = 0;
+    types[t++] = aux[0] - MINUS_CONST;
+    for (i = 1; i < m; ++i)
+        if (aux[i] != aux[i-1])
+            types[t++] = aux[i] - MINUS_CONST;
+    free(aux);
+
+    // Find reference type, i.e. the index t with types[t] == 0
+    for (t = 0; t < n_types; ++t)
+        if (types[t] == 0) break;
+
+    *ref_type_r   = t;
+    *n_types_r    = n_types;
+    *max_rd_len_r = max_rd_len;
+    *N_r          = N;
+
+    return types;
+}
+
+// Part of bcf_call_gap_prep.
+//
+// Construct per-sample consensus.
+//
+// Returns an array of consensus seqs,
+//         or NULL on failure.
+static char **bcf_cgp_ref_sample(int n, int *n_plp, bam_pileup1_t **plp,
+                                 int pos, bcf_callaux_t *bca, const char *ref,
+                                 int left, int right) {
+    int i, k, s, L = right - left + 1, max_i, max2_i;
+    char **ref_sample; // returned
+    uint32_t *cns = NULL, max, max2;
+    char *ref0 = NULL, *r;
+    ref_sample = (char**) calloc(n, sizeof(char*));
+    cns = (uint32_t*) calloc(L, 4);
+    ref0 = (char*) calloc(L, 1);
+    if (!ref_sample || !cns || !ref0) {
+        n = 0;
+        goto err;
+    }
+
+    // Convert ref ASCII to 0-15.
+    for (i = 0; i < right - left; ++i)
+        ref0[i] = seq_nt16_table[(int)ref[i+left]];
+
+    // NB: one consensus per sample 'n', not per indel type.
+    // FIXME: consider fixing this.  We should compute alignments vs
+    // types, not vs samples?  Or types/sample combined?
+    for (s = 0; s < n; ++s) {
+        r = ref_sample[s] = (char*) calloc(L, 1);
+        if (!r) {
+            n = s-1;
+            goto err;
+        }
+
+        memset(cns, 0, sizeof(int) * L);
+
+        // collect ref and non-ref counts in cns
+        for (i = 0; i < n_plp[s]; ++i) {
+            bam_pileup1_t *p = plp[s] + i;
+            bam1_t *b = p->b;
+            uint32_t *cigar = bam_get_cigar(b);
+            uint8_t *seq = bam_get_seq(b);
+            int x = b->core.pos, y = 0;
+
+            // TODO: pileup exposes pileup_ind, but we also need e.g.
+            // pileup_len to know how much of the current CIGAR op-len
+            // we've used (or have remaining).  If we had that, we
+            // could start at p->qpos without having to scan through
+            // the entire CIGAR string until we find it.
+            //
+            // Without it about all we could do is have a side channel
+            // to cache the last known coords.  Messy, so punt for now.
+            // This is no longer the bottle neck until we get to 1000s of
+            // CIGAR ops.
+
+            for (k = 0; k < b->core.n_cigar; ++k) {
+                int op = cigar[k]&0xf;
+                int j, l = cigar[k]>>4;
+                if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
+                    if (x + l >= left) {
+                        j = left - x > 0 ? left - x : 0;
+                        int j_end = right - x < l ? right - x : l;
+                        for (; j < j_end; j++)
+                            // Append to cns.  Note this is ref coords,
+                            // so insertions aren't in cns and deletions
+                            // will have lower coverage.
+
+                            // FIXME: want true consensus (with ins) per
+                            // type, so we can independently compare each
+                            // seq to each consensus and see which it
+                            // matches best, so we get proper GT analysis.
+                            cns[x+j-left] +=
+                                (bam_seqi(seq, y+j) == ref0[x+j-left])
+                                ? 1        // REF
+                                : (1<<16); // ALT
+                    }
+                    x += l; y += l;
+                } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) {
+                    x += l;
+                } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
+                    y += l;
+                }
+
+                if (x > right)
+                    break;
+            }
+        }
+
+        // Determine a sample specific reference.
+        for (i = 0; i < right - left; ++i)
+            r[i] = ref0[i];
+
+        // Find deepest and 2nd deepest ALT region (max & max2).
+        max = max2 = 0; max_i = max2_i = -1;
+        for (i = 0; i < right - left; ++i) {
+            if (cns[i]>>16 >= max>>16)
+                max2 = max, max2_i = max_i, max = cns[i], max_i = i;
+            else if (cns[i]>>16 >= max2>>16)
+                max2 = cns[i], max2_i = i;
+        }
+
+        // Masks mismatches present in more than 30% of the reads with 'N'.
+        // This code is nREF/(nREF+n_ALT) >= 70% for deepest region.
+        // The effect is that at least 30% of bases differing to REF will
+        // use "N" in consensus, so we don't penalise ALT or REF when
+        // aligning against it.  (A poor man's IUPAC code)
+        //
+        // Why is it only done in two loci at most?
+        if ((double)(max&0xffff) / ((max&0xffff) + (max>>16)) >= 0.7)
+            max_i = -1;
+        if ((double)(max2&0xffff) / ((max2&0xffff) + (max2>>16)) >= 0.7)
+            max2_i = -1;
+        if (max_i >= 0) r[max_i] = 15;
+        if (max2_i >= 0) r[max2_i] = 15;
+
+        //for (i = 0; i < right - left; ++i)
+        //    fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], stderr);
+        //fputc('\n', stderr);
+    }
+
+    free(ref0);
+    free(cns);
+
+    return ref_sample;
+
+ err:
+    free(ref0);
+    free(cns);
+    if (ref_sample) {
+        for (s = 0; s < n; s++)
+            free(ref_sample[s]);
+        free(ref_sample);
+    }
+
+    return NULL;
+}
+
+// Length of the homopolymer run in ref that contains position pos+1.
+// A base with nt16 code 15 at pos+1 always yields a run length of 1.
+static int bcf_cgp_l_run(const char *ref, int pos) {
+    const int base = seq_nt16_table[(int)ref[pos + 1]];
+    int hi, lo;
+
+    if (base == 15)
+        return 1;
+
+    // Scan right until the base changes or the NUL terminator is hit.
+    for (hi = pos + 2; ref[hi] && seq_nt16_table[(int)ref[hi]] == base;)
+        ++hi;
+    // Scan left; lo stops on the first differing base, or at -1.
+    for (lo = pos; lo >= 0 && seq_nt16_table[(int)ref[lo]] == base;)
+        --lo;
+
+    return hi - lo - 1;
+}
+
+
+// Build the majority-rule consensus of every insertion type (types[t] > 0)
+// from the reads of all n samples; types whose consensus has an N are
+// discarded (types[t] = 0).  's' is just a loop counter, 'pos' is unused.
+// Returns a calloc'd n_types*max_ins array (caller frees), NULL on failure.
+static char *bcf_cgp_calc_cons(int n, int *n_plp, bam_pileup1_t **plp,
+                               int pos, int *types, int n_types,
+                               int max_ins, int s) {
+    int i, j, t, k;
+    int *inscns_aux = (int*)calloc(5 * n_types * max_ins, sizeof(int));
+    char *inscns = (char *)calloc(n_types * max_ins, 1);
+    if (!inscns_aux || !inscns) {
+        free(inscns_aux);
+        free(inscns);
+        return NULL;
+    }
+
+    // Count each base at each position of each insertion type.
+    for (t = 0; t < n_types; ++t) {
+        if (types[t] <= 0) continue;
+        for (s = 0; s < n; ++s) {
+            for (i = 0; i < n_plp[s]; ++i) {
+                bam_pileup1_t *p = plp[s] + i;
+                if (p->indel != types[t]) continue;
+                uint8_t *seq = bam_get_seq(p->b);
+                for (k = 1; k <= p->indel; ++k) {
+                    int c = seq_nt16_int[bam_seqi(seq, p->qpos + k)];
+                    assert(c<5);
+                    ++inscns_aux[(t*max_ins+(k-1))*5 + c];
+                }
+            }
+        }
+    }
+
+    // Use the majority rule to construct the consensus
+    for (t = 0; t < n_types; ++t) {
+        for (j = 0; j < types[t]; ++j) {
+            int max = 0, max_k = -1, *ia = &inscns_aux[(t*max_ins+j)*5];
+            for (k = 0; k < 5; ++k)
+                if (ia[k] > max)
+                    max = ia[k], max_k = k;
+            inscns[t*max_ins + j] = max ? max_k : 4;
+            if (max_k == 4) {
+                types[t] = 0; // discard insertions which contain N's
+                break;
+            }
+        }
+    }
+    free(inscns_aux);
+    return inscns;
+}
+
+#ifndef MIN
+#  define MIN(a,b) ((a)<(b)?(a):(b))
+#endif
+
+// Part of bcf_call_gap_prep.
+//
+// Realign a single read (query) against a haplotype consensus (ref2)
+// using BAQ (probaln_glocal), then add an STR-based term to the result.
+// 'right' and 'max_deletion' are accepted but not used here.
+//
+// Fills out *score (0xffffff if the alignment itself fails).
+// Returns 0 on success, <0 on memory allocation failure.
+static int bcf_cgp_align_score(bam_pileup1_t *p, bcf_callaux_t *bca,
+                               int type, uint8_t *ref2, uint8_t *query,
+                               int r_start, int r_end, int long_read,
+                               int tbeg, int tend,
+                               int left, int right,
+                               int qbeg, int qend,
+                               int qpos, int max_deletion,
+                               int *score) {
+    // Illumina (the default unless long_read is set)
+    probaln_par_t apf = { 1e-4, 1e-2, 10 };
+
+    // Parameters that work better on PacBio CCS 15k.
+    // We should consider querying the header and RG PU field.
+    // See also htslib/realn.c:sam_prob_realn()
+    if (long_read) {
+        apf.d = 1e-3;
+        apf.e = 1e-1;
+    }
+
+    type = abs(type);   // only the indel length is used from here on
+    apf.bw = type + 3;
+    int l, sc;
+    const uint8_t *qual = bam_get_qual(p->b), *bq;
+    uint8_t *qq;
+
+    // Qualities for [qbeg,qend): QUAL, offset by ZQ-64 if present, 7..30.
+    if (!(qq = (uint8_t*) calloc(qend - qbeg, 1)))
+        return -1;
+    bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
+    if (bq) ++bq; // skip type
+    for (l = qbeg; l < qend; ++l) {
+        int qval = bq? qual[l] + (bq[l] - 64) : qual[l];
+        if (qval > 30)
+            qval = 30;
+        if (qval < 7)
+            qval = 7;
+        qq[l - qbeg] = qval;
+    }
+
+    // The bottom 8 bits are length-normalised score while
+    // the top bits are unnormalised.
+    sc = probaln_glocal(ref2 + tbeg - left, tend - tbeg + type,
+                        query, qend - qbeg, qq, &apf, 0, 0);
+    if (sc < 0) {   // alignment failed: store a large placeholder cost
+        *score = 0xffffff;
+        free(qq);
+        return 0;
+    }
+
+    // used for adjusting indelQ below
+    l = (int)(100. * sc / (qend - qbeg) + .499) * bca->indel_bias;
+    *score = sc<<8 | MIN(255, l);
+
+    rep_ele *reps, *elt, *tmp;
+    uint8_t *seg = ref2 + tbeg - left;
+    int seg_len = tend - tbeg + type;
+
+    // Note: although seg moves (tbeg varies), ref2 is reused many times
+    // so we could factor out some find_STR calls.  However it's not the
+    // bottleneck for now.
+
+    // FIXME: need to make this work on IUPAC.
+    reps = find_STR((char *)seg, seg_len, 0);
+    int iscore = 0;
+
+    // Identify STRs in ref covering the indel up to
+    // (or close to) the end of the sequence.
+    // Those having an indel and right at the sequence
+    // end do not confirm the total length of indel
+    // size.  Specifically a *lack* of indel at the
+    // end, where we know indels occur in other
+    // sequences, is a possible reference bias.
+    //
+    // This is emphasised further if the sequence ends with
+    // soft clipping.
+    DL_FOREACH_SAFE(reps, elt, tmp) {
+        if (elt->start <= qpos && elt->end >= qpos) {
+            iscore += (elt->end-elt->start) / elt->rep_len;  // span / rep_len
+            if (elt->start+tbeg <= r_start ||
+                elt->end+tbeg   >= r_end)
+                iscore += 2*(elt->end-elt->start);
+       }
+
+        DL_DELETE(reps, elt);
+        free(elt);
+    }
+
+    // Apply STR score to the low 8 bits of *score, capped at 255
+    l  =  (*score&0xff)*.8 + iscore*2;
+    *score = (*score & ~0xff) | MIN(255, l);
+
+    free(qq);
+
+    return 0;
+}
+
+// Part of bcf_call_gap_prep.  Sets p->aux (call, seqQ, indelQ) of every
+// read from 'score' and fills bca->indel_types, bca->inscns, bca->maxins.
+// Returns n_alt (reads whose final call index is non-zero) on success
+//         -1 on failure (out of memory; bca->inscns is left intact)
+static int bcf_cgp_compute_indelQ(int n, int *n_plp, bam_pileup1_t **plp,
+                                  bcf_callaux_t *bca, char *inscns,
+                                  int l_run, int max_ins,
+                                  int ref_type, int *types, int n_types,
+                                  int *score) {
+    // Assumes n_types <= MAX_TYPES.  Low 6 bits of sc[]/sumq[] hold a type index.
+    int sc[MAX_TYPES], sumq[MAX_TYPES], s, i, j, t, K, n_alt, tmp;
+    memset(sumq, 0, n_types * sizeof(int));
+    for (s = K = 0; s < n; ++s) {
+        for (i = 0; i < n_plp[s]; ++i, ++K) {
+            bam_pileup1_t *p = plp[s] + i;
+            int *sct = &score[K*n_types], seqQ, indelQ;
+            for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
+            for (t = 1; t < n_types; ++t) // insertion sort
+                for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
+                    tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp;
+
+            /* errmod_cal() assumes that if the call is wrong, the
+             * likelihoods of other events are equal. This is about
+             * right for substitutions, but is not desired for
+             * indels. To reuse errmod_cal(), I have to make
+             * compromise for multi-allelic indels.
+             */
+            if ((sc[0]&0x3f) == ref_type) {
+                indelQ = (sc[1]>>14) - (sc[0]>>14);
+                seqQ = est_seqQ(bca, types[sc[1]&0x3f], l_run);
+            } else {
+                for (t = 0; t < n_types; ++t) // look for the reference type
+                    if ((sc[t]&0x3f) == ref_type) break;
+                indelQ = (sc[t]>>14) - (sc[0]>>14);
+                seqQ = est_seqQ(bca, types[sc[0]&0x3f], l_run);
+            }
+            tmp = sc[0]>>6 & 0xff;
+            // reduce indelQ
+            indelQ = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ + .499);
+
+            // Doesn't really help accuracy, but permits -h to take
+            // effect still.
+            if (indelQ > seqQ) indelQ = seqQ;
+            if (indelQ > 255) indelQ = 255;
+            if (seqQ > 255) seqQ = 255;
+            p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ; // use 22 bits in total
+            sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ;
+            //              fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
+        }
+    }
+    // determine bca->indel_types[] and bca->inscns
+    char *new_inscns = (char*) realloc(bca->inscns, max_ins * 4);
+    if (max_ins && !new_inscns)
+        return -1; // old bca->inscns is still owned by bca, so nothing leaks
+    bca->inscns = new_inscns, bca->maxins = max_ins;
+    for (t = 0; t < n_types; ++t)
+        sumq[t] = sumq[t]<<6 | t;
+    for (t = 1; t < n_types; ++t) // insertion sort
+        for (j = t; j > 0 && sumq[j] > sumq[j-1]; --j)
+            tmp = sumq[j], sumq[j] = sumq[j-1], sumq[j-1] = tmp;
+    for (t = 0; t < n_types; ++t) // look for the reference type
+        if ((sumq[t]&0x3f) == ref_type) break;
+    if (t) { // then move the reference type to the first
+        tmp = sumq[t];
+        for (; t > 0; --t) sumq[t] = sumq[t-1];
+        sumq[0] = tmp;
+    }
+    for (t = 0; t < 4; ++t) bca->indel_types[t] = B2B_INDEL_NULL;
+    for (t = 0; t < 4 && t < n_types; ++t) {
+        bca->indel_types[t] = types[sumq[t]&0x3f];
+        if (bca->maxins)
+            memcpy(&bca->inscns[t * bca->maxins],
+                   &inscns[(sumq[t]&0x3f) * max_ins], bca->maxins);
+    }
+    // update p->aux
+    for (s = n_alt = 0; s < n; ++s) {
+        for (i = 0; i < n_plp[s]; ++i) {
+            bam_pileup1_t *p = plp[s] + i;
+            int x = types[p->aux>>16&0x3f];
+            for (j = 0; j < 4; ++j)
+                if (x == bca->indel_types[j]) break;
+            p->aux = j<<16 | (j == 4? 0 : (p->aux&0xffff));
+            if ((p->aux>>16&0x3f) > 0) ++n_alt;
+            //fprintf(stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam_get_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
+        }
+    }
+
+    return n_alt;
+}
+
+/*
+FIXME: with a high number of samples, do we handle IMF correctly?  Is it the
+fraction of indels across the entire data set, or just for this specific
+sample?  Need to check the bca->per_sample_flt (--per-sample-mF) option.
+ */
+
 /*
     notes:
-        - n .. number of samples
-        - the routine sets bam_pileup1_t.aux of each read as follows:
-            - 6: unused
-            - 6: the call; index to bcf_callaux_t.indel_types   .. (aux>>16)&0x3f
-            - 8: estimated sequence quality                     .. (aux>>8)&0xff
-            - 8: indel quality                                  .. aux&0xff
+    - n .. number of samples
+    - the routine sets bam_pileup1_t.aux of each read as follows:
+        - 6: unused
+        - 6: the call; index to bcf_callaux_t.indel_types   .. (aux>>16)&0x3f
+        - 8: estimated sequence quality                     .. (aux>>8)&0xff
+        - 8: indel quality                                  .. aux&0xff
  */
-int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref)
+int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos,
+                      bcf_callaux_t *bca, const char *ref)
 {
-    int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2;
+    if (ref == 0 || bca == 0) return -1;
+
+    int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins;
+    int *score, max_ref2;
     int N, K, l_run, ref_type, n_alt;
     char *inscns = 0, *ref2, *query, **ref_sample;
-    if (ref == 0 || bca == 0) return -1;
 
     // determine if there is a gap
     for (s = N = 0; s < n; ++s) {
@@ -109,77 +708,29 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
             if (plp[s][i].indel != 0) break;
         if (i < n_plp[s]) break;
     }
-    if (s == n) return -1; // there is no indel at this position.
-    for (s = N = 0; s < n; ++s) N += n_plp[s]; // N is the total number of reads
-    { // find out how many types of indels are present
-        bca->max_support = bca->max_frac = 0;
-        int m, n_alt = 0, n_tot = 0, indel_support_ok = 0;
-        uint32_t *aux;
-        aux = (uint32_t*) calloc(N + 1, 4);
-        m = max_rd_len = 0;
-        aux[m++] = MINUS_CONST; // zero indel is always a type
-        for (s = 0; s < n; ++s) {
-            int na = 0, nt = 0;
-            for (i = 0; i < n_plp[s]; ++i) {
-                const bam_pileup1_t *p = plp[s] + i;
-                ++nt;
-                if (p->indel != 0) {
-                    ++na;
-                    aux[m++] = MINUS_CONST + p->indel;
-                }
-                j = bam_cigar2qlen(p->b->core.n_cigar, bam_get_cigar(p->b));
-                if (j > max_rd_len) max_rd_len = j;
-            }
-            double frac = (double)na/nt;
-            if ( !indel_support_ok && na >= bca->min_support && frac >= bca->min_frac )
-                indel_support_ok = 1;
-            if ( na > bca->max_support && frac > 0 ) bca->max_support = na, bca->max_frac = frac;
-            n_alt += na;
-            n_tot += nt;
-        }
-        // To prevent long stretches of N's to be mistaken for indels (sometimes thousands of bases),
-        //  check the number of N's in the sequence and skip places where half or more reference bases are Ns.
-        int nN=0; for (i=pos; i-pos<max_rd_len && ref[i]; i++) if ( ref[i]=='N' ) nN++;
-        if ( nN*2>(i-pos) ) { free(aux); return -1; }
-
-        ks_introsort(uint32_t, m, aux);
-        // squeeze out identical types
-        for (i = 1, n_types = 1; i < m; ++i)
-            if (aux[i] != aux[i-1]) ++n_types;
-        // Taking totals makes it hard to call rare indels
-        if ( !bca->per_sample_flt )
-            indel_support_ok = ( (double)n_alt / n_tot < bca->min_frac || n_alt < bca->min_support ) ? 0 : 1;
-        if ( n_types == 1 || !indel_support_ok ) { // then skip
-            free(aux); return -1;
-        }
-        if (n_types >= 64) {
-            free(aux);
-            // TODO revisit how/whether to control printing this warning
-            if (hts_verbose >= 2)
-                fprintf(stderr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1);
-            return -1;
-        }
-        types = (int*)calloc(n_types, sizeof(int));
-        t = 0;
-        types[t++] = aux[0] - MINUS_CONST;
-        for (i = 1; i < m; ++i)
-            if (aux[i] != aux[i-1])
-                types[t++] = aux[i] - MINUS_CONST;
-        free(aux);
-        for (t = 0; t < n_types; ++t)
-            if (types[t] == 0) break;
-        ref_type = t; // the index of the reference type (0)
-    }
-    { // calculate left and right boundary
-        left = pos > INDEL_WINDOW_SIZE? pos - INDEL_WINDOW_SIZE : 0;
-        right = pos + INDEL_WINDOW_SIZE;
-        if (types[0] < 0) right -= types[0];
-        // in case the alignments stand out the reference
-        for (i = pos; i < right; ++i)
-            if (ref[i] == 0) break;
-        right = i;
-    }
-    /* The following block fixes a long-existing flaw in the INDEL
+    if (s == n)
+        // there is no indel at this position.
+        return -1;
+
+    // find out how many types of indels are present
+    types = bcf_cgp_find_types(n, n_plp, plp, pos, bca, ref,
+                               &max_rd_len, &n_types, &ref_type, &N);
+    if (!types)
+        return -1;
+
+
+    // calculate left and right boundary
+    left = pos > INDEL_WINDOW_SIZE? pos - INDEL_WINDOW_SIZE : 0;
+    right = pos + INDEL_WINDOW_SIZE;
+    if (types[0] < 0) right -= types[0];
+
+    // in case the alignments stand out the reference
+    for (i = pos; i < right; ++i)
+        if (ref[i] == 0) break;
+    right = i;
+
+
+    /* The following call fixes a long-existing flaw in the INDEL
      * calling model: the interference of nearby SNPs. However, it also
      * reduces the power because sometimes, substitutions caused by
      * indels are not distinguishable from true mutations. Multiple
@@ -187,284 +738,211 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
      *
      * Masks mismatches present in at least 70% of the reads with 'N'.
      */
-    { // construct per-sample consensus
-        int L = right - left + 1, max_i, max2_i;
-        uint32_t *cns, max, max2;
-        char *ref0, *r;
-        ref_sample = (char**) calloc(n, sizeof(char*));
-        cns = (uint32_t*) calloc(L, 4);
-        ref0 = (char*) calloc(L, 1);
-        for (i = 0; i < right - left; ++i)
-            ref0[i] = seq_nt16_table[(int)ref[i+left]];
-        for (s = 0; s < n; ++s) {
-            r = ref_sample[s] = (char*) calloc(L, 1);
-            memset(cns, 0, sizeof(int) * L);
-            // collect ref and non-ref counts
-            for (i = 0; i < n_plp[s]; ++i) {
-                bam_pileup1_t *p = plp[s] + i;
-                bam1_t *b = p->b;
-                uint32_t *cigar = bam_get_cigar(b);
-                uint8_t *seq = bam_get_seq(b);
-                int x = b->core.pos, y = 0;
-                for (k = 0; k < b->core.n_cigar; ++k) {
-                    int op = cigar[k]&0xf;
-                    int j, l = cigar[k]>>4;
-                    if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-                        for (j = 0; j < l; ++j)
-                            if (x + j >= left && x + j < right)
-                                cns[x+j-left] += (bam_seqi(seq, y+j) == ref0[x+j-left])? 1 : 0x10000;
-                        x += l; y += l;
-                    } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l;
-                    else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
-                }
-            }
-            // determine the consensus
-            for (i = 0; i < right - left; ++i) r[i] = ref0[i];
-            max = max2 = 0; max_i = max2_i = -1;
-            for (i = 0; i < right - left; ++i) {
-                if (cns[i]>>16 >= max>>16) max2 = max, max2_i = max_i, max = cns[i], max_i = i;
-                else if (cns[i]>>16 >= max2>>16) max2 = cns[i], max2_i = i;
-            }
-            if ((double)(max&0xffff) / ((max&0xffff) + (max>>16)) >= 0.7) max_i = -1;
-            if ((double)(max2&0xffff) / ((max2&0xffff) + (max2>>16)) >= 0.7) max2_i = -1;
-            if (max_i >= 0) r[max_i] = 15;
-            if (max2_i >= 0) r[max2_i] = 15;
-            //for (i = 0; i < right - left; ++i) fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], stderr); fputc('\n', stderr);
-        }
-        free(ref0); free(cns);
-    }
-    { // the length of the homopolymer run around the current position
-        int c = seq_nt16_table[(int)ref[pos + 1]];
-        if (c == 15) l_run = 1;
-        else {
-            for (i = pos + 2; ref[i]; ++i)
-                if (seq_nt16_table[(int)ref[i]] != c) break;
-            l_run = i;
-            for (i = pos; i >= 0; --i)
-                if (seq_nt16_table[(int)ref[i]] != c) break;
-            l_run -= i + 1;
-        }
-    }
-    // construct the consensus sequence
+    ref_sample = bcf_cgp_ref_sample(n, n_plp, plp, pos, bca, ref, left, right);
+
+    // The length of the homopolymer run around the current position
+    l_run = bcf_cgp_l_run(ref, pos);
+
+    // construct the consensus sequence (minus indels, which are added later)
     max_ins = types[n_types - 1];   // max_ins is at least 0
     if (max_ins > 0) {
-        int *inscns_aux = (int*) calloc(5 * n_types * max_ins, sizeof(int));
-        // count the number of occurrences of each base at each position for each type of insertion
-        for (t = 0; t < n_types; ++t) {
-            if (types[t] > 0) {
-                for (s = 0; s < n; ++s) {
-                    for (i = 0; i < n_plp[s]; ++i) {
-                        bam_pileup1_t *p = plp[s] + i;
-                        if (p->indel == types[t]) {
-                            uint8_t *seq = bam_get_seq(p->b);
-                            for (k = 1; k <= p->indel; ++k) {
-                                int c = seq_nt16_int[bam_seqi(seq, p->qpos + k)];
-                                assert(c<5);
-                                ++inscns_aux[(t*max_ins+(k-1))*5 + c];
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        // use the majority rule to construct the consensus
-        inscns = (char*) calloc(n_types * max_ins, 1);
-        for (t = 0; t < n_types; ++t) {
-            for (j = 0; j < types[t]; ++j) {
-                int max = 0, max_k = -1, *ia = &inscns_aux[(t*max_ins+j)*5];
-                for (k = 0; k < 5; ++k)
-                    if (ia[k] > max)
-                        max = ia[k], max_k = k;
-                inscns[t*max_ins + j] = max? max_k : 4;
-                if ( max_k==4 ) { types[t] = 0; break; } // discard insertions which contain N's
-            }
-        }
-        free(inscns_aux);
+        inscns = bcf_cgp_calc_cons(n, n_plp, plp, pos,
+                                   types, n_types, max_ins, s);
+        if (!inscns)
+            return -1;
     }
+
     // compute the likelihood given each type of indel for each read
     max_ref2 = right - left + 2 + 2 * (max_ins > -types[0]? max_ins : -types[0]);
     ref2  = (char*) calloc(max_ref2, 1);
     query = (char*) calloc(right - left + max_rd_len + max_ins + 2, 1);
-    score1 = (int*) calloc(N * n_types, sizeof(int));
-    score2 = (int*) calloc(N * n_types, sizeof(int));
+    score = (int*) calloc(N * n_types, sizeof(int));
     bca->indelreg = 0;
+    double nqual_over_60 = bca->nqual / 60.0;
+
     for (t = 0; t < n_types; ++t) {
         int l, ir;
-        probaln_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
-        apf1.bw = apf2.bw = abs(types[t]) + 3;
+
         // compute indelreg
-        if (types[t] == 0) ir = 0;
-        else if (types[t] > 0) ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]);
-        else ir = est_indelreg(pos, ref, -types[t], 0);
-        if (ir > bca->indelreg) bca->indelreg = ir;
-//      fprintf(stderr, "%d, %d, %d\n", pos, types[t], ir);
-        // realignment
+        if (types[t] == 0)
+            ir = 0;
+        else if (types[t] > 0)
+            ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]);
+        else
+            ir = est_indelreg(pos, ref, -types[t], 0);
+
+        if (ir > bca->indelreg)
+            bca->indelreg = ir;
+
+        // Identify max deletion length
+        int max_deletion = 0;
+        for (s = 0; s < n; ++s) {
+            for (i = 0; i < n_plp[s]; ++i, ++K) {
+                bam_pileup1_t *p = plp[s] + i;
+                if (max_deletion < -p->indel)
+                    max_deletion = -p->indel;
+            }
+        }
+
+        // Realignment score, computed via BAQ
         for (s = K = 0; s < n; ++s) {
-            // write ref2
+            // Construct ref2 from ref_sample, inscns and indels.
+            // This is now the true sample consensus (possibly prepended
+            // and appended with reference if sample data doesn't span
+            // the full length).
             for (k = 0, j = left; j <= pos; ++j)
                 ref2[k++] = seq_nt16_int[(int)ref_sample[s][j-left]];
-            if (types[t] <= 0) j += -types[t];
-            else for (l = 0; l < types[t]; ++l)
-                     ref2[k++] = inscns[t*max_ins + l];
+
+            if (types[t] <= 0)
+                j += -types[t];
+            else
+                for (l = 0; l < types[t]; ++l)
+                    ref2[k++] = inscns[t*max_ins + l];
+
             for (; j < right && ref[j]; ++j)
                 ref2[k++] = seq_nt16_int[(int)ref_sample[s][j-left]];
-            for (; k < max_ref2; ++k) ref2[k] = 4;
-            if (j < right) right = j;
+            for (; k < max_ref2; ++k)
+                ref2[k] = 4;
+
+            if (right > j)
+                right = j;
+
             // align each read to ref2
             for (i = 0; i < n_plp[s]; ++i, ++K) {
                 bam_pileup1_t *p = plp[s] + i;
-                int qbeg, qend, tbeg, tend, sc, kk;
+
+                // Some basic ref vs alt stats.
+                int imq = p->b->core.qual > 59 ? 59 : p->b->core.qual;
+                imq *= nqual_over_60;
+
+                int sc_len, slen, epos, sc_end;
+
+                // Only need to gather stats on one type, as it's
+                // identical calculation for all the subsequent ones
+                // and we're sharing the same stats array
+                if (t == 0) {
+                    // Gather stats for INFO field to aid filtering.
+                    // mq and sc_len not very helpful for filtering, but could
+                    // help in assigning a better QUAL value.
+                    //
+                    // Pos is slightly useful.
+                    // Base qual can be useful, but need qual prior to BAQ?
+                    // May need to cache orig quals in aux tag so we can fetch
+                    // them even after mpileup step.
+                    get_pos(bca, p, &sc_len, &slen, &epos, &sc_end);
+
+                    assert(imq >= 0 && imq < bca->nqual);
+                    assert(epos >= 0 && epos < bca->npos);
+                    assert(sc_len >= 0 && sc_len < 100);
+                    if (p->indel) {
+                        bca->ialt_mq[imq]++;
+                        bca->ialt_scl[sc_len]++;
+                        bca->ialt_pos[epos]++;
+                    } else {
+                        bca->iref_mq[imq]++;
+                        bca->iref_scl[sc_len]++;
+                        bca->iref_pos[epos]++;
+                    }
+                }
+
+                int qbeg, qpos, qend, tbeg, tend, kk;
                 uint8_t *seq = bam_get_seq(p->b);
                 uint32_t *cigar = bam_get_cigar(p->b);
-                if (p->b->core.flag&4) continue; // unmapped reads
-                // FIXME: the following loop should be better moved outside; nonetheless, realignment should be much slower anyway.
+                if (p->b->core.flag & BAM_FUNMAP) continue;
+
+                // FIXME: the following loop should be better moved outside;
+                // nonetheless, realignment should be much slower anyway.
                 for (kk = 0; kk < p->b->core.n_cigar; ++kk)
-                    if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) break;
-                if (kk < p->b->core.n_cigar) continue;
-                // FIXME: the following skips soft clips, but using them may be more sensitive.
+                    if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP)
+                        break;
+                if (kk < p->b->core.n_cigar)
+                    continue;
+
                 // determine the start and end of sequences for alignment
-                qbeg = tpos2qpos(&p->b->core, bam_get_cigar(p->b), left,  0, &tbeg);
-                qend = tpos2qpos(&p->b->core, bam_get_cigar(p->b), right, 1, &tend);
+                // FIXME: loops over CIGAR multiple times
+                int left2 = left, right2 = right;
+                if (p->b->core.l_qseq > 1000) {
+                    // long read data needs less context.  It also tends to
+                    // have many more candidate indels to investigate so
+                    // speed here matters more.
+                    if (pos - left >= INDEL_WINDOW_SIZE)
+                        left2 += INDEL_WINDOW_SIZE/2;
+                    if (right-pos >= INDEL_WINDOW_SIZE)
+                        right2 -= INDEL_WINDOW_SIZE/2;
+                }
+
+                int r_start = p->b->core.pos;
+                int r_end = bam_cigar2rlen(p->b->core.n_cigar,
+                                           bam_get_cigar(p->b))
+                            -1 + r_start;
+
+                qbeg = tpos2qpos(&p->b->core, bam_get_cigar(p->b), left2,
+                                 0, &tbeg);
+                qpos = tpos2qpos(&p->b->core, bam_get_cigar(p->b), pos,
+                                     0, &tend) - qbeg;
+                qend = tpos2qpos(&p->b->core, bam_get_cigar(p->b), right2,
+                                 1, &tend);
+
                 if (types[t] < 0) {
                     int l = -types[t];
                     tbeg = tbeg - l > left?  tbeg - l : left;
                 }
+
                 // write the query sequence
                 for (l = qbeg; l < qend; ++l)
                     query[l - qbeg] = seq_nt16_int[bam_seqi(seq, l)];
-                { // do realignment; this is the bottleneck
-                    const uint8_t *qual = bam_get_qual(p->b), *bq;
-                    uint8_t *qq;
-                    qq = (uint8_t*) calloc(qend - qbeg, 1);
-                    bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
-                    if (bq) ++bq; // skip type
-                    for (l = qbeg; l < qend; ++l) {
-                        qq[l - qbeg] = bq? qual[l] + (bq[l] - 64) : qual[l];
-                        if (qq[l - qbeg] > 30) qq[l - qbeg] = 30;
-                        if (qq[l - qbeg] < 7) qq[l - qbeg] = 7;
-                    }
-                    sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                        (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
-                    l = (int)(100. * sc / (qend - qbeg) + .499); // used for adjusting indelQ below
-                    if (l > 255) l = 255;
-                    score1[K*n_types + t] = score2[K*n_types + t] = sc<<8 | l;
-                    if (sc > 5) {
-                        sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                            (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
-                        l = (int)(100. * sc / (qend - qbeg) + .499);
-                        if (l > 255) l = 255;
-                        score2[K*n_types + t] = sc<<8 | l;
+
+                // A fudge for now.  Consider checking SAM header for
+                // RG platform field.
+                int long_read = p->b->core.l_qseq > 1000;
+
+                // do realignment; this is the bottleneck
+                if (tend > tbeg) {
+                    if (bcf_cgp_align_score(p, bca, types[t],
+                                            (uint8_t *)ref2 + left2-left,
+                                            (uint8_t *)query,
+                                            r_start, r_end, long_read,
+                                            tbeg, tend, left2, right2,
+                                            qbeg, qend, qpos, max_deletion,
+                                            &score[K*n_types + t]) < 0) {
+                        score[K*n_types + t] = 0xffffff;
+                        return -1;
                     }
-                    free(qq);
+                } else {
+                    // place holder large cost for reads that cover the
+                    // region entirely within a deletion (thus tend < tbeg).
+                    score[K*n_types + t] = 0xffffff;
                 }
 #if 0
                 for (l = 0; l < tend - tbeg + abs(types[t]); ++l)
                     fputc("ACGTN"[(int)ref2[tbeg-left+l]], stderr);
                 fputc('\n', stderr);
-                for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[(int)query[l]], stderr);
+                for (l = 0; l < qend - qbeg; ++l)
+                    fputc("ACGTN"[(int)query[l]], stderr);
                 fputc('\n', stderr);
-                fprintf(stderr, "pos=%d type=%d read=%d:%d name=%s qbeg=%d tbeg=%d score=%d\n", pos, types[t], s, i, bam_get_qname(p->b), qbeg, tbeg, sc);
+                fprintf(stderr, "pos=%d type=%d read=%d:%d name=%s "
+                        "qbeg=%d tbeg=%d score=%d\n",
+                        pos, types[t], s, i, bam_get_qname(p->b),
+                        qbeg, tbeg, sc);
 #endif
             }
         }
     }
-    free(ref2); free(query);
-    { // compute indelQ
-        int sc_a[16], sumq_a[16];
-        int tmp, *sc = sc_a, *sumq = sumq_a;
-        if (n_types > 16) {
-            sc   = (int *)malloc(n_types * sizeof(int));
-            sumq = (int *)malloc(n_types * sizeof(int));
-        }
-        memset(sumq, 0, n_types * sizeof(int));
-        for (s = K = 0; s < n; ++s) {
-            for (i = 0; i < n_plp[s]; ++i, ++K) {
-                bam_pileup1_t *p = plp[s] + i;
-                int *sct = &score1[K*n_types], indelQ1, indelQ2, seqQ, indelQ;
-                for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
-                for (t = 1; t < n_types; ++t) // insertion sort
-                    for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
-                        tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp;
-                /* errmod_cal() assumes that if the call is wrong, the
-                 * likelihoods of other events are equal. This is about
-                 * right for substitutions, but is not desired for
-                 * indels. To reuse errmod_cal(), I have to make
-                 * compromise for multi-allelic indels.
-                 */
-                if ((sc[0]&0x3f) == ref_type) {
-                    indelQ1 = (sc[1]>>14) - (sc[0]>>14);
-                    seqQ = est_seqQ(bca, types[sc[1]&0x3f], l_run);
-                } else {
-                    for (t = 0; t < n_types; ++t) // look for the reference type
-                        if ((sc[t]&0x3f) == ref_type) break;
-                    indelQ1 = (sc[t]>>14) - (sc[0]>>14);
-                    seqQ = est_seqQ(bca, types[sc[0]&0x3f], l_run);
-                }
-                tmp = sc[0]>>6 & 0xff;
-                indelQ1 = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ1 + .499); // reduce indelQ
-                sct = &score2[K*n_types];
-                for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
-                for (t = 1; t < n_types; ++t) // insertion sort
-                    for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
-                        tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp;
-                if ((sc[0]&0x3f) == ref_type) {
-                    indelQ2 = (sc[1]>>14) - (sc[0]>>14);
-                } else {
-                    for (t = 0; t < n_types; ++t) // look for the reference type
-                        if ((sc[t]&0x3f) == ref_type) break;
-                    indelQ2 = (sc[t]>>14) - (sc[0]>>14);
-                }
-                tmp = sc[0]>>6 & 0xff;
-                indelQ2 = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ2 + .499);
-                // pick the smaller between indelQ1 and indelQ2
-                indelQ = indelQ1 < indelQ2? indelQ1 : indelQ2;
-                if (indelQ > 255) indelQ = 255;
-                if (seqQ > 255) seqQ = 255;
-                p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ; // use 22 bits in total
-                sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ;
-//              fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
-            }
-        }
-        // determine bca->indel_types[] and bca->inscns
-        bca->maxins = max_ins;
-        bca->inscns = (char*) realloc(bca->inscns, bca->maxins * 4);
-        for (t = 0; t < n_types; ++t)
-            sumq[t] = sumq[t]<<6 | t;
-        for (t = 1; t < n_types; ++t) // insertion sort
-            for (j = t; j > 0 && sumq[j] > sumq[j-1]; --j)
-                tmp = sumq[j], sumq[j] = sumq[j-1], sumq[j-1] = tmp;
-        for (t = 0; t < n_types; ++t) // look for the reference type
-            if ((sumq[t]&0x3f) == ref_type) break;
-        if (t) { // then move the reference type to the first
-            tmp = sumq[t];
-            for (; t > 0; --t) sumq[t] = sumq[t-1];
-            sumq[0] = tmp;
-        }
-        for (t = 0; t < 4; ++t) bca->indel_types[t] = B2B_INDEL_NULL;
-        for (t = 0; t < 4 && t < n_types; ++t) {
-            bca->indel_types[t] = types[sumq[t]&0x3f];
-            memcpy(&bca->inscns[t * bca->maxins], &inscns[(sumq[t]&0x3f) * max_ins], bca->maxins);
-        }
-        // update p->aux
-        for (s = n_alt = 0; s < n; ++s) {
-            for (i = 0; i < n_plp[s]; ++i) {
-                bam_pileup1_t *p = plp[s] + i;
-                int x = types[p->aux>>16&0x3f];
-                for (j = 0; j < 4; ++j)
-                    if (x == bca->indel_types[j]) break;
-                p->aux = j<<16 | (j == 4? 0 : (p->aux&0xffff));
-                if ((p->aux>>16&0x3f) > 0) ++n_alt;
-                //fprintf(stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam_get_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
-            }
-        }
 
-        if (sc   != sc_a)   free(sc);
-        if (sumq != sumq_a) free(sumq);
-    }
-    free(score1); free(score2);
+    // compute indelQ
+    n_alt = bcf_cgp_compute_indelQ(n, n_plp, plp, bca, inscns, l_run, max_ins,
+                                   ref_type, types, n_types, score);
+
     // free
-    for (i = 0; i < n; ++i) free(ref_sample[i]);
+    free(ref2);
+    free(query);
+    free(score);
+
+    for (i = 0; i < n; ++i)
+        free(ref_sample[i]);
+
     free(ref_sample);
     free(types); free(inscns);
+
     return n_alt > 0? 0 : -1;
 }
diff --git a/bcftools/bam2bcf_indel.c.pysam.c b/bcftools/bam2bcf_indel.c.pysam.c
index 67fff21..82bf31c 100644
--- a/bcftools/bam2bcf_indel.c.pysam.c
+++ b/bcftools/bam2bcf_indel.c.pysam.c
@@ -3,7 +3,7 @@
 /*  bam2bcf_indel.c -- indel caller.
 
     Copyright (C) 2010, 2011 Broad Institute.
-    Copyright (C) 2012-2014,2016 Genome Research Ltd.
+    Copyright (C) 2012-2014,2016-2017, 2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -28,19 +28,29 @@ DEALINGS IN THE SOFTWARE.  */
 #include <assert.h>
 #include <ctype.h>
 #include <string.h>
+#include <math.h>
 #include <htslib/hts.h>
 #include <htslib/sam.h>
 #include <htslib/khash_str2int.h>
 #include "bam2bcf.h"
+#include "str_finder.h"
 
 #include <htslib/ksort.h>
 KSORT_INIT_GENERIC(uint32_t)
 
 #define MINUS_CONST 0x10000000
-#define INDEL_WINDOW_SIZE 50
+#define INDEL_WINDOW_SIZE 110
 
+#define MAX_TYPES 64
+
+// Take a reference position tpos and convert to a query position (returned).
+// This uses the CIGAR string plus alignment c->pos to do the mapping.
+//
+// *_tpos is returned as tpos if query overlaps tpos, but for deletions
+// it'll be either the start (is_left) or end (!is_left) ref position.
 static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos)
 {
+    // x = pos in ref, y = pos in query seq
     int k, x = c->pos, y = 0, last_y = 0;
     *_tpos = c->pos;
     for (k = 0; k < c->n_cigar; ++k) {
@@ -66,6 +76,7 @@ static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos,
     *_tpos = x;
     return last_y;
 }
+
 // FIXME: check if the inserted sequence is consistent with the homopolymer run
 // l is the relative gap length and l_run is the length of the homopolymer on the reference
 static inline int est_seqQ(const bcf_callaux_t *bca, int l, int l_run)
@@ -89,21 +100,609 @@ static inline int est_indelreg(int pos, const char *ref, int l, char *ins4)
     return max_i - pos;
 }
 
+// Identify soft-clip length, position in seq, and clipped seq len
+static inline void get_pos(const bcf_callaux_t *bca, bam_pileup1_t *p,
+                           int *sc_len_r, int *slen_r, int *epos_r, int *end) {
+    bam1_t *b = p->b;
+    int sc_len = 0, sc_dist = -1, at_left = 1;
+    int epos = p->qpos, slen = b->core.l_qseq;
+    int k;
+    uint32_t *cigar = bam_get_cigar(b);
+    *end = -1; // -1: no soft-clip used; 0: left clip; 1: right clip
+    for (k = 0; k < b->core.n_cigar; k++) {
+        int op = bam_cigar_op(cigar[k]);
+        if (op == BAM_CSOFT_CLIP) {
+            slen -= bam_cigar_oplen(cigar[k]);
+            if (at_left) {
+                // left end
+                sc_len += bam_cigar_oplen(cigar[k]);
+                epos -= sc_len; // don't count SC in seq pos
+                sc_dist = epos;
+                *end = 0;
+            } else {
+                // right end
+                int srlen = bam_cigar_oplen(cigar[k]);
+                int rd = b->core.l_qseq - srlen - p->qpos;
+                if (sc_dist < 0 || sc_dist > rd) {
+                    // closer to right end than left
+                    // FIXME: compensate for indel length too?
+                    sc_dist = rd;
+                    sc_len = srlen;
+                    *end = 1;
+                }
+            }
+        } else if (op != BAM_CHARD_CLIP) {
+            at_left = 0; // any op other than S/H ends the leading clips
+        }
+    }
+
+    if (p->indel > 0 && slen - (epos+p->indel) < epos)
+        epos += p->indel-1; // end of insertion, if near end of seq
+
+    // slen is now length of sequence minus soft-clips and
+    // epos is position of indel in seq minus left-clip (scaled by bca->npos).
+    *epos_r = (double)epos / (slen+1) * bca->npos;
+
+    if (sc_len) {
+        // scale importance of clip by distance to closest end; capped at 99
+        *sc_len_r = 15.0*sc_len / (sc_dist+1);
+        if (*sc_len_r > 99) *sc_len_r = 99;
+    } else {
+        *sc_len_r = 0;
+    }
+
+    *slen_r = slen;
+}
+
+// Part of bcf_call_gap_prep.
+//
+// Scans the pileup to identify all the different sizes of indels
+// present.
+//
+// Returns types and fills out n_types_r, max_rd_len_r, ref_type_r and N_r,
+//         or NULL on error or when the position is to be skipped.
+static int *bcf_cgp_find_types(int n, int *n_plp, bam_pileup1_t **plp,
+                               int pos, bcf_callaux_t *bca, const char *ref,
+                               int *max_rd_len_r, int *n_types_r,
+                               int *ref_type_r, int *N_r) {
+    int i, j, t, s, N, m, max_rd_len, n_types;
+    int n_alt = 0, n_tot = 0, indel_support_ok = 0;
+    uint32_t *aux;
+    int *types;
+
+    // N is the total number of reads
+    for (s = N = 0; s < n; ++s)
+        N += n_plp[s];
+
+    bca->max_support = bca->max_frac = 0;
+    aux = (uint32_t*) calloc(N + 1, 4); // +1 for the REF (zero indel) entry
+    if (!aux)
+        return NULL;
+
+    m = max_rd_len = 0;
+    aux[m++] = MINUS_CONST; // zero indel is always a type (REF)
+
+    // Fill out aux[] array with all the non-zero indel sizes.
+    // Also tally number with indels (n_alt) and total (n_tot).
+    for (s = 0; s < n; ++s) {
+        int na = 0, nt = 0;
+        for (i = 0; i < n_plp[s]; ++i) {
+            const bam_pileup1_t *p = plp[s] + i;
+            ++nt;
+            if (p->indel != 0) {
+                ++na;
+                aux[m++] = MINUS_CONST + p->indel; // biased so it sorts unsigned
+            }
+
+            // FIXME: cache me in pileup struct.
+            j = bam_cigar2qlen(p->b->core.n_cigar, bam_get_cigar(p->b));
+            if (j > max_rd_len) max_rd_len = j;
+        }
+        double frac = (double)na/nt;
+        if ( !indel_support_ok && na >= bca->min_support
+             && frac >= bca->min_frac )
+            indel_support_ok = 1;
+        if ( na > bca->max_support && frac > 0 )
+            bca->max_support = na, bca->max_frac = frac;
+
+        n_alt += na;
+        n_tot += nt;
+    }
+
+    // Sort aux[] and count the distinct values (n_types)
+    ks_introsort(uint32_t, m, aux);
+    for (i = 1, n_types = 1; i < m; ++i)
+        if (aux[i] != aux[i-1]) ++n_types;
+
+    // Taking totals makes it hard to call rare indels (IMF filter)
+    if ( !bca->per_sample_flt )
+        indel_support_ok = ( (double)n_alt / n_tot < bca->min_frac
+                             || n_alt < bca->min_support )
+            ? 0 : 1;
+    if ( n_types == 1 || !indel_support_ok ) { // then skip
+        free(aux);
+        return NULL;
+    }
+
+    // Bail out if we have far too many types of indel
+    if (n_types >= MAX_TYPES) {
+        free(aux);
+        // TODO revisit how/whether to control printing this warning
+        if (hts_verbose >= 2)
+            fprintf(bcftools_stderr, "[%s] excessive INDEL alleles at position %d. "
+                    "Skip the position.\n", __func__, pos + 1);
+        return NULL;
+    }
+
+    // To prevent long stretches of N's to be mistaken for indels
+    // (sometimes thousands of bases), check the number of N's in the
+    // sequence and skip places where more than half the reference bases are Ns.
+    int nN=0, i_end = pos + (2*INDEL_WINDOW_SIZE < max_rd_len
+                            ?2*INDEL_WINDOW_SIZE : max_rd_len);
+    for (i=pos; i<i_end && ref[i]; i++)
+        nN += ref[i] == 'N';
+    if ( nN*2>(i-pos) ) {
+        free(aux);
+        return NULL;
+    }
+
+    // Finally fill out the types[] array detailing the size of insertion
+    // or deletion.
+    types = (int*)calloc(n_types, sizeof(int));
+    if (!types) {
+        free(aux);
+        return NULL;
+    }
+    t = 0;
+    types[t++] = aux[0] - MINUS_CONST;
+    for (i = 1; i < m; ++i)
+        if (aux[i] != aux[i-1])
+            types[t++] = aux[i] - MINUS_CONST;
+    free(aux);
+
+    // Find reference type (the index t where types[t] == 0)
+    for (t = 0; t < n_types; ++t)
+        if (types[t] == 0) break;
+
+    *ref_type_r   = t;
+    *n_types_r    = n_types;
+    *max_rd_len_r = max_rd_len;
+    *N_r          = N;
+
+    return types;
+}
+
+// Part of bcf_call_gap_prep.
+//
+// Construct per-sample consensus.
+//
+// Returns an array of n consensus seqs (4-bit codes, right-left+1 bytes each),
+//         or NULL on failure (everything allocated here is freed first).
+static char **bcf_cgp_ref_sample(int n, int *n_plp, bam_pileup1_t **plp,
+                                 int pos, bcf_callaux_t *bca, const char *ref,
+                                 int left, int right) {
+    int i, k, s, L = right - left + 1, max_i, max2_i;
+    char **ref_sample; // returned
+    uint32_t *cns = NULL, max, max2;
+    char *ref0 = NULL, *r;
+    ref_sample = (char**) calloc(n, sizeof(char*));
+    cns = (uint32_t*) calloc(L, 4);
+    ref0 = (char*) calloc(L, 1);
+    if (!ref_sample || !cns || !ref0) {
+        n = 0;
+        goto err;
+    }
+
+    // Convert ref ASCII to 0-15.
+    for (i = 0; i < right - left; ++i)
+        ref0[i] = seq_nt16_table[(int)ref[i+left]];
+
+    // NB: one consensus per sample 'n', not per indel type.
+    // FIXME: consider fixing this.  We should compute alignments vs
+    // types, not vs samples?  Or types/sample combined?
+    for (s = 0; s < n; ++s) {
+        r = ref_sample[s] = (char*) calloc(L, 1);
+        if (!r) {
+            n = s; // entries [0, s) were allocated; err frees exactly those
+            goto err;
+        }
+
+        memset(cns, 0, sizeof(int) * L);
+
+        // collect ref and non-ref counts in cns
+        for (i = 0; i < n_plp[s]; ++i) {
+            bam_pileup1_t *p = plp[s] + i;
+            bam1_t *b = p->b;
+            uint32_t *cigar = bam_get_cigar(b);
+            uint8_t *seq = bam_get_seq(b);
+            int x = b->core.pos, y = 0;
+
+            // TODO: pileup exposes pileup_ind, but we also need e.g.
+            // pileup_len to know how much of the current CIGAR op-len
+            // we've used (or have remaining).  If we had that, we
+            // could start at p->qpos without having to scan through
+            // the entire CIGAR string until we find it.
+            //
+            // Without it about all we could do is have a side channel
+            // to cache the last known coords.  Messy, so punt for now.
+            // This is no longer the bottle neck until we get to 1000s of
+            // CIGAR ops.
+
+            for (k = 0; k < b->core.n_cigar; ++k) {
+                int op = cigar[k]&0xf;
+                int j, l = cigar[k]>>4;
+                if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
+                    if (x + l >= left) {
+                        j = left - x > 0 ? left - x : 0;
+                        int j_end = right - x < l ? right - x : l;
+                        for (; j < j_end; j++)
+                            // Append to cns.  Note this is ref coords,
+                            // so insertions aren't in cns and deletions
+                            // will have lower coverage.
+
+                            // FIXME: want true consensus (with ins) per
+                            // type, so we can independently compare each
+                            // seq to each consensus and see which it
+                            // matches best, so we get proper GT analysis.
+                            cns[x+j-left] +=
+                                (bam_seqi(seq, y+j) == ref0[x+j-left])
+                                ? 1        // REF
+                                : (1<<16); // ALT
+                    }
+                    x += l; y += l;
+                } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) {
+                    x += l;
+                } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
+                    y += l;
+                }
+
+                if (x > right)
+                    break;
+            }
+        }
+
+        // Determine a sample specific reference.
+        for (i = 0; i < right - left; ++i)
+            r[i] = ref0[i];
+
+        // Find deepest and 2nd deepest ALT region (max & max2).
+        max = max2 = 0; max_i = max2_i = -1;
+        for (i = 0; i < right - left; ++i) {
+            if (cns[i]>>16 >= max>>16)
+                max2 = max, max2_i = max_i, max = cns[i], max_i = i;
+            else if (cns[i]>>16 >= max2>>16)
+                max2 = cns[i], max2_i = i;
+        }
+
+        // Masks mismatches present in at least 70% of the reads with 'N'.
+        // This code is nREF/(nREF+n_ALT) >= 70% for deepest region.
+        // The effect is that at least 30% of bases differing to REF will
+        // use "N" in consensus, so we don't penalise ALT or REF when
+        // aligning against it.  (A poor man IUPAC code)
+        //
+        // Why is it only done in two loci at most?
+        if ((double)(max&0xffff) / ((max&0xffff) + (max>>16)) >= 0.7)
+            max_i = -1;
+        if ((double)(max2&0xffff) / ((max2&0xffff) + (max2>>16)) >= 0.7)
+            max2_i = -1;
+        if (max_i >= 0) r[max_i] = 15;
+        if (max2_i >= 0) r[max2_i] = 15;
+
+        //for (i = 0; i < right - left; ++i)
+        //    fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], bcftools_stderr);
+        //fputc('\n', bcftools_stderr);
+    }
+
+    free(ref0);
+    free(cns);
+
+    return ref_sample;
+
+ err:
+    free(ref0);
+    free(cns);
+    if (ref_sample) {
+        for (s = 0; s < n; s++)
+            free(ref_sample[s]);
+        free(ref_sample);
+    }
+
+    return NULL;
+}
+
+// The length of the homopolymer run around the current position (ref[pos+1])
+static int bcf_cgp_l_run(const char *ref, int pos) {
+    int i, l_run;
+
+    int c = seq_nt16_table[(int)ref[pos + 1]]; // 4-bit code of base after pos
+    if (c == 15) { // code 15 (presumably N/ambiguous): no run is measured
+        l_run = 1;
+    } else {
+        for (i = pos + 2; ref[i]; ++i)
+            if (seq_nt16_table[(int)ref[i]] != c) break;
+        l_run = i; // one past the right end of the run
+        for (i = pos; i >= 0; --i)
+            if (seq_nt16_table[(int)ref[i]] != c) break;
+        l_run -= i + 1; // i is one before the left end of the run
+    }
+
+    return l_run;
+}
+
+
+// Compute the insertion consensus per indel type (all samples pooled; the 's'
+// argument is overwritten as a loop index).  Returns NULL on alloc failure.
+static char *bcf_cgp_calc_cons(int n, int *n_plp, bam_pileup1_t **plp,
+                               int pos, int *types, int n_types,
+                               int max_ins, int s) {
+    int i, j, t, k;
+    int *inscns_aux = (int*)calloc(5 * n_types * max_ins, sizeof(int));
+    if (!inscns_aux) return NULL;
+
+    // Count the number of occurrences of each base at each position for
+    // each type of insertion.
+    for (t = 0; t < n_types; ++t) {
+        if (types[t] > 0) {
+            for (s = 0; s < n; ++s) {
+                for (i = 0; i < n_plp[s]; ++i) {
+                    bam_pileup1_t *p = plp[s] + i;
+                    if (p->indel == types[t]) {
+                        uint8_t *seq = bam_get_seq(p->b);
+                        for (k = 1; k <= p->indel; ++k) {
+                            int c = seq_nt16_int[bam_seqi(seq, p->qpos + k)];
+                            assert(c<5);
+                            ++inscns_aux[(t*max_ins+(k-1))*5 + c];
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Use the majority rule to construct the consensus
+    char *inscns = (char *)calloc(n_types * max_ins, 1);
+    if (!inscns) { free(inscns_aux); return NULL; } // don't write through NULL
+    for (t = 0; t < n_types; ++t) {
+        for (j = 0; j < types[t]; ++j) {
+            int max = 0, max_k = -1, *ia = &inscns_aux[(t*max_ins+j)*5];
+            for (k = 0; k < 5; ++k)
+                if (ia[k] > max)
+                    max = ia[k], max_k = k;
+            inscns[t*max_ins + j] = max ? max_k : 4;
+            if (max_k == 4) {
+                // discard insertions which contain N's
+                types[t] = 0;
+                break;
+            }
+        }
+    }
+    free(inscns_aux);
+
+    return inscns;
+}
+
+#ifndef MIN
+#  define MIN(a,b) ((a)<(b)?(a):(b))
+#endif
+
+// Part of bcf_call_gap_prep.
+//
+// Realign using BAQ to get an alignment score of a single read vs
+// a haplotype consensus.
+//
+// Fills out *score (0xffffff if probaln_glocal returns a negative value)
+// Returns 0 on success,
+//        <0 on error
+static int bcf_cgp_align_score(bam_pileup1_t *p, bcf_callaux_t *bca,
+                               int type, uint8_t *ref2, uint8_t *query,
+                               int r_start, int r_end, int long_read,
+                               int tbeg, int tend,
+                               int left, int right,
+                               int qbeg, int qend,
+                               int qpos, int max_deletion,
+                               int *score) {
+    // Illumina
+    probaln_par_t apf = { 1e-4, 1e-2, 10 };
+
+    // Parameters that work better on PacBio CCS 15k.
+    // We should consider querying the header and RG PU field.
+    // See also htslib/realn.c:sam_prob_realn()
+    if (long_read) {
+        apf.d = 1e-3;
+        apf.e = 1e-1;
+    }
+
+    type = abs(type); // only the indel size is used from here on
+    apf.bw = type + 3;
+    int l, sc;
+    const uint8_t *qual = bam_get_qual(p->b), *bq;
+    uint8_t *qq;
+
+    // Get segment of quality, either ZQ tag or if absent QUAL.
+    if (!(qq = (uint8_t*) calloc(qend - qbeg, 1)))
+        return -1;
+    bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
+    if (bq) ++bq; // skip type
+    for (l = qbeg; l < qend; ++l) {
+        int qval = bq? qual[l] + (bq[l] - 64) : qual[l];
+        if (qval > 30)
+            qval = 30;
+        if (qval < 7)
+            qval = 7;
+        qq[l - qbeg] = qval; // clamped to [7, 30]
+    }
+
+    // The bottom 8 bits are length-normalised score while
+    // the top bits are unnormalised.
+    sc = probaln_glocal(ref2 + tbeg - left, tend - tbeg + type,
+                        query, qend - qbeg, qq, &apf, 0, 0);
+    if (sc < 0) {
+        *score = 0xffffff;
+        free(qq);
+        return 0;
+    }
+
+    // used for adjusting indelQ below; NOTE(review): assumes qend > qbeg
+    l = (int)(100. * sc / (qend - qbeg) + .499) * bca->indel_bias;
+    *score = sc<<8 | MIN(255, l);
+
+    rep_ele *reps, *elt, *tmp;
+    uint8_t *seg = ref2 + tbeg - left;
+    int seg_len = tend - tbeg + type;
+
+    // Note: although seg moves (tbeg varies), ref2 is reused many times
+    // so we could factor out some find_STR calls.  However it's not the
+    // bottleneck for now.
+
+    // FIXME: need to make this work on IUPAC.
+    reps = find_STR((char *)seg, seg_len, 0);
+    int iscore = 0;
+
+    // Identify STRs in ref covering the indel up to
+    // (or close to) the end of the sequence.
+    // Those having an indel and right at the sequence
+    // end do not confirm the total length of indel
+    // size.  Specifically a *lack* of indel at the
+    // end, where we know indels occur in other
+    // sequences, is a possible reference bias.
+    //
+    // This is emphasised further if the sequence ends with
+    // soft clipping.
+    DL_FOREACH_SAFE(reps, elt, tmp) {
+        if (elt->start <= qpos && elt->end >= qpos) {
+            iscore += (elt->end-elt->start) / elt->rep_len;  // c
+            if (elt->start+tbeg <= r_start ||
+                elt->end+tbeg   >= r_end)
+                iscore += 2*(elt->end-elt->start);
+       }
+
+        DL_DELETE(reps, elt); // every element is unlinked and freed here
+        free(elt);
+    }
+
+    // Apply STR score to existing indelQ (low 8 bits only, capped at 255)
+    l  =  (*score&0xff)*.8 + iscore*2;
+    *score = (*score & ~0xff) | MIN(255, l);
+
+    free(qq);
+
+    return 0;
+}
+
+// Part of bcf_call_gap_prep.
+//
+// Returns n_alt on success
+//         -1 on failure (bca->inscns and bca->maxins are then left unchanged)
+static int bcf_cgp_compute_indelQ(int n, int *n_plp, bam_pileup1_t **plp,
+                                  bcf_callaux_t *bca, char *inscns,
+                                  int l_run, int max_ins,
+                                  int ref_type, int *types, int n_types,
+                                  int *score) {
+    // n_types must stay below MAX_TYPES (bcf_cgp_find_types rejects more).
+    int sc[MAX_TYPES], sumq[MAX_TYPES], s, i, j, t, K, n_alt, tmp;
+    memset(sumq, 0, n_types * sizeof(int));
+    for (s = K = 0; s < n; ++s) {
+        for (i = 0; i < n_plp[s]; ++i, ++K) {
+            bam_pileup1_t *p = plp[s] + i;
+            int *sct = &score[K*n_types], seqQ, indelQ;
+            for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
+            for (t = 1; t < n_types; ++t) // insertion sort
+                for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
+                    tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp;
+
+            /* errmod_cal() assumes that if the call is wrong, the
+             * likelihoods of other events are equal. This is about
+             * right for substitutions, but is not desired for
+             * indels. To reuse errmod_cal(), I have to make
+             * compromise for multi-allelic indels.
+             */
+            if ((sc[0]&0x3f) == ref_type) {
+                indelQ = (sc[1]>>14) - (sc[0]>>14);
+                seqQ = est_seqQ(bca, types[sc[1]&0x3f], l_run);
+            } else {
+                for (t = 0; t < n_types; ++t) // look for the reference type
+                    if ((sc[t]&0x3f) == ref_type) break;
+                indelQ = (sc[t]>>14) - (sc[0]>>14);
+                seqQ = est_seqQ(bca, types[sc[0]&0x3f], l_run);
+            }
+            tmp = sc[0]>>6 & 0xff;
+            // reduce indelQ
+            indelQ = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ + .499);
+
+            // Doesn't really help accuracy, but permits -h to take
+            // effect still.
+            if (indelQ > seqQ) indelQ = seqQ;
+            if (indelQ > 255) indelQ = 255;
+            if (seqQ > 255) seqQ = 255;
+            p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ; // use 22 bits in total
+            sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ;
+            //              fprintf(bcftools_stderr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
+        }
+    }
+    // determine bca->indel_types[] and bca->inscns
+    char *new_inscns = (char*) realloc(bca->inscns, max_ins * 4);
+    if (max_ins && !new_inscns) return -1; // old buffer is still owned by bca
+    bca->maxins = max_ins;
+    bca->inscns = new_inscns;
+    for (t = 0; t < n_types; ++t)
+        sumq[t] = sumq[t]<<6 | t;
+    for (t = 1; t < n_types; ++t) // insertion sort
+        for (j = t; j > 0 && sumq[j] > sumq[j-1]; --j)
+            tmp = sumq[j], sumq[j] = sumq[j-1], sumq[j-1] = tmp;
+    for (t = 0; t < n_types; ++t) // look for the reference type
+        if ((sumq[t]&0x3f) == ref_type) break;
+    if (t) { // then move the reference type to the first
+        tmp = sumq[t];
+        for (; t > 0; --t) sumq[t] = sumq[t-1];
+        sumq[0] = tmp;
+    }
+    for (t = 0; t < 4; ++t) bca->indel_types[t] = B2B_INDEL_NULL;
+    for (t = 0; t < 4 && t < n_types; ++t) {
+        bca->indel_types[t] = types[sumq[t]&0x3f];
+        if (bca->maxins)
+            memcpy(&bca->inscns[t * bca->maxins],
+                   &inscns[(sumq[t]&0x3f) * max_ins], bca->maxins);
+    }
+    // update p->aux
+    for (s = n_alt = 0; s < n; ++s) {
+        for (i = 0; i < n_plp[s]; ++i) {
+            bam_pileup1_t *p = plp[s] + i;
+            int x = types[p->aux>>16&0x3f];
+            for (j = 0; j < 4; ++j)
+                if (x == bca->indel_types[j]) break;
+            p->aux = j<<16 | (j == 4? 0 : (p->aux&0xffff));
+            if ((p->aux>>16&0x3f) > 0) ++n_alt;
+            //fprintf(bcftools_stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam_get_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
+        }
+    }
+
+    return n_alt;
+}
+
+/*
+FIXME: with high number of samples, do we handle IMF correctly?  Is it
+fraction of indels across entire data set, or just fraction for this
+specific sample? Needs to check bca->per_sample_flt (--per-sample-mF) opt.
+ */
+
 /*
     notes:
-        - n .. number of samples
-        - the routine sets bam_pileup1_t.aux of each read as follows:
-            - 6: unused
-            - 6: the call; index to bcf_callaux_t.indel_types   .. (aux>>16)&0x3f
-            - 8: estimated sequence quality                     .. (aux>>8)&0xff
-            - 8: indel quality                                  .. aux&0xff
+    - n .. number of samples
+    - the routine sets bam_pileup1_t.aux of each read as follows:
+        - 6: unused
+        - 6: the call; index to bcf_callaux_t.indel_types   .. (aux>>16)&0x3f
+        - 8: estimated sequence quality                     .. (aux>>8)&0xff
+        - 8: indel quality                                  .. aux&0xff
  */
-int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref)
+int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos,
+                      bcf_callaux_t *bca, const char *ref)
 {
-    int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2;
+    if (ref == 0 || bca == 0) return -1;
+
+    int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins;
+    int *score, max_ref2;
     int N, K, l_run, ref_type, n_alt;
     char *inscns = 0, *ref2, *query, **ref_sample;
-    if (ref == 0 || bca == 0) return -1;
 
     // determine if there is a gap
     for (s = N = 0; s < n; ++s) {
@@ -111,77 +710,29 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
             if (plp[s][i].indel != 0) break;
         if (i < n_plp[s]) break;
     }
-    if (s == n) return -1; // there is no indel at this position.
-    for (s = N = 0; s < n; ++s) N += n_plp[s]; // N is the total number of reads
-    { // find out how many types of indels are present
-        bca->max_support = bca->max_frac = 0;
-        int m, n_alt = 0, n_tot = 0, indel_support_ok = 0;
-        uint32_t *aux;
-        aux = (uint32_t*) calloc(N + 1, 4);
-        m = max_rd_len = 0;
-        aux[m++] = MINUS_CONST; // zero indel is always a type
-        for (s = 0; s < n; ++s) {
-            int na = 0, nt = 0;
-            for (i = 0; i < n_plp[s]; ++i) {
-                const bam_pileup1_t *p = plp[s] + i;
-                ++nt;
-                if (p->indel != 0) {
-                    ++na;
-                    aux[m++] = MINUS_CONST + p->indel;
-                }
-                j = bam_cigar2qlen(p->b->core.n_cigar, bam_get_cigar(p->b));
-                if (j > max_rd_len) max_rd_len = j;
-            }
-            double frac = (double)na/nt;
-            if ( !indel_support_ok && na >= bca->min_support && frac >= bca->min_frac )
-                indel_support_ok = 1;
-            if ( na > bca->max_support && frac > 0 ) bca->max_support = na, bca->max_frac = frac;
-            n_alt += na;
-            n_tot += nt;
-        }
-        // To prevent long stretches of N's to be mistaken for indels (sometimes thousands of bases),
-        //  check the number of N's in the sequence and skip places where half or more reference bases are Ns.
-        int nN=0; for (i=pos; i-pos<max_rd_len && ref[i]; i++) if ( ref[i]=='N' ) nN++;
-        if ( nN*2>(i-pos) ) { free(aux); return -1; }
-
-        ks_introsort(uint32_t, m, aux);
-        // squeeze out identical types
-        for (i = 1, n_types = 1; i < m; ++i)
-            if (aux[i] != aux[i-1]) ++n_types;
-        // Taking totals makes it hard to call rare indels
-        if ( !bca->per_sample_flt )
-            indel_support_ok = ( (double)n_alt / n_tot < bca->min_frac || n_alt < bca->min_support ) ? 0 : 1;
-        if ( n_types == 1 || !indel_support_ok ) { // then skip
-            free(aux); return -1;
-        }
-        if (n_types >= 64) {
-            free(aux);
-            // TODO revisit how/whether to control printing this warning
-            if (hts_verbose >= 2)
-                fprintf(bcftools_stderr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1);
-            return -1;
-        }
-        types = (int*)calloc(n_types, sizeof(int));
-        t = 0;
-        types[t++] = aux[0] - MINUS_CONST;
-        for (i = 1; i < m; ++i)
-            if (aux[i] != aux[i-1])
-                types[t++] = aux[i] - MINUS_CONST;
-        free(aux);
-        for (t = 0; t < n_types; ++t)
-            if (types[t] == 0) break;
-        ref_type = t; // the index of the reference type (0)
-    }
-    { // calculate left and right boundary
-        left = pos > INDEL_WINDOW_SIZE? pos - INDEL_WINDOW_SIZE : 0;
-        right = pos + INDEL_WINDOW_SIZE;
-        if (types[0] < 0) right -= types[0];
-        // in case the alignments stand out the reference
-        for (i = pos; i < right; ++i)
-            if (ref[i] == 0) break;
-        right = i;
-    }
-    /* The following block fixes a long-existing flaw in the INDEL
+    if (s == n)
+        // there is no indel at this position.
+        return -1;
+
+    // find out how many types of indels are present
+    types = bcf_cgp_find_types(n, n_plp, plp, pos, bca, ref,
+                               &max_rd_len, &n_types, &ref_type, &N);
+    if (!types)
+        return -1;
+
+
+    // calculate left and right boundary
+    left = pos > INDEL_WINDOW_SIZE? pos - INDEL_WINDOW_SIZE : 0;
+    right = pos + INDEL_WINDOW_SIZE;
+    if (types[0] < 0) right -= types[0];
+
+    // in case the alignments extend past the end of the reference
+    for (i = pos; i < right; ++i)
+        if (ref[i] == 0) break;
+    right = i;
+
+
+    /* The following call fixes a long-existing flaw in the INDEL
      * calling model: the interference of nearby SNPs. However, it also
      * reduces the power because sometimes, substitutions caused by
      * indels are not distinguishable from true mutations. Multiple
@@ -189,284 +740,211 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
      *
      * Masks mismatches present in at least 70% of the reads with 'N'.
      */
-    { // construct per-sample consensus
-        int L = right - left + 1, max_i, max2_i;
-        uint32_t *cns, max, max2;
-        char *ref0, *r;
-        ref_sample = (char**) calloc(n, sizeof(char*));
-        cns = (uint32_t*) calloc(L, 4);
-        ref0 = (char*) calloc(L, 1);
-        for (i = 0; i < right - left; ++i)
-            ref0[i] = seq_nt16_table[(int)ref[i+left]];
-        for (s = 0; s < n; ++s) {
-            r = ref_sample[s] = (char*) calloc(L, 1);
-            memset(cns, 0, sizeof(int) * L);
-            // collect ref and non-ref counts
-            for (i = 0; i < n_plp[s]; ++i) {
-                bam_pileup1_t *p = plp[s] + i;
-                bam1_t *b = p->b;
-                uint32_t *cigar = bam_get_cigar(b);
-                uint8_t *seq = bam_get_seq(b);
-                int x = b->core.pos, y = 0;
-                for (k = 0; k < b->core.n_cigar; ++k) {
-                    int op = cigar[k]&0xf;
-                    int j, l = cigar[k]>>4;
-                    if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-                        for (j = 0; j < l; ++j)
-                            if (x + j >= left && x + j < right)
-                                cns[x+j-left] += (bam_seqi(seq, y+j) == ref0[x+j-left])? 1 : 0x10000;
-                        x += l; y += l;
-                    } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l;
-                    else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
-                }
-            }
-            // determine the consensus
-            for (i = 0; i < right - left; ++i) r[i] = ref0[i];
-            max = max2 = 0; max_i = max2_i = -1;
-            for (i = 0; i < right - left; ++i) {
-                if (cns[i]>>16 >= max>>16) max2 = max, max2_i = max_i, max = cns[i], max_i = i;
-                else if (cns[i]>>16 >= max2>>16) max2 = cns[i], max2_i = i;
-            }
-            if ((double)(max&0xffff) / ((max&0xffff) + (max>>16)) >= 0.7) max_i = -1;
-            if ((double)(max2&0xffff) / ((max2&0xffff) + (max2>>16)) >= 0.7) max2_i = -1;
-            if (max_i >= 0) r[max_i] = 15;
-            if (max2_i >= 0) r[max2_i] = 15;
-            //for (i = 0; i < right - left; ++i) fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], bcftools_stderr); fputc('\n', bcftools_stderr);
-        }
-        free(ref0); free(cns);
-    }
-    { // the length of the homopolymer run around the current position
-        int c = seq_nt16_table[(int)ref[pos + 1]];
-        if (c == 15) l_run = 1;
-        else {
-            for (i = pos + 2; ref[i]; ++i)
-                if (seq_nt16_table[(int)ref[i]] != c) break;
-            l_run = i;
-            for (i = pos; i >= 0; --i)
-                if (seq_nt16_table[(int)ref[i]] != c) break;
-            l_run -= i + 1;
-        }
-    }
-    // construct the consensus sequence
+    ref_sample = bcf_cgp_ref_sample(n, n_plp, plp, pos, bca, ref, left, right);
+
+    // The length of the homopolymer run around the current position
+    l_run = bcf_cgp_l_run(ref, pos);
+
+    // construct the consensus sequence (minus indels, which are added later)
     max_ins = types[n_types - 1];   // max_ins is at least 0
     if (max_ins > 0) {
-        int *inscns_aux = (int*) calloc(5 * n_types * max_ins, sizeof(int));
-        // count the number of occurrences of each base at each position for each type of insertion
-        for (t = 0; t < n_types; ++t) {
-            if (types[t] > 0) {
-                for (s = 0; s < n; ++s) {
-                    for (i = 0; i < n_plp[s]; ++i) {
-                        bam_pileup1_t *p = plp[s] + i;
-                        if (p->indel == types[t]) {
-                            uint8_t *seq = bam_get_seq(p->b);
-                            for (k = 1; k <= p->indel; ++k) {
-                                int c = seq_nt16_int[bam_seqi(seq, p->qpos + k)];
-                                assert(c<5);
-                                ++inscns_aux[(t*max_ins+(k-1))*5 + c];
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        // use the majority rule to construct the consensus
-        inscns = (char*) calloc(n_types * max_ins, 1);
-        for (t = 0; t < n_types; ++t) {
-            for (j = 0; j < types[t]; ++j) {
-                int max = 0, max_k = -1, *ia = &inscns_aux[(t*max_ins+j)*5];
-                for (k = 0; k < 5; ++k)
-                    if (ia[k] > max)
-                        max = ia[k], max_k = k;
-                inscns[t*max_ins + j] = max? max_k : 4;
-                if ( max_k==4 ) { types[t] = 0; break; } // discard insertions which contain N's
-            }
-        }
-        free(inscns_aux);
+        inscns = bcf_cgp_calc_cons(n, n_plp, plp, pos,
+                                   types, n_types, max_ins, s);
+        if (!inscns)
+            return -1;
     }
+
     // compute the likelihood given each type of indel for each read
     max_ref2 = right - left + 2 + 2 * (max_ins > -types[0]? max_ins : -types[0]);
     ref2  = (char*) calloc(max_ref2, 1);
     query = (char*) calloc(right - left + max_rd_len + max_ins + 2, 1);
-    score1 = (int*) calloc(N * n_types, sizeof(int));
-    score2 = (int*) calloc(N * n_types, sizeof(int));
+    score = (int*) calloc(N * n_types, sizeof(int));
     bca->indelreg = 0;
+    double nqual_over_60 = bca->nqual / 60.0;
+
     for (t = 0; t < n_types; ++t) {
         int l, ir;
-        probaln_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
-        apf1.bw = apf2.bw = abs(types[t]) + 3;
+
         // compute indelreg
-        if (types[t] == 0) ir = 0;
-        else if (types[t] > 0) ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]);
-        else ir = est_indelreg(pos, ref, -types[t], 0);
-        if (ir > bca->indelreg) bca->indelreg = ir;
-//      fprintf(bcftools_stderr, "%d, %d, %d\n", pos, types[t], ir);
-        // realignment
+        if (types[t] == 0)
+            ir = 0;
+        else if (types[t] > 0)
+            ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]);
+        else
+            ir = est_indelreg(pos, ref, -types[t], 0);
+
+        if (ir > bca->indelreg)
+            bca->indelreg = ir;
+
+        // Identify max deletion length (read counter K is not needed here)
+        int max_deletion = 0;
+        for (s = 0; s < n; ++s) {
+            for (i = 0; i < n_plp[s]; ++i) {
+                const bam_pileup1_t *p = plp[s] + i;
+                if (max_deletion < -p->indel)
+                    max_deletion = -p->indel;
+            }
+        }
+
+        // Realignment score, computed via BAQ
         for (s = K = 0; s < n; ++s) {
-            // write ref2
+            // Construct ref2 from ref_sample, inscns and indels.
+            // This is now the true sample consensus (possibly prepended
+            // and appended with reference if sample data doesn't span
+            // the full length).
             for (k = 0, j = left; j <= pos; ++j)
                 ref2[k++] = seq_nt16_int[(int)ref_sample[s][j-left]];
-            if (types[t] <= 0) j += -types[t];
-            else for (l = 0; l < types[t]; ++l)
-                     ref2[k++] = inscns[t*max_ins + l];
+
+            if (types[t] <= 0)
+                j += -types[t];
+            else
+                for (l = 0; l < types[t]; ++l)
+                    ref2[k++] = inscns[t*max_ins + l];
+
             for (; j < right && ref[j]; ++j)
                 ref2[k++] = seq_nt16_int[(int)ref_sample[s][j-left]];
-            for (; k < max_ref2; ++k) ref2[k] = 4;
-            if (j < right) right = j;
+            for (; k < max_ref2; ++k)
+                ref2[k] = 4;
+
+            if (right > j)
+                right = j;
+
             // align each read to ref2
             for (i = 0; i < n_plp[s]; ++i, ++K) {
                 bam_pileup1_t *p = plp[s] + i;
-                int qbeg, qend, tbeg, tend, sc, kk;
+
+                // Some basic ref vs alt stats.
+                int imq = p->b->core.qual > 59 ? 59 : p->b->core.qual;
+                imq *= nqual_over_60;
+
+                int sc_len, slen, epos, sc_end;
+
+                // Only need to gather stats on one type, as it's
+                // identical calculation for all the subsequent ones
+                // and we're sharing the same stats array
+                if (t == 0) {
+                    // Gather stats for INFO field to aid filtering.
+                    // mq and sc_len not very helpful for filtering, but could
+                    // help in assigning a better QUAL value.
+                    //
+                    // Pos is slightly useful.
+                    // Base qual can be useful, but need qual prior to BAQ?
+                    // May need to cache orig quals in aux tag so we can fetch
+                    // them even after mpileup step.
+                    get_pos(bca, p, &sc_len, &slen, &epos, &sc_end);
+
+                    assert(imq >= 0 && imq < bca->nqual);
+                    assert(epos >= 0 && epos < bca->npos);
+                    assert(sc_len >= 0 && sc_len < 100);
+                    if (p->indel) {
+                        bca->ialt_mq[imq]++;
+                        bca->ialt_scl[sc_len]++;
+                        bca->ialt_pos[epos]++;
+                    } else {
+                        bca->iref_mq[imq]++;
+                        bca->iref_scl[sc_len]++;
+                        bca->iref_pos[epos]++;
+                    }
+                }
+
+                int qbeg, qpos, qend, tbeg, tend, kk;
                 uint8_t *seq = bam_get_seq(p->b);
                 uint32_t *cigar = bam_get_cigar(p->b);
-                if (p->b->core.flag&4) continue; // unmapped reads
-                // FIXME: the following loop should be better moved outside; nonetheless, realignment should be much slower anyway.
+                if (p->b->core.flag & BAM_FUNMAP) continue;
+
+                // FIXME: the following loop should be better moved outside;
+                // nonetheless, realignment should be much slower anyway.
                 for (kk = 0; kk < p->b->core.n_cigar; ++kk)
-                    if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) break;
-                if (kk < p->b->core.n_cigar) continue;
-                // FIXME: the following skips soft clips, but using them may be more sensitive.
+                    if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP)
+                        break;
+                if (kk < p->b->core.n_cigar)
+                    continue;
+
                 // determine the start and end of sequences for alignment
-                qbeg = tpos2qpos(&p->b->core, bam_get_cigar(p->b), left,  0, &tbeg);
-                qend = tpos2qpos(&p->b->core, bam_get_cigar(p->b), right, 1, &tend);
+                // FIXME: loops over CIGAR multiple times
+                int left2 = left, right2 = right;
+                if (p->b->core.l_qseq > 1000) {
+                    // long read data needs less context.  It also tends to
+                    // have many more candidate indels to investigate so
+                    // speed here matters more.
+                    if (pos - left >= INDEL_WINDOW_SIZE)
+                        left2 += INDEL_WINDOW_SIZE/2;
+                    if (right-pos >= INDEL_WINDOW_SIZE)
+                        right2 -= INDEL_WINDOW_SIZE/2;
+                }
+
+                int r_start = p->b->core.pos;
+                int r_end = bam_cigar2rlen(p->b->core.n_cigar,
+                                           bam_get_cigar(p->b))
+                            -1 + r_start;
+
+                qbeg = tpos2qpos(&p->b->core, bam_get_cigar(p->b), left2,
+                                 0, &tbeg);
+                qpos = tpos2qpos(&p->b->core, bam_get_cigar(p->b), pos,
+                                     0, &tend) - qbeg;
+                qend = tpos2qpos(&p->b->core, bam_get_cigar(p->b), right2,
+                                 1, &tend);
+
                 if (types[t] < 0) {
                     int l = -types[t];
                     tbeg = tbeg - l > left?  tbeg - l : left;
                 }
+
                 // write the query sequence
                 for (l = qbeg; l < qend; ++l)
                     query[l - qbeg] = seq_nt16_int[bam_seqi(seq, l)];
-                { // do realignment; this is the bottleneck
-                    const uint8_t *qual = bam_get_qual(p->b), *bq;
-                    uint8_t *qq;
-                    qq = (uint8_t*) calloc(qend - qbeg, 1);
-                    bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
-                    if (bq) ++bq; // skip type
-                    for (l = qbeg; l < qend; ++l) {
-                        qq[l - qbeg] = bq? qual[l] + (bq[l] - 64) : qual[l];
-                        if (qq[l - qbeg] > 30) qq[l - qbeg] = 30;
-                        if (qq[l - qbeg] < 7) qq[l - qbeg] = 7;
-                    }
-                    sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                        (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
-                    l = (int)(100. * sc / (qend - qbeg) + .499); // used for adjusting indelQ below
-                    if (l > 255) l = 255;
-                    score1[K*n_types + t] = score2[K*n_types + t] = sc<<8 | l;
-                    if (sc > 5) {
-                        sc = probaln_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
-                                            (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
-                        l = (int)(100. * sc / (qend - qbeg) + .499);
-                        if (l > 255) l = 255;
-                        score2[K*n_types + t] = sc<<8 | l;
+
+                // A fudge for now.  Consider checking SAM header for
+                // RG platform field.
+                int long_read = p->b->core.l_qseq > 1000;
+
+                // do realignment; this is the bottleneck
+                if (tend > tbeg) {
+                    if (bcf_cgp_align_score(p, bca, types[t],
+                                            (uint8_t *)ref2 + left2-left,
+                                            (uint8_t *)query,
+                                            r_start, r_end, long_read,
+                                            tbeg, tend, left2, right2,
+                                            qbeg, qend, qpos, max_deletion,
+                                            &score[K*n_types + t]) < 0) {
+                        score[K*n_types + t] = 0xffffff;
+                        return -1;
                     }
-                    free(qq);
+                } else {
+                    // placeholder large cost for reads that cover the
+                    // region entirely within a deletion (thus tend <= tbeg).
+                    score[K*n_types + t] = 0xffffff;
                 }
 #if 0
                 for (l = 0; l < tend - tbeg + abs(types[t]); ++l)
                     fputc("ACGTN"[(int)ref2[tbeg-left+l]], bcftools_stderr);
                 fputc('\n', bcftools_stderr);
-                for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[(int)query[l]], bcftools_stderr);
+                for (l = 0; l < qend - qbeg; ++l)
+                    fputc("ACGTN"[(int)query[l]], bcftools_stderr);
                 fputc('\n', bcftools_stderr);
-                fprintf(bcftools_stderr, "pos=%d type=%d read=%d:%d name=%s qbeg=%d tbeg=%d score=%d\n", pos, types[t], s, i, bam_get_qname(p->b), qbeg, tbeg, sc);
+                fprintf(bcftools_stderr, "pos=%d type=%d read=%d:%d name=%s "
+                        "qbeg=%d tbeg=%d score=%d\n",
+                        pos, types[t], s, i, bam_get_qname(p->b),
+                        qbeg, tbeg, score[K*n_types + t]); // 'sc' is gone
 #endif
             }
         }
     }
-    free(ref2); free(query);
-    { // compute indelQ
-        int sc_a[16], sumq_a[16];
-        int tmp, *sc = sc_a, *sumq = sumq_a;
-        if (n_types > 16) {
-            sc   = (int *)malloc(n_types * sizeof(int));
-            sumq = (int *)malloc(n_types * sizeof(int));
-        }
-        memset(sumq, 0, n_types * sizeof(int));
-        for (s = K = 0; s < n; ++s) {
-            for (i = 0; i < n_plp[s]; ++i, ++K) {
-                bam_pileup1_t *p = plp[s] + i;
-                int *sct = &score1[K*n_types], indelQ1, indelQ2, seqQ, indelQ;
-                for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
-                for (t = 1; t < n_types; ++t) // insertion sort
-                    for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
-                        tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp;
-                /* errmod_cal() assumes that if the call is wrong, the
-                 * likelihoods of other events are equal. This is about
-                 * right for substitutions, but is not desired for
-                 * indels. To reuse errmod_cal(), I have to make
-                 * compromise for multi-allelic indels.
-                 */
-                if ((sc[0]&0x3f) == ref_type) {
-                    indelQ1 = (sc[1]>>14) - (sc[0]>>14);
-                    seqQ = est_seqQ(bca, types[sc[1]&0x3f], l_run);
-                } else {
-                    for (t = 0; t < n_types; ++t) // look for the reference type
-                        if ((sc[t]&0x3f) == ref_type) break;
-                    indelQ1 = (sc[t]>>14) - (sc[0]>>14);
-                    seqQ = est_seqQ(bca, types[sc[0]&0x3f], l_run);
-                }
-                tmp = sc[0]>>6 & 0xff;
-                indelQ1 = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ1 + .499); // reduce indelQ
-                sct = &score2[K*n_types];
-                for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
-                for (t = 1; t < n_types; ++t) // insertion sort
-                    for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
-                        tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp;
-                if ((sc[0]&0x3f) == ref_type) {
-                    indelQ2 = (sc[1]>>14) - (sc[0]>>14);
-                } else {
-                    for (t = 0; t < n_types; ++t) // look for the reference type
-                        if ((sc[t]&0x3f) == ref_type) break;
-                    indelQ2 = (sc[t]>>14) - (sc[0]>>14);
-                }
-                tmp = sc[0]>>6 & 0xff;
-                indelQ2 = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ2 + .499);
-                // pick the smaller between indelQ1 and indelQ2
-                indelQ = indelQ1 < indelQ2? indelQ1 : indelQ2;
-                if (indelQ > 255) indelQ = 255;
-                if (seqQ > 255) seqQ = 255;
-                p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ; // use 22 bits in total
-                sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ;
-//              fprintf(bcftools_stderr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
-            }
-        }
-        // determine bca->indel_types[] and bca->inscns
-        bca->maxins = max_ins;
-        bca->inscns = (char*) realloc(bca->inscns, bca->maxins * 4);
-        for (t = 0; t < n_types; ++t)
-            sumq[t] = sumq[t]<<6 | t;
-        for (t = 1; t < n_types; ++t) // insertion sort
-            for (j = t; j > 0 && sumq[j] > sumq[j-1]; --j)
-                tmp = sumq[j], sumq[j] = sumq[j-1], sumq[j-1] = tmp;
-        for (t = 0; t < n_types; ++t) // look for the reference type
-            if ((sumq[t]&0x3f) == ref_type) break;
-        if (t) { // then move the reference type to the first
-            tmp = sumq[t];
-            for (; t > 0; --t) sumq[t] = sumq[t-1];
-            sumq[0] = tmp;
-        }
-        for (t = 0; t < 4; ++t) bca->indel_types[t] = B2B_INDEL_NULL;
-        for (t = 0; t < 4 && t < n_types; ++t) {
-            bca->indel_types[t] = types[sumq[t]&0x3f];
-            memcpy(&bca->inscns[t * bca->maxins], &inscns[(sumq[t]&0x3f) * max_ins], bca->maxins);
-        }
-        // update p->aux
-        for (s = n_alt = 0; s < n; ++s) {
-            for (i = 0; i < n_plp[s]; ++i) {
-                bam_pileup1_t *p = plp[s] + i;
-                int x = types[p->aux>>16&0x3f];
-                for (j = 0; j < 4; ++j)
-                    if (x == bca->indel_types[j]) break;
-                p->aux = j<<16 | (j == 4? 0 : (p->aux&0xffff));
-                if ((p->aux>>16&0x3f) > 0) ++n_alt;
-                //fprintf(bcftools_stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam_get_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
-            }
-        }
 
-        if (sc   != sc_a)   free(sc);
-        if (sumq != sumq_a) free(sumq);
-    }
-    free(score1); free(score2);
+    // compute indelQ
+    n_alt = bcf_cgp_compute_indelQ(n, n_plp, plp, bca, inscns, l_run, max_ins,
+                                   ref_type, types, n_types, score);
+
     // free
-    for (i = 0; i < n; ++i) free(ref_sample[i]);
+    free(ref2);
+    free(query);
+    free(score);
+
+    for (i = 0; i < n; ++i)
+        free(ref_sample[i]);
+
     free(ref_sample);
     free(types); free(inscns);
+
     return n_alt > 0? 0 : -1;
 }
diff --git a/bcftools/bcftools.h b/bcftools/bcftools.h
index 96237ee..953cf6b 100644
--- a/bcftools/bcftools.h
+++ b/bcftools/bcftools.h
@@ -1,6 +1,6 @@
 /*  bcftools.h -- utility function declarations.
 
-    Copyright (C) 2013 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -50,25 +50,40 @@ void error_errno(const char *format, ...) HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT
 
 void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd);
 const char *hts_bcf_wmode(int file_type);
+const char *hts_bcf_wmode2(int file_type, char *fname);
+char *init_tmp_prefix(const char *prefix);
 
 void *smalloc(size_t size);     // safe malloc
 
-static inline char gt2iupac(char a, char b)
+static inline int iupac2bitmask(char iupac)
 {
-    static const char iupac[4][4] = { {'A','M','R','W'},{'M','C','S','Y'},{'R','S','G','K'},{'W','Y','K','T'} };
-    if ( a>='a' ) a -= 'a' - 'A';
-    if ( b>='a' ) b -= 'a' - 'A';
-    if ( a=='A' ) a = 0;
-    else if ( a=='C' ) a = 1;
-    else if ( a=='G' ) a = 2;
-    else if ( a=='T' ) a = 3;
-    else return 'N';
-    if ( b=='A' ) b = 0;
-    else if ( b=='C' ) b = 1;
-    else if ( b=='G' ) b = 2;
-    else if ( b=='T' ) b = 3;
-    else return 'N';
-    return iupac[(int)a][(int)b];
+    // IUPAC code (either case) -> bitmask with A=1,C=2,G=4,T=8; -1 if unknown
+    enum { A = 1, C = 2, G = 4, T = 8 };
+    if ( iupac >= 'a' ) iupac -= 'a' - 'A';
+    switch ( iupac ) {
+        case 'A': return A;
+        case 'C': return C;
+        case 'G': return G;
+        case 'T': return T;
+        case 'M': return A|C;
+        case 'R': return A|G;
+        case 'W': return A|T;
+        case 'S': return C|G;
+        case 'Y': return C|T;
+        case 'K': return G|T;
+        case 'V': return A|C|G;
+        case 'H': return A|C|T;
+        case 'D': return A|G|T;
+        case 'B': return C|G|T;
+        case 'N': return A|C|G|T;
+        default:  return -1;
+    }
+}
+static inline char bitmask2iupac(int bitmask)
+{
+    // Index is a base bitmask (A=1,C=2,G=4,T=8); 0 or out-of-range gives 0
+    static const char codes[] = ".ACMGRSVTWYHKDBN";
+    return ( bitmask >= 1 && bitmask <= 15 ) ? codes[bitmask] : 0;
 }
 
 static inline int iupac_consistent(char iupac, char nt)
@@ -101,4 +116,24 @@ static inline double phred_score(double prob)
     return prob>99 ? 99 : prob;
 }
 
+static const uint64_t bcf_double_missing    = 0x7ff0000000000001;
+static const uint64_t bcf_double_vector_end = 0x7ff0000000000002;
+// Overwrite *ptr with the raw 64-bit pattern `value` (no numeric conversion)
+static inline void bcf_double_set(double *ptr, uint64_t value)
+{
+    union { uint64_t i; double d; } bits = { value };
+    *ptr = bits.d;
+}
+// Return 1 if the raw 64-bit pattern of `d` equals `value`, otherwise 0
+static inline int bcf_double_test(double d, uint64_t value)
+{
+    union { double d; uint64_t i; } bits = { d };
+    return bits.i == value;
+}
+#define bcf_double_set_vector_end(x) bcf_double_set(&(x),bcf_double_vector_end)
+#define bcf_double_set_missing(x)    bcf_double_set(&(x),bcf_double_missing)
+#define bcf_double_is_vector_end(x)  bcf_double_test((x),bcf_double_vector_end)
+#define bcf_double_is_missing(x)     bcf_double_test((x),bcf_double_missing)
+#define bcf_double_is_missing_or_vector_end(x)     (bcf_double_test((x),bcf_double_missing) || bcf_double_test((x),bcf_double_vector_end))
+
 #endif
diff --git a/bcftools/bcftools.pysam.c b/bcftools/bcftools.pysam.c
index de8739d..c6f4fd8 100644
--- a/bcftools/bcftools.pysam.c
+++ b/bcftools/bcftools.pysam.c
@@ -1,6 +1,7 @@
 #include <ctype.h>
 #include <assert.h>
 #include <unistd.h>
+#include <setjmp.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -55,6 +56,25 @@ int bcftools_puts(const char *s)
   return putc('\n', bcftools_stdout);
 }
 
+
+static jmp_buf bcftools_jmpbuf;
+static int bcftools_status = 0;
+
+int bcftools_dispatch(int argc, char *argv[])
+{
+  /* Non-zero means we got here via longjmp() from bcftools_exit(). */
+  if (setjmp(bcftools_jmpbuf) != 0)
+    return bcftools_status;
+  return bcftools_main(argc, argv);
+}
+/* Save `status`, then longjmp() to the setjmp() in bcftools_dispatch(). */
+void bcftools_exit(int status)
+{
+  bcftools_status = status;
+  longjmp(bcftools_jmpbuf, 1);
+}
+
+
 void bcftools_set_optind(int val)
 {
   // setting this in cython via 
diff --git a/bcftools/bcftools.pysam.h b/bcftools/bcftools.pysam.h
index 453567a..b8bf93e 100644
--- a/bcftools/bcftools.pysam.h
+++ b/bcftools/bcftools.pysam.h
@@ -3,6 +3,17 @@
 
 #include <stdio.h>
 
+#ifndef __has_attribute
+#define __has_attribute(attribute) 0
+#endif
+#ifndef PYSAM_NORETURN
+#if __has_attribute(__noreturn__) || __GNUC__ >= 3
+#define PYSAM_NORETURN __attribute__((__noreturn__))
+#else
+#define PYSAM_NORETURN
+#endif
+#endif
+
 extern FILE * bcftools_stderr;
 
 extern FILE * bcftools_stdout;
@@ -40,6 +51,8 @@ int bcftools_puts(const char *s);
 
 int bcftools_dispatch(int argc, char *argv[]);
 
+void PYSAM_NORETURN bcftools_exit(int status);
+
 void bcftools_set_optind(int);
 
 extern int bcftools_main(int argc, char *argv[]);
diff --git a/bcftools/bin.c b/bcftools/bin.c
index 95a2be1..a4817cf 100644
--- a/bcftools/bin.c
+++ b/bcftools/bin.c
@@ -25,6 +25,7 @@
  */
 
 #include <stdio.h>
+#include <assert.h>
 #include "bcftools.h"
 #include "bin.h"
 
diff --git a/bcftools/bin.c.pysam.c b/bcftools/bin.c.pysam.c
index 426ef45..1a177be 100644
--- a/bcftools/bin.c.pysam.c
+++ b/bcftools/bin.c.pysam.c
@@ -27,6 +27,7 @@
  */
 
 #include <stdio.h>
+#include <assert.h>
 #include "bcftools.h"
 #include "bin.h"
 
diff --git a/bcftools/call.h b/bcftools/call.h
index 50e4815..16bf0b6 100644
--- a/bcftools/call.h
+++ b/bcftools/call.h
@@ -1,6 +1,6 @@
 /*  call.h -- variant calling declarations.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2015, 2019-2020 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -34,7 +34,7 @@ THE SOFTWARE.  */
 #define CALL_CONSTR_TRIO    (1<<2)
 #define CALL_CONSTR_ALLELES (1<<3)
 //
-//
+#define CALL_FMT_PV4        (1<<5)
 #define CALL_FMT_GQ         (1<<6)
 #define CALL_FMT_GP         (1<<7)
 
@@ -52,18 +52,13 @@ family_t;
 // For the single-sample and grouped -G calling
 typedef struct
 {
+    double ref_lk, max_lk, lk_sum;
     float *qsum;    // QS(quality sum) values
-    int nqsum, dp;
-    double fa,fb,fc,fa2,fb2,fc2,fab,fac,fbc;
-}
-grp1_t;
-typedef struct
-{
-    grp1_t *grp;
-    int ngrp;
-    int *smpl2grp;
+    int nqsum;
+    uint32_t *smpl, nsmpl;
+    uint32_t nals, als;
 }
-grp_t;
+smpl_grp_t;
 
 // For the `-C alleles -i` constrained calling
 typedef struct
@@ -82,6 +77,7 @@ typedef struct
     int *pl_map, npl_map;   // same as above for PLs, but reverse (new -> old)
     char **als;             // array to hold the trimmed set of alleles to appear on output
     int nals;               // size of the als array
+    int als_new, nals_new;  // bitmask with final alleles and their number
     family_t *fams;         // list of families and samples for trio calling
     int nfams, mfams;
     int ntrio[5][5];        // possible trio genotype combinations and their counts; first idx:
@@ -96,18 +92,16 @@ typedef struct
     int32_t *ugts, *cgts;   // unconstraind and constrained GTs
     uint32_t output_tags;
     char *prior_AN, *prior_AC;  // reference panel AF tags (AF=AC/AN)
-    tgt_als_t *tgt_als;     // for CALL_CONSTR_ALLELES
-    char *sample_groups;    // for single-sample or grouped calling with -G
-    grp_t smpl_grp;
-    float *qsum;
-    int nqsum;
+    tgt_als_t *tgt_als;         // for CALL_CONSTR_ALLELES
+    char *sample_groups;        // for single-sample or grouped calling with -G
+    char *sample_groups_tag;    // for -G [AD|QS:]
+    smpl_grp_t *smpl_grp;
+    int nsmpl_grp;
 
     // ccall only
     double indel_frac, min_perm_p, min_lrt;
     double prior_type, pref;
-    double ref_lk, lk_sum;
     int ngrp1_samples, n_perm;
-    int nhets, ndiploid;
     char *prior_file;
     ccall_t *cdat;
 
@@ -149,7 +143,7 @@ void qcall_destroy(call_t *call);
 void call_init_pl2p(call_t *call);
 uint32_t *call_trio_prep(int is_x, int is_son);
 
-void init_allele_trimming_maps(call_t *call, int als, int nals);
-void mcall_trim_numberR(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als);
+void init_allele_trimming_maps(call_t *call, int nals_ori, int als_out);
+void mcall_trim_and_update_numberR(call_t *call, bcf1_t *rec, int nals_ori, int nals_new);
 
 #endif
diff --git a/bcftools/ccall.c b/bcftools/ccall.c
index 9f6958a..6bf987b 100644
--- a/bcftools/ccall.c
+++ b/bcftools/ccall.c
@@ -24,6 +24,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <math.h>
+#include <assert.h>
 #include <htslib/kfunc.h>
 #include "call.h"
 #include "kmin.h"
@@ -302,8 +303,8 @@ static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double
     // trim Number=R tags
     int out_als = 0;
     for (i=0; i<nals; i++) out_als |= 1<<i;
-    init_allele_trimming_maps(call, out_als, nals_ori);
-    mcall_trim_numberR(call, rec, nals_ori, nals, out_als);
+    init_allele_trimming_maps(call, nals_ori, out_als);
+    mcall_trim_and_update_numberR(call, rec, nals_ori, nals);
 
     return is_var;
 }
diff --git a/bcftools/ccall.c.pysam.c b/bcftools/ccall.c.pysam.c
index 696b455..eb7c615 100644
--- a/bcftools/ccall.c.pysam.c
+++ b/bcftools/ccall.c.pysam.c
@@ -26,6 +26,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
 #include <math.h>
+#include <assert.h>
 #include <htslib/kfunc.h>
 #include "call.h"
 #include "kmin.h"
@@ -304,8 +305,8 @@ static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double
     // trim Number=R tags
     int out_als = 0;
     for (i=0; i<nals; i++) out_als |= 1<<i;
-    init_allele_trimming_maps(call, out_als, nals_ori);
-    mcall_trim_numberR(call, rec, nals_ori, nals, out_als);
+    init_allele_trimming_maps(call, nals_ori, out_als);
+    mcall_trim_and_update_numberR(call, rec, nals_ori, nals);
 
     return is_var;
 }
diff --git a/bcftools/consensus.c b/bcftools/consensus.c
index 4652a39..a232174 100644
--- a/bcftools/consensus.c
+++ b/bcftools/consensus.c
@@ -1,6 +1,6 @@
 /* The MIT License
 
-   Copyright (c) 2014-2017 Genome Research Ltd.
+   Copyright (c) 2014-2021 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
    
@@ -28,6 +28,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
+#include <assert.h>
 #include <errno.h>
 #include <getopt.h>
 #include <unistd.h>
@@ -52,6 +53,9 @@
 #define PICK_SHORT 8
 #define PICK_IUPAC 16
 
+#define TO_UPPER 0
+#define TO_LOWER 1
+
 typedef struct
 {
     int num;                // number of ungapped blocks in this chain
@@ -64,6 +68,16 @@ typedef struct
 }
 chain_t;
 
+#define MASK_LC 1     // mask_t.with value: masked regions are converted to lower case
+#define MASK_UC 2     // mask_t.with value: masked regions are converted to upper case
+#define MASK_SKIP(x) (((x)->with!=MASK_LC && (x)->with!=MASK_UC) ? 1 : 0)     // 1 when the mask substitutes a character rather than changing case
+typedef struct     // one --mask regions file together with the way its regions are masked
+{
+    char *fname, with;     // regions file name; MASK_LC, MASK_UC, or the character written over masked bases
+    regidx_t *idx;         // region index created from fname with regidx_init() in init_data()
+    regitr_t *itr;         // iterator over idx created with regitr_init() in init_data()
+}
+mask_t;
 
 typedef struct
 {
@@ -71,9 +85,10 @@ typedef struct
     int fa_ori_pos;     // start position of the fa_buffer (wrt original sequence)
     int fa_frz_pos;     // protected position to avoid conflicting variants (last pos for SNPs/ins)
     int fa_mod_off;     // position difference of fa_frz_pos in the ori and modified sequence (ins positive)
+    int fa_frz_mod;     // the fa_buf offset of the protected fa_frz_pos position, includes the modified sequence
     int fa_end_pos;     // region's end position in the original sequence
     int fa_length;      // region's length in the original sequence (in case end_pos not provided in the FASTA header)
-    int fa_case;        // output upper case or lower case?
+    int fa_case;        // output upper case or lower case: TO_UPPER|TO_LOWER
     int fa_src_pos;     // last genomic coordinate read from the input fasta (0-based)
     char prev_base;     // this is only to validate the REF allele in the VCF - the modified fa_buf cannot be used for inserts following deletions, see 600#issuecomment-383186778
     int prev_base_pos;  // the position of prev_base
@@ -84,8 +99,8 @@ typedef struct
     int nvcf_buf, rid;
     char *chr, *chr_prefix;
 
-    regidx_t *mask;
-    regitr_t *itr;
+    mask_t *mask;
+    int nmask;
 
     int chain_id;       // chain_id, to provide a unique ID to each chain in the chain output
     chain_t *chain;     // chain structure to store the sequence of ungapped blocks between the ref and alt sequences
@@ -101,7 +116,10 @@ typedef struct
     FILE *fp_chain;
     char **argv;
     int argc, output_iupac, haplotype, allele, isample, napplied;
-    char *fname, *ref_fname, *sample, *output_fname, *mask_fname, *chain_fname, missing_allele;
+    uint8_t *iupac_bitmask;
+    int miupac_bitmask;
+    char *fname, *ref_fname, *sample, *output_fname, *mask_fname, *chain_fname, missing_allele, absent_allele;
+    char mark_del, mark_ins, mark_snv;
 }
 args_t;
 
@@ -182,7 +200,7 @@ static void push_chain_gap(chain_t *chain, int ref_start, int ref_len, int alt_s
 //     fprintf(stderr, "push_chain_gap(*chain, ref_start=%d, ref_len=%d, alt_start=%d, alt_len=%d)\n", ref_start, ref_len, alt_start, alt_len);
     int num = chain->num;
 
-    if (ref_start <= chain->ref_last_block_ori) {
+    if (num && ref_start <= chain->ref_last_block_ori) {
         // In case this variant is back-to-back with the previous one
         chain->ref_last_block_ori = ref_start + ref_len;
         chain->alt_last_block_ori = alt_start + alt_len;
@@ -222,11 +240,13 @@ static void init_data(args_t *args)
         if ( bcf_hdr_nsamples(args->hdr) > 1 ) error("The --sample option is expected with --haplotype\n");
         args->isample = 0;
     }
-    if ( args->mask_fname )
+    int i;
+    for (i=0; i<args->nmask; i++)
     {
-        args->mask = regidx_init(args->mask_fname,NULL,NULL,0,NULL);
-        if ( !args->mask ) error("Failed to initialize mask regions\n");
-        args->itr = regitr_init(args->mask);
+        mask_t *mask = &args->mask[i];
+        mask->idx = regidx_init(mask->fname,NULL,NULL,0,NULL);
+        if ( !mask->idx ) error("Failed to initialize mask regions\n");
+        mask->itr = regitr_init(mask->idx);
     }
     // In case we want to store the chains
     if ( args->chain_fname )
@@ -245,10 +265,28 @@ static void init_data(args_t *args)
     if ( args->isample<0 ) fprintf(stderr,"Note: the --sample option not given, applying all records regardless of the genotype\n");
     if ( args->filter_str )
         args->filter = filter_init(args->hdr, args->filter_str);
+    args->rid = -1;
+}
+static void add_mask(args_t *args, char *fname)     // append a --mask file to args->mask; fname is stored, not copied
+{
+    args->mask = (mask_t*)realloc(args->mask,(args->nmask+1)*sizeof(*args->mask));
+    if ( !args->mask ) error("Failed to allocate memory for the mask file %s\n", fname);
+    mask_t *mask = &args->mask[args->nmask++];
+    memset(mask, 0, sizeof(*mask));     // idx and itr stay NULL until init_data() creates them
+    mask->fname = fname; mask->with = 'N';     // 'N' is the default replacement, add_mask_with() can change it
+}
+static void add_mask_with(args_t *args, char *with)     // set how the most recently added --mask file is masked
+{
+    if ( args->nmask==0 ) error("The --mask-with option must follow --mask\n");
+    mask_t *last = args->mask + args->nmask - 1;     // --mask-with always refers to the preceding --mask
+    if ( strcasecmp(with,"uc")==0 ) { last->with = MASK_UC; return; }
+    if ( strcasecmp(with,"lc")==0 ) { last->with = MASK_LC; return; }
+    if ( strlen(with)!=1 ) error("Expected \"lc\", \"uc\", or a single character with the --mask-with option\n");
+    last->with = with[0];     // any other single character replaces the masked bases verbatim
+}
-
 static void destroy_data(args_t *args)
 {
+    free(args->iupac_bitmask);
     if (args->filter) filter_destroy(args->filter);
     bcf_sr_destroy(args->files);
     int i;
@@ -257,8 +295,13 @@ static void destroy_data(args_t *args)
     free(args->vcf_buf);
     free(args->fa_buf.s);
     free(args->chr);
-    if ( args->mask ) regidx_destroy(args->mask);
-    if ( args->itr ) regitr_destroy(args->itr);
+    for (i=0; i<args->nmask; i++)
+    {
+        mask_t *mask = &args->mask[i];
+        regidx_destroy(mask->idx);
+        regitr_destroy(mask->itr);
+    }
+    free(args->mask);
     if ( args->chain_fname )
         if ( fclose(args->fp_chain) ) error("Close failed: %s\n", args->chain_fname);
     if ( fclose(args->fp_out) ) error("Close failed: %s\n", args->output_fname);
@@ -297,6 +340,7 @@ static void init_region(args_t *args, char *line)
     args->fa_src_pos = from;
     args->fa_mod_off = 0;
     args->fa_frz_pos = -1;
+    args->fa_frz_mod = -1;
     args->fa_case    = -1;
     args->vcf_rbuf.n = 0;
     bcf_sr_seek(args->files,line,args->fa_ori_pos);
@@ -345,7 +389,6 @@ static void unread_vcf_line(args_t *args, bcf1_t **rec_ptr)
 static void flush_fa_buffer(args_t *args, int len)
 {
     if ( !args->fa_buf.l ) return;
-
     int nwr = 0;
     while ( nwr + 60 <= args->fa_buf.l )
     {
@@ -356,6 +399,8 @@ static void flush_fa_buffer(args_t *args, int len)
     if ( nwr )
         args->fa_ori_pos += nwr;
 
+    args->fa_frz_mod -= nwr;
+
     if ( len )
     {
         // not finished on this chr yet and the buffer cannot be emptied completely
@@ -375,21 +420,84 @@ static void flush_fa_buffer(args_t *args, int len)
     args->fa_mod_off = 0;
     args->fa_buf.l = 0;
 }
+static void apply_absent(args_t *args, hts_pos_t pos)     // overwrite buffered bases before pos that lie past the protected part with absent_allele
+{
+    if ( !args->fa_buf.l || pos <= args->fa_frz_pos + 1 || pos <= args->fa_ori_pos ) return;     // empty buffer, or pos not beyond the protected position / buffer start
+    // NOTE(review): with fa_frz_pos >= -1 a zero pos seems to always return above, which would make the `pos &&` test below redundant - confirm
+    int ie = pos && pos - args->fa_ori_pos + args->fa_mod_off < args->fa_buf.l ? pos - args->fa_ori_pos + args->fa_mod_off : args->fa_buf.l;     // exclusive end: fa_buf offset of pos, capped at the buffer length
+    int ib = args->fa_frz_mod < 0 ? 0 : args->fa_frz_mod;     // start: fa_buf offset stored in fa_frz_mod, or the buffer start when it is negative
+    int i;
+    for (i=ib; i<ie; i++)
+        args->fa_buf.s[i] = args->absent_allele;
+}
+static void freeze_ref(args_t *args, bcf1_t *rec)     // extend the protected position to cover the bases spanned by rec, leaving the sequence unchanged
+{
+    if ( args->fa_frz_pos >= rec->pos + rec->rlen - 1 ) return;     // already protected up to the last base of rec or beyond
+    args->fa_frz_pos = rec->pos + rec->rlen - 1;     // last base of rec in original coordinates
+    args->fa_frz_mod = rec->pos - args->fa_ori_pos + args->fa_mod_off + rec->rlen;     // fa_buf offset just past rec; apply_absent() starts filling here
+}
+// Build the replacement for a deletion whose deleted bases are marked rather than removed:
+// the retained bases (ALT, or REF for a symbolic <DEL> when alt is NULL) padded with `mark`
+// up to rlen characters. Returns a malloc'ed, NUL-terminated string the caller must free.
+static char *mark_del(char *ref, int rlen, char *alt, int mark)
+{
+    const char *keep = alt ? alt : ref;
+    char *out = malloc(rlen+1);
+    int i;
+
+    // Copy at most rlen characters: apply_variant can shorten rec->rlen at the end of the
+    // buffer, so the retained allele may be longer than rlen and would overrun `out`
+    for (i=0; i<rlen && keep[i]; i++) out[i] = keep[i];
+
+    // the rest of the deleted stretch is filled with the marker character
+    for (; i<rlen; i++) out[i] = mark;
+    out[rlen] = 0;
+    return out;
+}
+static void mark_ins(char *ref, char *alt, char mark)
+{
+    // Only the inserted bases, i.e. the ALT bases past the length of REF, change case:
+    // lower case when mark is 'l', upper case for any other value
+    int k, nalt = strlen(alt);
+    for (k=strlen(ref); k<nalt; k++)
+        alt[k] = mark=='l' ? tolower(alt[k]) : toupper(alt[k]);
+}
+// Make substitutions stand out within the part shared by REF and ALT: every ALT base that
+// differs from the REF base at the same offset (compared case-insensitively) is switched to
+// lower case when mark is 'l' and to upper case otherwise. Matching bases and any ALT bases
+// past the end of REF are left as they are.
+static void mark_snv(char *ref, char *alt, char mark)
+{
+    int k, nshared = strlen(ref), nalt = strlen(alt);
+    if ( nalt < nshared ) nshared = nalt;
+
+    for (k=0; k<nshared; k++)
+    {
+        if ( tolower(ref[k])==tolower(alt[k]) ) continue;
+        alt[k] = mark=='l' ? tolower(alt[k]) : toupper(alt[k]);
+    }
+}
 static void apply_variant(args_t *args, bcf1_t *rec)
 {
     static int warned_haplotype = 0;
 
-    if ( rec->n_allele==1 && !args->missing_allele ) return;
+    if ( args->absent_allele ) apply_absent(args, rec->pos);
+    if ( rec->n_allele==1 && !args->missing_allele && !args->absent_allele ) { return; }
 
+    int i,j;
     if ( args->mask )
     {
         char *chr = (char*)bcf_hdr_id2name(args->hdr,args->rid);
         int start = rec->pos;
         int end   = rec->pos + rec->rlen - 1;
-        if ( regidx_overlap(args->mask, chr,start,end,NULL) ) return;
+        for (i=0; i<args->nmask; i++)
+        {
+            mask_t *mask = &args->mask[i];
+            if ( MASK_SKIP(mask) && regidx_overlap(mask->idx, chr,start,end,NULL) ) return;
+        }
     }
 
-    int i, ialt = 1;    // the alternate allele
+    int ialt = 1;    // the alternate allele
     if ( args->isample >= 0 )
     {
         bcf_unpack(rec, BCF_UN_FMT);
@@ -403,6 +511,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         enum { use_hap, use_iupac, pick_one } action = use_hap;
         if ( args->allele==PICK_IUPAC )
         {
+            if ( !args->haplotype ) action = use_iupac;
             if ( !bcf_gt_is_phased(ptr[0]) && !bcf_gt_is_phased(ptr[fmt->n-1]) ) action = use_iupac;
         }
         else if ( args->output_iupac ) action = use_iupac;
@@ -441,41 +550,40 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         }
         else if ( action==use_iupac ) 
         {
-            ialt = ptr[0];
-            if ( bcf_gt_is_missing(ialt) || ialt==bcf_int32_vector_end )
+            ialt = -1;
+            int is_missing = 0, alen = 0, mlen = 0, fallback_alt = -1;
+            for (i=0; i<fmt->n; i++)
             {
-                if ( !args->missing_allele ) return;
-                ialt = -1;
-            }
-            else
-                ialt = bcf_gt_allele(ialt);
+                if ( bcf_gt_is_missing(ptr[i]) ) { is_missing = 1; continue; }
+                if ( ptr[i]==(uint8_t)bcf_int8_vector_end ) break;
+                int jalt = bcf_gt_allele(ptr[i]);
+                if ( jalt >= rec->n_allele ) error("Invalid VCF, too few ALT alleles at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
+                if ( fallback_alt <= 0 ) fallback_alt = jalt;
 
-            int jalt;
-            if ( fmt->n>1 )
-            {
-                jalt = ptr[1];
-                if ( bcf_gt_is_missing(jalt) )
+                int l = strlen(rec->d.allele[jalt]);
+                for (j=0; j<l; j++)
+                    if ( iupac2bitmask(rec->d.allele[jalt][j]) < 0 ) break;
+                if ( j<l ) continue; // symbolic allele, breakpoint or invalid character in the allele
+
+                if ( l > mlen )
                 {
-                    if ( !args->missing_allele ) return;
-                    ialt = -1;
+                    hts_expand(uint8_t,l,args->miupac_bitmask,args->iupac_bitmask);
+                    for (j=mlen; j<l; j++) args->iupac_bitmask[j] = 0;
+                    mlen = l;
                 }
-                else if ( jalt==bcf_int32_vector_end ) jalt = ialt;
-                else
-                    jalt = bcf_gt_allele(jalt);
-            }
-            else jalt = ialt;
-
-            if ( ialt>=0 )
-            {
-                if ( rec->n_allele <= ialt || rec->n_allele <= jalt ) error("Invalid VCF, too few ALT alleles at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
-                if ( ialt!=jalt && !rec->d.allele[ialt][1] && !rec->d.allele[jalt][1] ) // is this a het snp?
+                if ( jalt>0 && l>alen )
                 {
-                    char ial = rec->d.allele[ialt][0];
-                    char jal = rec->d.allele[jalt][0];
-                    if ( !ialt ) ialt = jalt;   // only ialt is used, make sure 0/1 is not ignored
-                    rec->d.allele[ialt][0] = gt2iupac(ial,jal);
+                    alen = l;
+                    ialt = jalt;
                 }
+                for (j=0; j<l; j++)
+                    args->iupac_bitmask[j] |= iupac2bitmask(rec->d.allele[jalt][j]);
             }
+            if ( alen > 0 )
+                for (j=0; j<alen; j++) rec->d.allele[ialt][j] = bitmask2iupac(args->iupac_bitmask[j]);
+            else if ( fallback_alt >= 0 )
+                ialt = fallback_alt;
+            else if ( is_missing && !args->missing_allele ) return;
         }
         else
         {
@@ -520,17 +628,50 @@ static void apply_variant(args_t *args, bcf1_t *rec)
                 }
             }
         }
-        if ( !ialt ) return;  // ref allele
+        if ( !ialt )
+        {
+            // ref allele
+            if ( args->absent_allele ) freeze_ref(args,rec);
+            return;
+        }
         if ( rec->n_allele <= ialt ) error("Broken VCF, too few alts at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
     }
-    else if ( args->output_iupac && !rec->d.allele[0][1] && !rec->d.allele[1][1] )
+    else if ( args->output_iupac && rec->n_allele>1 )
     {
-        char ial = rec->d.allele[0][0];
-        char jal = rec->d.allele[1][0];
-        rec->d.allele[1][0] = gt2iupac(ial,jal);
+        int ialt, alen = 0, mlen = 0;
+        for (i=0; i<rec->n_allele; i++)
+        {
+            int l = strlen(rec->d.allele[i]);
+            for (j=0; j<l; j++)
+                if ( iupac2bitmask(rec->d.allele[i][j]) < 0 ) break;
+            if ( j<l ) continue;    // symbolic allele, breakpoint or invalid character in the allele
+
+            if ( l > mlen )
+            {
+                hts_expand(uint8_t,l,args->miupac_bitmask,args->iupac_bitmask);
+                for (j=mlen; j<l; j++) args->iupac_bitmask[j] = 0;
+                mlen = l;
+            }
+            if ( i>0 && l>alen )
+            {
+                alen = l;
+                ialt = i;
+            }
+            for (j=0; j<l; j++)
+                args->iupac_bitmask[j] |= iupac2bitmask(rec->d.allele[i][j]);
+        }
+        if ( alen > 0 )
+            for (j=0; j<alen; j++) rec->d.allele[ialt][j] = bitmask2iupac(args->iupac_bitmask[j]);
+        else
+            ialt = 1;
     }
 
-    if ( rec->n_allele==1 && ialt!=-1 ) return; // non-missing reference
+    if ( rec->n_allele==1 && ialt!=-1 )
+    {
+        // non-missing reference
+        if ( args->absent_allele ) freeze_ref(args,rec);
+        return;
+    }
     if ( ialt==-1 )
     {
         char alleles[4];
@@ -542,15 +683,34 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         ialt = 1;
     }
 
+    // For some variant types POS+REF refer to the base *before* the event; in such case set trim_beg
+    int trim_beg = 0;
+    int var_type = bcf_get_variant_type(rec,ialt);
+    int var_len  = rec->d.var[ialt].n;
+    if ( var_type & VCF_INDEL )
+    {
+        // normally indel starts one base after, but not if the first base of the fa reference is deleted
+        if ( rec->d.allele[0][0] == rec->d.allele[ialt][0] )
+            trim_beg = 1;
+        else
+            trim_beg = 0;
+    }
+    else if ( (var_type & VCF_OTHER) && !strcasecmp(rec->d.allele[ialt],"<DEL>") )
+    {
+        trim_beg = 1;
+        var_len  = 1 - rec->rlen;
+    }
+    else if ( (var_type & VCF_OTHER) && !strncasecmp(rec->d.allele[ialt],"<INS",4) ) trim_beg = 1;
+
     // Overlapping variant?
     if ( rec->pos <= args->fa_frz_pos )
     {
         // Can be still OK iff this is an insertion (and which does not follow another insertion, see #888).
         // This still may not be enough for more complicated cases with multiple duplicate positions
         // and other types in between. In such case let the user normalize the VCF and remove duplicates.
+
         int overlap = 0;
-        if ( rec->pos < args->fa_frz_pos || !(bcf_get_variant_type(rec,ialt) & VCF_INDEL) ) overlap = 1;
-        else if ( rec->d.var[ialt].n <= 0 || args->prev_is_insert ) overlap = 1;
+        if ( rec->pos < args->fa_frz_pos || !trim_beg || var_len==0 || args->prev_is_insert ) overlap = 1;
 
         if ( overlap )
         {
@@ -560,6 +720,9 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         
     }
 
+    char *alt_allele = rec->d.allele[ialt];
+    int rmme_alt = 0;
+
     int len_diff = 0, alen = 0;
     int idx = rec->pos - args->fa_ori_pos + args->fa_mod_off;
     if ( idx<0 )
@@ -570,10 +733,10 @@ static void apply_variant(args_t *args, bcf1_t *rec)
     if ( rec->rlen > args->fa_buf.l - idx )
     {
         rec->rlen = args->fa_buf.l - idx;
-        alen = strlen(rec->d.allele[ialt]);
+        alen = strlen(alt_allele);
         if ( alen > rec->rlen )
         {
-            rec->d.allele[ialt][rec->rlen] = 0;
+            alt_allele[rec->rlen] = 0;
             fprintf(stderr,"Warning: trimming variant starting at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
         }
     }
@@ -581,14 +744,44 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         error("FIXME: %s:%"PRId64" .. idx=%d, ori_pos=%d, len=%"PRIu64", off=%d\n",bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1,idx,args->fa_ori_pos,(uint64_t)args->fa_buf.l,args->fa_mod_off);
 
     // sanity check the reference base
-    if ( rec->d.allele[ialt][0]=='<' )
+    if ( alt_allele[0]=='<' )
     {
-        if ( strcasecmp(rec->d.allele[ialt], "<DEL>") )
-            error("Symbolic alleles other than <DEL> are currently not supported: %s at %s:%"PRId64"\n",rec->d.allele[ialt],bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
-        assert( rec->d.allele[0][1]==0 );           // todo: for now expecting strlen(REF) = 1
-        len_diff = 1-rec->rlen;
-        rec->d.allele[ialt] = rec->d.allele[0];     // according to VCF spec, REF must precede the event
-        alen = strlen(rec->d.allele[ialt]);
+        // TODO: symbolic deletions probably need more work above with PICK_SHORT|PICK_LONG
+
+        if ( strcasecmp(alt_allele,"<DEL>") && strcasecmp(alt_allele,"<*>") && strcasecmp(alt_allele,"<NON_REF>") )
+            error("Symbolic alleles other than <DEL>, <*> or <NON_REF> are currently not supported, e.g. %s at %s:%"PRId64".\n"
+                  "Please use filtering expressions to exclude such sites, for example by running with: -e 'ALT~\"<.*>\"'\n",
+                alt_allele,bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
+        if ( !strcasecmp(alt_allele,"<DEL>") )
+        {
+            static int multibase_ref_del_warned = 0;
+            if ( rec->d.allele[0][1]!=0 && !multibase_ref_del_warned )
+            {
+                fprintf(stderr,
+                    "Warning: one REF base is expected with <DEL>, assuming the actual deletion starts at POS+1 at %s:%"PRId64".\n"
+                    "         (This warning is printed only once.)\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
+                multibase_ref_del_warned = 1;
+            }
+            if ( args->mark_del )   // insert dashes instead of delete sequence
+            {
+                alt_allele = mark_del(rec->d.allele[0], rec->rlen, NULL, args->mark_del);
+                alen = rec->rlen;
+                len_diff = 0;
+                rmme_alt = 1;
+            }
+            else
+            {
+                len_diff = 1-rec->rlen;
+                alt_allele = rec->d.allele[0];     // according to VCF spec, the first REF base must precede the event
+                alen = 1;
+            }
+        }
+        else
+        {
+            // <*>  or <NON_REF> .. gVCF, evidence for the reference allele throughout the whole block
+            freeze_ref(args,rec);
+            return;
+        }
     }
     else if ( strncasecmp(rec->d.allele[0],args->fa_buf.s+idx,rec->rlen) )
     {
@@ -614,39 +807,63 @@ static void apply_variant(args_t *args, bcf1_t *rec)
             }
             error(
                     "The fasta sequence does not match the REF allele at %s:%"PRId64":\n"
-                    "   .vcf: [%s] <- (REF)\n" 
-                    "   .vcf: [%s] <- (ALT)\n" 
-                    "   .fa:  [%s]%c%s\n",
-                    bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1, rec->d.allele[0], rec->d.allele[ialt], args->fa_buf.s+idx,
+                    "   REF .vcf: [%s]\n"
+                    "   ALT .vcf: [%s]\n"
+                    "   REF .fa : [%s]%c%s\n",
+                    bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1, rec->d.allele[0], alt_allele, args->fa_buf.s+idx,
                     tmp?tmp:' ',tmp?args->fa_buf.s+idx+rec->rlen+1:""
                  );
         }
-        alen = strlen(rec->d.allele[ialt]);
+        alen = strlen(alt_allele);
         len_diff = alen - rec->rlen;
+
+        if ( args->mark_del && len_diff<0 ) 
+        {
+            alt_allele = mark_del(rec->d.allele[0], rec->rlen, alt_allele, args->mark_del);
+            alen = rec->rlen;
+            len_diff = 0;
+            rmme_alt = 1;
+        }
     }
     else
     {
-        alen = strlen(rec->d.allele[ialt]);
+        alen = strlen(alt_allele);
         len_diff = alen - rec->rlen;
+
+        if ( args->mark_del && len_diff<0 ) 
+        {
+            alt_allele = mark_del(rec->d.allele[0], rec->rlen, alt_allele, args->mark_del);
+            alen = rec->rlen;
+            len_diff = 0;
+            rmme_alt = 1;
+        }
     }
 
-    if ( args->fa_case )
-        for (i=0; i<alen; i++) rec->d.allele[ialt][i] = toupper(rec->d.allele[ialt][i]);
+    args->fa_case = toupper(args->fa_buf.s[idx])==args->fa_buf.s[idx] ? TO_UPPER : TO_LOWER;
+    if ( args->fa_case==TO_UPPER )
+        for (i=0; i<alen; i++) alt_allele[i] = toupper(alt_allele[i]);
     else
-        for (i=0; i<alen; i++) rec->d.allele[ialt][i] = tolower(rec->d.allele[ialt][i]);
+        for (i=0; i<alen; i++) alt_allele[i] = tolower(alt_allele[i]);
+
+    if ( args->mark_ins && len_diff>0 )
+        mark_ins(rec->d.allele[0], alt_allele, args->mark_ins);
+    if ( args->mark_snv )
+        mark_snv(rec->d.allele[0], alt_allele, args->mark_snv);
 
     if ( len_diff <= 0 )
     {
         // deletion or same size event
-        for (i=0; i<alen; i++)
-            args->fa_buf.s[idx+i] = rec->d.allele[ialt][i];
+        assert( args->fa_buf.l >= idx+rec->rlen );
+        args->prev_base = args->fa_buf.s[idx+rec->rlen-1];
+        args->prev_base_pos = rec->pos + rec->rlen - 1;
+        args->prev_is_insert = 0;
+        args->fa_frz_mod = idx + alen;
+
+        for (i=trim_beg; i<alen; i++)
+            args->fa_buf.s[idx+i] = alt_allele[i];
 
         if ( len_diff )
             memmove(args->fa_buf.s+idx+alen,args->fa_buf.s+idx+rec->rlen,args->fa_buf.l-idx-rec->rlen);
-
-        args->prev_base = rec->d.allele[0][rec->rlen - 1];
-        args->prev_base_pos = rec->pos + rec->rlen - 1;
-        args->prev_is_insert = 0;
     }
     else
     {
@@ -663,14 +880,16 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         //      1   C   T
         //      1   C   CAA
         int ibeg = 0;
-        while ( ibeg<alen && rec->d.allele[0][ibeg]==rec->d.allele[ialt][ibeg] && rec->pos + ibeg <= args->prev_base_pos  ) ibeg++;
+        while ( ibeg<alen && rec->d.allele[0][ibeg]==alt_allele[ibeg] && rec->pos + ibeg <= args->prev_base_pos  ) ibeg++;
         for (i=ibeg; i<alen; i++)
-            args->fa_buf.s[idx+i] = rec->d.allele[ialt][i];
+            args->fa_buf.s[idx+i] = alt_allele[i];
+
+        args->fa_frz_mod = idx + alen - ibeg + 1;
     }
     if (args->chain && len_diff != 0)
     {
         // If first nucleotide of both REF and ALT are the same... (indels typically include the nucleotide before the variant)
-        if ( strncasecmp(rec->d.allele[0],rec->d.allele[ialt],1) == 0)
+        if ( strncasecmp(rec->d.allele[0],alt_allele,1) == 0)
         {
             // ...extend the block by 1 bp: start is 1 bp further and alleles are 1 bp shorter
             push_chain_gap(args->chain, rec->pos + 1, rec->rlen - 1, rec->pos + 1 + args->fa_mod_off, alen - 1);
@@ -685,6 +904,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
     args->fa_mod_off += len_diff;
     args->fa_frz_pos  = rec->pos + rec->rlen - 1;
     args->napplied++;
+    if ( rmme_alt ) free(alt_allele);
 }
 
 
@@ -692,17 +912,27 @@ static void mask_region(args_t *args, char *seq, int len)
 {
     int start = args->fa_src_pos - len;
     int end   = args->fa_src_pos;
+    int i;
 
-    if ( !regidx_overlap(args->mask, args->chr,start,end, args->itr) ) return;
-
-    int idx_start, idx_end, i;
-    while ( regitr_overlap(args->itr) )
+    for (i=0; i<args->nmask; i++)
     {
-        idx_start = args->itr->beg - start;
-        idx_end   = args->itr->end - start;
-        if ( idx_start < 0 ) idx_start = 0;
-        if ( idx_end >= len ) idx_end = len - 1;
-        for (i=idx_start; i<=idx_end; i++) seq[i] = 'N';
+        mask_t *mask = &args->mask[i];
+        if ( !regidx_overlap(mask->idx, args->chr,start,end, mask->itr) ) continue;
+
+        int idx_start, idx_end, j;
+        while ( regitr_overlap(mask->itr) )
+        {
+            idx_start = mask->itr->beg - start;
+            idx_end   = mask->itr->end - start;
+            if ( idx_start < 0 ) idx_start = 0;
+            if ( idx_end >= len ) idx_end = len - 1;
+            if ( mask->with==MASK_UC )
+                for (j=idx_start; j<=idx_end; j++) seq[j] = toupper(seq[j]);
+            else if ( mask->with==MASK_LC )
+                for (j=idx_start; j<=idx_end; j++) seq[j] = tolower(seq[j]);
+            else
+                for (j=idx_start; j<=idx_end; j++) seq[j] = mask->with;
+        }
     }
 }
 
@@ -720,13 +950,20 @@ static void consensus(args_t *args)
                 print_chain(args);
                 destroy_chain(args);
             }
-            // apply all cached variants
-            while ( args->vcf_rbuf.n )
+            // apply all cached variants and variants that might have been missed because of short fasta (see test/consensus.9.*)
+            bcf1_t **rec_ptr = NULL;
+            while ( args->rid>=0 && (rec_ptr = next_vcf_line(args)) )
             {
-                bcf1_t *rec = args->vcf_buf[args->vcf_rbuf.f];
+                bcf1_t *rec = *rec_ptr;
                 if ( rec->rid!=args->rid || ( args->fa_end_pos && rec->pos > args->fa_end_pos ) ) break;
-                int i = rbuf_shift(&args->vcf_rbuf);
-                apply_variant(args, args->vcf_buf[i]);
+                apply_variant(args, rec);
+            }
+            if ( args->absent_allele )
+            {
+                int pos = 0;
+                if ( args->vcf_rbuf.n && args->vcf_buf[args->vcf_rbuf.f]->rid==args->rid )
+                    pos = args->vcf_buf[args->vcf_rbuf.f]->pos;
+                apply_absent(args, pos);
             }
             flush_fa_buffer(args, 0);
             init_region(args, str.s+1);
@@ -771,7 +1008,11 @@ static void consensus(args_t *args)
             }
             apply_variant(args, rec);
         }
-        if ( !rec_ptr ) flush_fa_buffer(args, 60);
+        if ( !rec_ptr )
+        {
+            if ( args->absent_allele ) apply_absent(args, args->fa_ori_pos - args->fa_mod_off + args->fa_buf.l);
+            flush_fa_buffer(args, 60);
+        }
     }
     bcf1_t **rec_ptr = NULL;
     while ( args->rid>=0 && (rec_ptr = next_vcf_line(args)) )
@@ -787,6 +1028,7 @@ static void consensus(args_t *args)
         print_chain(args);
         destroy_chain(args);
     }
+    if ( args->absent_allele ) apply_absent(args, HTS_POS_MAX);
     flush_fa_buffer(args, 0);
     bgzf_close(fasta);
     free(str.s);
@@ -801,27 +1043,33 @@ static void usage(args_t *args)
     fprintf(stderr, "       --sample (and, optionally, --haplotype) option will apply genotype\n");
     fprintf(stderr, "       (or haplotype) calls from FORMAT/GT. The program ignores allelic depth\n");
     fprintf(stderr, "       information, such as INFO/AD or FORMAT/AD.\n");
-    fprintf(stderr, "Usage:   bcftools consensus [OPTIONS] <file.vcf.gz>\n");
+    fprintf(stderr, "Usage: bcftools consensus [OPTIONS] <file.vcf.gz>\n");
     fprintf(stderr, "Options:\n");
-    fprintf(stderr, "    -c, --chain <file>         write a chain file for liftover\n");
-    fprintf(stderr, "    -e, --exclude <expr>       exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(stderr, "    -f, --fasta-ref <file>     reference sequence in fasta format\n");
-    fprintf(stderr, "    -H, --haplotype <which>    choose which allele to use from the FORMAT/GT field, note\n");
-    fprintf(stderr, "                               the codes are case-insensitive:\n");
-    fprintf(stderr, "                                   1: first allele from GT, regardless of phasing\n");
-    fprintf(stderr, "                                   2: second allele from GT, regardless of phasing\n");
-    fprintf(stderr, "                                   R: REF allele in het genotypes\n");
-    fprintf(stderr, "                                   A: ALT allele\n");
-    fprintf(stderr, "                                   LR,LA: longer allele and REF/ALT if equal length\n");
-    fprintf(stderr, "                                   SR,SA: shorter allele and REF/ALT if equal length\n");
-    fprintf(stderr, "                                   1pIu,2pIu: first/second allele for phased and IUPAC code for unphased GTs\n");
-    fprintf(stderr, "    -i, --include <expr>       select sites for which the expression is true (see man page for details)\n");
-    fprintf(stderr, "    -I, --iupac-codes          output variants in the form of IUPAC ambiguity codes\n");
-    fprintf(stderr, "    -m, --mask <file>          replace regions with N\n");
-    fprintf(stderr, "    -M, --missing <char>       output <char> instead of skipping the missing genotypes\n");
-    fprintf(stderr, "    -o, --output <file>        write output to a file [standard output]\n");
-    fprintf(stderr, "    -p, --prefix <string>      prefix to add to output sequence names\n");
-    fprintf(stderr, "    -s, --sample <name>        apply variants of the given sample\n");
+    fprintf(stderr, "    -c, --chain FILE               write a chain file for liftover\n");
+    fprintf(stderr, "    -a, --absent CHAR              replace positions absent from VCF with CHAR\n");
+    fprintf(stderr, "    -e, --exclude EXPR             exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(stderr, "    -f, --fasta-ref FILE           reference sequence in fasta format\n");
+    fprintf(stderr, "    -H, --haplotype WHICH          choose which allele to use from the FORMAT/GT field, note\n");
+    fprintf(stderr, "                                   the codes are case-insensitive:\n");
+    fprintf(stderr, "                                       1: first allele from GT, regardless of phasing\n");
+    fprintf(stderr, "                                       2: second allele from GT, regardless of phasing\n");
+    fprintf(stderr, "                                       R: REF allele in het genotypes\n");
+    fprintf(stderr, "                                       A: ALT allele\n");
+    fprintf(stderr, "                                       I: IUPAC code for all genotypes\n");
+    fprintf(stderr, "                                       LR,LA: longer allele and REF/ALT if equal length\n");
+    fprintf(stderr, "                                       SR,SA: shorter allele and REF/ALT if equal length\n");
+    fprintf(stderr, "                                       1pIu,2pIu: first/second allele for phased and IUPAC code for unphased GTs\n");
+    fprintf(stderr, "    -i, --include EXPR             select sites for which the expression is true (see man page for details)\n");
+    fprintf(stderr, "    -I, --iupac-codes              output variants in the form of IUPAC ambiguity codes\n");
+    fprintf(stderr, "        --mark-del CHAR            instead of removing sequence, insert CHAR for deletions\n");
+    fprintf(stderr, "        --mark-ins uc|lc           highlight insertions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
+    fprintf(stderr, "        --mark-snv uc|lc           highlight substitutions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
+    fprintf(stderr, "    -m, --mask FILE                replace regions according to the next --mask-with option. The default is --mask-with N\n");
+    fprintf(stderr, "        --mask-with CHAR|uc|lc     replace with CHAR (skips overlapping variants); change to uppercase (uc) or lowercase (lc)\n");
+    fprintf(stderr, "    -M, --missing CHAR             output CHAR instead of skipping a missing genotype \"./.\"\n");
+    fprintf(stderr, "    -o, --output FILE              write output to a file [standard output]\n");
+    fprintf(stderr, "    -p, --prefix STRING            prefix to add to output sequence names\n");
+    fprintf(stderr, "    -s, --sample NAME              apply variants of the given sample\n");
     fprintf(stderr, "Examples:\n");
     fprintf(stderr, "   # Get the consensus for one region. The fasta header lines are then expected\n");
     fprintf(stderr, "   # in the form \">chr:from-to\".\n");
@@ -837,6 +1085,10 @@ int main_consensus(int argc, char *argv[])
 
     static struct option loptions[] = 
     {
+        {"mark-del",required_argument,NULL,1},
+        {"mark-ins",required_argument,NULL,2},
+        {"mark-snv",required_argument,NULL,3},
+        {"mask-with",1,0,4},
         {"exclude",required_argument,NULL,'e'},
         {"include",required_argument,NULL,'i'},
         {"sample",1,0,'s'},
@@ -846,23 +1098,44 @@ int main_consensus(int argc, char *argv[])
         {"fasta-ref",1,0,'f'},
         {"mask",1,0,'m'},
         {"missing",1,0,'M'},
+        {"absent",1,0,'a'},
         {"chain",1,0,'c'},
         {"prefix",required_argument,0,'p'},
         {0,0,0,0}
     };
     int c;
-    while ((c = getopt_long(argc, argv, "h?s:1Ii:e:H:f:o:m:c:M:p:",loptions,NULL)) >= 0) 
+    while ((c = getopt_long(argc, argv, "h?s:1Ii:e:H:f:o:m:c:M:p:a:",loptions,NULL)) >= 0)
     {
         switch (c) 
         {
+            case  1 : args->mark_del = optarg[0]; break;
+            case  2 :
+                if ( !strcasecmp(optarg,"uc") ) args->mark_ins = 'u';
+                else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = 'l';
+                else error("The argument is not recognised: --mark-ins %s\n",optarg);
+                break;
+            case  3 :
+                if ( !strcasecmp(optarg,"uc") ) args->mark_snv = 'u';
+                else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = 'l';
+                else error("The argument is not recognised: --mark-snv %s\n",optarg);
+                break;
             case 'p': args->chr_prefix = optarg; break;
             case 's': args->sample = optarg; break;
             case 'o': args->output_fname = optarg; break;
             case 'I': args->output_iupac = 1; break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e': 
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i': 
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'f': args->ref_fname = optarg; break;
-            case 'm': args->mask_fname = optarg; break;
+            case 'm': add_mask(args,optarg); break;
+            case  4 : add_mask_with(args,optarg); break;
+            case 'a':
+                args->absent_allele = optarg[0];
+                if ( optarg[1]!=0 ) error("Expected single character with -a, got \"%s\"\n", optarg);
+                break;
             case 'M': 
                 args->missing_allele = optarg[0]; 
                 if ( optarg[1]!=0 ) error("Expected single character with -M, got \"%s\"\n", optarg);
@@ -877,6 +1150,7 @@ int main_consensus(int argc, char *argv[])
                 else if ( !strcasecmp(optarg,"LA") ) args->allele |= PICK_LONG|PICK_ALT;
                 else if ( !strcasecmp(optarg,"SR") ) args->allele |= PICK_SHORT|PICK_REF;
                 else if ( !strcasecmp(optarg,"SA") ) args->allele |= PICK_SHORT|PICK_ALT;
+                else if ( !strcasecmp(optarg,"I") ) args->allele |= PICK_IUPAC;
                 else if ( !strcasecmp(optarg,"1pIu") ) args->allele |= PICK_IUPAC, args->haplotype = 1;
                 else if ( !strcasecmp(optarg,"2pIu") ) args->allele |= PICK_IUPAC, args->haplotype = 2;
                 else
diff --git a/bcftools/consensus.c.pysam.c b/bcftools/consensus.c.pysam.c
index b1b1861..5105a2e 100644
--- a/bcftools/consensus.c.pysam.c
+++ b/bcftools/consensus.c.pysam.c
@@ -2,7 +2,7 @@
 
 /* The MIT License
 
-   Copyright (c) 2014-2017 Genome Research Ltd.
+   Copyright (c) 2014-2021 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
    
@@ -30,6 +30,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
+#include <assert.h>
 #include <errno.h>
 #include <getopt.h>
 #include <unistd.h>
@@ -54,6 +55,9 @@
 #define PICK_SHORT 8
 #define PICK_IUPAC 16
 
+#define TO_UPPER 0
+#define TO_LOWER 1
+
 typedef struct
 {
     int num;                // number of ungapped blocks in this chain
@@ -66,6 +70,16 @@ typedef struct
 }
 chain_t;
 
+#define MASK_LC 1   // mask_t.with code set by "--mask-with lc": change the masked region to lowercase
+#define MASK_UC 2   // mask_t.with code set by "--mask-with uc": change the masked region to uppercase
+#define MASK_SKIP(x) (((x)->with!=MASK_LC && (x)->with!=MASK_UC) ? 1 : 0)   // 1 when the mask replaces bases with a character; apply_variant() then skips overlapping records
+typedef struct
+{
+    char *fname, with;  // fname: regions file given to --mask (pointer stored, not copied); with: replacement character or MASK_LC/MASK_UC
+    regidx_t *idx;      // region index built from fname by regidx_init() in init_data()
+    regitr_t *itr;      // iterator over idx, created by regitr_init() in init_data()
+}
+mask_t;
 
 typedef struct
 {
@@ -73,9 +87,10 @@ typedef struct
     int fa_ori_pos;     // start position of the fa_buffer (wrt original sequence)
     int fa_frz_pos;     // protected position to avoid conflicting variants (last pos for SNPs/ins)
     int fa_mod_off;     // position difference of fa_frz_pos in the ori and modified sequence (ins positive)
+    int fa_frz_mod;     // the fa_buf offset of the protected fa_frz_pos position, includes the modified sequence
     int fa_end_pos;     // region's end position in the original sequence
     int fa_length;      // region's length in the original sequence (in case end_pos not provided in the FASTA header)
-    int fa_case;        // output upper case or lower case?
+    int fa_case;        // output upper case or lower case: TO_UPPER|TO_LOWER
     int fa_src_pos;     // last genomic coordinate read from the input fasta (0-based)
     char prev_base;     // this is only to validate the REF allele in the VCF - the modified fa_buf cannot be used for inserts following deletions, see 600#issuecomment-383186778
     int prev_base_pos;  // the position of prev_base
@@ -86,8 +101,8 @@ typedef struct
     int nvcf_buf, rid;
     char *chr, *chr_prefix;
 
-    regidx_t *mask;
-    regitr_t *itr;
+    mask_t *mask;
+    int nmask;
 
     int chain_id;       // chain_id, to provide a unique ID to each chain in the chain output
     chain_t *chain;     // chain structure to store the sequence of ungapped blocks between the ref and alt sequences
@@ -103,7 +118,10 @@ typedef struct
     FILE *fp_chain;
     char **argv;
     int argc, output_iupac, haplotype, allele, isample, napplied;
-    char *fname, *ref_fname, *sample, *output_fname, *mask_fname, *chain_fname, missing_allele;
+    uint8_t *iupac_bitmask;
+    int miupac_bitmask;
+    char *fname, *ref_fname, *sample, *output_fname, *mask_fname, *chain_fname, missing_allele, absent_allele;
+    char mark_del, mark_ins, mark_snv;
 }
 args_t;
 
@@ -184,7 +202,7 @@ static void push_chain_gap(chain_t *chain, int ref_start, int ref_len, int alt_s
 //     fprintf(bcftools_stderr, "push_chain_gap(*chain, ref_start=%d, ref_len=%d, alt_start=%d, alt_len=%d)\n", ref_start, ref_len, alt_start, alt_len);
     int num = chain->num;
 
-    if (ref_start <= chain->ref_last_block_ori) {
+    if (num && ref_start <= chain->ref_last_block_ori) {
         // In case this variant is back-to-back with the previous one
         chain->ref_last_block_ori = ref_start + ref_len;
         chain->alt_last_block_ori = alt_start + alt_len;
@@ -224,11 +242,13 @@ static void init_data(args_t *args)
         if ( bcf_hdr_nsamples(args->hdr) > 1 ) error("The --sample option is expected with --haplotype\n");
         args->isample = 0;
     }
-    if ( args->mask_fname )
+    int i;
+    for (i=0; i<args->nmask; i++)
     {
-        args->mask = regidx_init(args->mask_fname,NULL,NULL,0,NULL);
-        if ( !args->mask ) error("Failed to initialize mask regions\n");
-        args->itr = regitr_init(args->mask);
+        mask_t *mask = &args->mask[i];
+        mask->idx = regidx_init(mask->fname,NULL,NULL,0,NULL);
+        if ( !mask->idx ) error("Failed to initialize mask regions\n");
+        mask->itr = regitr_init(mask->idx);
     }
     // In case we want to store the chains
     if ( args->chain_fname )
@@ -247,10 +267,28 @@ static void init_data(args_t *args)
     if ( args->isample<0 ) fprintf(bcftools_stderr,"Note: the --sample option not given, applying all records regardless of the genotype\n");
     if ( args->filter_str )
         args->filter = filter_init(args->hdr, args->filter_str);
+    args->rid = -1;
+}
+static void add_mask(args_t *args, char *fname)    // append one mask_t to args->mask for a --mask FILE option; fname is stored by pointer, not copied
+{
+    args->nmask++;
+    args->mask = (mask_t*)realloc(args->mask,args->nmask*sizeof(*args->mask));    // NOTE(review): the realloc result is not checked for NULL
+    mask_t *mask = &args->mask[args->nmask-1];     // the new last element; idx and itr are not set here, init_data() fills them in
+    mask->fname = fname;
+    mask->with  = 'N';                             // default replacement character; add_mask_with() can override it for this mask
+}
+static void add_mask_with(args_t *args, char *with)    // set the replacement mode of the most recently added mask (--mask-with CHAR|uc|lc)
+{
+    if ( !args->nmask ) error("The --mask-with option must follow --mask\n");   // there is no mask yet to apply the setting to
+    mask_t *mask = &args->mask[args->nmask-1];      // only the last mask added by add_mask() is modified
+    if ( !strcasecmp(with,"uc") ) mask->with = MASK_UC;         // case-insensitive keyword: uppercase the region
+    else if ( !strcasecmp(with,"lc") ) mask->with = MASK_LC;    // case-insensitive keyword: lowercase the region
+    else if ( strlen(with)!=1 ) error("Expected \"lc\", \"uc\", or a single character with the --mask-with option\n");
+    else mask->with = *with;                        // any other single character is used verbatim as the replacement
+}
-
 static void destroy_data(args_t *args)
 {
+    free(args->iupac_bitmask);
     if (args->filter) filter_destroy(args->filter);
     bcf_sr_destroy(args->files);
     int i;
@@ -259,8 +297,13 @@ static void destroy_data(args_t *args)
     free(args->vcf_buf);
     free(args->fa_buf.s);
     free(args->chr);
-    if ( args->mask ) regidx_destroy(args->mask);
-    if ( args->itr ) regitr_destroy(args->itr);
+    for (i=0; i<args->nmask; i++)
+    {
+        mask_t *mask = &args->mask[i];
+        regidx_destroy(mask->idx);
+        regitr_destroy(mask->itr);
+    }
+    free(args->mask);
     if ( args->chain_fname )
         if ( fclose(args->fp_chain) ) error("Close failed: %s\n", args->chain_fname);
     if ( fclose(args->fp_out) ) error("Close failed: %s\n", args->output_fname);
@@ -299,6 +342,7 @@ static void init_region(args_t *args, char *line)
     args->fa_src_pos = from;
     args->fa_mod_off = 0;
     args->fa_frz_pos = -1;
+    args->fa_frz_mod = -1;
     args->fa_case    = -1;
     args->vcf_rbuf.n = 0;
     bcf_sr_seek(args->files,line,args->fa_ori_pos);
@@ -347,7 +391,6 @@ static void unread_vcf_line(args_t *args, bcf1_t **rec_ptr)
 static void flush_fa_buffer(args_t *args, int len)
 {
     if ( !args->fa_buf.l ) return;
-
     int nwr = 0;
     while ( nwr + 60 <= args->fa_buf.l )
     {
@@ -358,6 +401,8 @@ static void flush_fa_buffer(args_t *args, int len)
     if ( nwr )
         args->fa_ori_pos += nwr;
 
+    args->fa_frz_mod -= nwr;
+
     if ( len )
     {
         // not finished on this chr yet and the buffer cannot be emptied completely
@@ -377,21 +422,84 @@ static void flush_fa_buffer(args_t *args, int len)
     args->fa_mod_off = 0;
     args->fa_buf.l = 0;
 }
+static void apply_absent(args_t *args, hts_pos_t pos)   // overwrite the buffered bases between the frozen offset (fa_frz_mod) and pos with args->absent_allele
+{
+    if ( !args->fa_buf.l || pos <= args->fa_frz_pos + 1 || pos <= args->fa_ori_pos ) return;   // nothing to fill: empty buffer, pos not past the base after fa_frz_pos, or not past the buffer start
+    // ie: exclusive end, the fa_buf index of pos (pos - fa_ori_pos + fa_mod_off) capped at the buffer length; ib: first index to fill, 0 when fa_frz_mod is negative
+    int ie = pos && pos - args->fa_ori_pos + args->fa_mod_off < args->fa_buf.l ? pos - args->fa_ori_pos + args->fa_mod_off : args->fa_buf.l;
+    int ib = args->fa_frz_mod < 0 ? 0 : args->fa_frz_mod;
+    int i;
+    for (i=ib; i<ie; i++)
+        args->fa_buf.s[i] = args->absent_allele;
+}
+static void freeze_ref(args_t *args, bcf1_t *rec)   // advance the protected position over the span of rec without touching the sequence buffer
+{
+    if ( args->fa_frz_pos >= rec->pos + rec->rlen - 1 ) return;    // already frozen up to or beyond the last base of this record
+    args->fa_frz_pos = rec->pos + rec->rlen - 1;                   // last position of the record: pos + rlen - 1
+    args->fa_frz_mod = rec->pos - args->fa_ori_pos + args->fa_mod_off + rec->rlen;   // fa_buf index just past the record (record index + rlen)
+}
+// Build the replacement string for a deletion that is marked rather than removed. Returns a
+// malloc'ed, NUL-terminated string of exactly rlen characters: the kept allele (alt, or ref
+// when alt is NULL, i.e. symbolic <DEL>) followed by the `mark` character repeated up to
+// rlen. The caller owns the returned string and must free() it.
+static char *mark_del(char *ref, int rlen, char *alt, int mark)
+{
+    char *src = alt ? alt : ref;    // alt==NULL: symbolic <DEL>, the REF bases are kept
+    int i, nkeep = strlen(src);
+    // apply_variant() can shorten rec->rlen to what is left in the fasta buffer while REF
+    // stays untouched, so the kept allele can be longer than rlen. Never write past rlen.
+    if ( rlen < 0 ) rlen = 0;
+    if ( nkeep > rlen ) nkeep = rlen;
+    char *out = malloc(rlen+1);
+    for (i=0; i<nkeep; i++) out[i] = src[i];
+    for (; i<rlen; i++) out[i] = mark;
+    out[rlen] = 0;
+    return out;
+}
+static void mark_ins(char *ref, char *alt, char mark)   // change, in place, the case of the alt characters beyond the length of ref
+{
+    int i, nref = strlen(ref), nalt = strlen(alt);      // only the indexes nref..nalt-1 of alt are modified
+    if ( mark=='l' )                                    // 'l' selects lowercase, any other value uppercase
+        for (i=nref; i<nalt; i++) alt[i] = tolower(alt[i]);
+    else
+        for (i=nref; i<nalt; i++) alt[i] = toupper(alt[i]);
+}
+static void mark_snv(char *ref, char *alt, char mark)   // change, in place, the case of the alt characters that differ from ref at the same index
+{
+    int i, nref = strlen(ref), nalt = strlen(alt);
+    int n = nref < nalt ? nref : nalt;                  // compare only the part present in both alleles
+    if ( mark=='l' )                                    // 'l' selects lowercase, any other value uppercase
+    {
+        for (i=0; i<n; i++)
+            if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = tolower(alt[i]);   // case-insensitive comparison, matching bases are left as they are
+    }
+    else
+    {
+        for (i=0; i<n; i++)
+            if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = toupper(alt[i]);   // case-insensitive comparison, matching bases are left as they are
+    }
+}
 static void apply_variant(args_t *args, bcf1_t *rec)
 {
     static int warned_haplotype = 0;
 
-    if ( rec->n_allele==1 && !args->missing_allele ) return;
+    if ( args->absent_allele ) apply_absent(args, rec->pos);
+    if ( rec->n_allele==1 && !args->missing_allele && !args->absent_allele ) { return; }
 
+    int i,j;
     if ( args->mask )
     {
         char *chr = (char*)bcf_hdr_id2name(args->hdr,args->rid);
         int start = rec->pos;
         int end   = rec->pos + rec->rlen - 1;
-        if ( regidx_overlap(args->mask, chr,start,end,NULL) ) return;
+        for (i=0; i<args->nmask; i++)
+        {
+            mask_t *mask = &args->mask[i];
+            if ( MASK_SKIP(mask) && regidx_overlap(mask->idx, chr,start,end,NULL) ) return;
+        }
     }
 
-    int i, ialt = 1;    // the alternate allele
+    int ialt = 1;    // the alternate allele
     if ( args->isample >= 0 )
     {
         bcf_unpack(rec, BCF_UN_FMT);
@@ -405,6 +513,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         enum { use_hap, use_iupac, pick_one } action = use_hap;
         if ( args->allele==PICK_IUPAC )
         {
+            if ( !args->haplotype ) action = use_iupac;
             if ( !bcf_gt_is_phased(ptr[0]) && !bcf_gt_is_phased(ptr[fmt->n-1]) ) action = use_iupac;
         }
         else if ( args->output_iupac ) action = use_iupac;
@@ -443,41 +552,40 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         }
         else if ( action==use_iupac ) 
         {
-            ialt = ptr[0];
-            if ( bcf_gt_is_missing(ialt) || ialt==bcf_int32_vector_end )
+            ialt = -1;
+            int is_missing = 0, alen = 0, mlen = 0, fallback_alt = -1;
+            for (i=0; i<fmt->n; i++)
             {
-                if ( !args->missing_allele ) return;
-                ialt = -1;
-            }
-            else
-                ialt = bcf_gt_allele(ialt);
+                if ( bcf_gt_is_missing(ptr[i]) ) { is_missing = 1; continue; }
+                if ( ptr[i]==(uint8_t)bcf_int8_vector_end ) break;
+                int jalt = bcf_gt_allele(ptr[i]);
+                if ( jalt >= rec->n_allele ) error("Invalid VCF, too few ALT alleles at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
+                if ( fallback_alt <= 0 ) fallback_alt = jalt;
 
-            int jalt;
-            if ( fmt->n>1 )
-            {
-                jalt = ptr[1];
-                if ( bcf_gt_is_missing(jalt) )
+                int l = strlen(rec->d.allele[jalt]);
+                for (j=0; j<l; j++)
+                    if ( iupac2bitmask(rec->d.allele[jalt][j]) < 0 ) break;
+                if ( j<l ) continue; // symbolic allele, breakpoint or invalid character in the allele
+
+                if ( l > mlen )
                 {
-                    if ( !args->missing_allele ) return;
-                    ialt = -1;
+                    hts_expand(uint8_t,l,args->miupac_bitmask,args->iupac_bitmask);
+                    for (j=mlen; j<l; j++) args->iupac_bitmask[j] = 0;
+                    mlen = l;
                 }
-                else if ( jalt==bcf_int32_vector_end ) jalt = ialt;
-                else
-                    jalt = bcf_gt_allele(jalt);
-            }
-            else jalt = ialt;
-
-            if ( ialt>=0 )
-            {
-                if ( rec->n_allele <= ialt || rec->n_allele <= jalt ) error("Invalid VCF, too few ALT alleles at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
-                if ( ialt!=jalt && !rec->d.allele[ialt][1] && !rec->d.allele[jalt][1] ) // is this a het snp?
+                if ( jalt>0 && l>alen )
                 {
-                    char ial = rec->d.allele[ialt][0];
-                    char jal = rec->d.allele[jalt][0];
-                    if ( !ialt ) ialt = jalt;   // only ialt is used, make sure 0/1 is not ignored
-                    rec->d.allele[ialt][0] = gt2iupac(ial,jal);
+                    alen = l;
+                    ialt = jalt;
                 }
+                for (j=0; j<l; j++)
+                    args->iupac_bitmask[j] |= iupac2bitmask(rec->d.allele[jalt][j]);
             }
+            if ( alen > 0 )
+                for (j=0; j<alen; j++) rec->d.allele[ialt][j] = bitmask2iupac(args->iupac_bitmask[j]);
+            else if ( fallback_alt >= 0 )
+                ialt = fallback_alt;
+            else if ( is_missing && !args->missing_allele ) return;
         }
         else
         {
@@ -522,17 +630,50 @@ static void apply_variant(args_t *args, bcf1_t *rec)
                 }
             }
         }
-        if ( !ialt ) return;  // ref allele
+        if ( !ialt )
+        {
+            // ref allele
+            if ( args->absent_allele ) freeze_ref(args,rec);
+            return;
+        }
         if ( rec->n_allele <= ialt ) error("Broken VCF, too few alts at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
     }
-    else if ( args->output_iupac && !rec->d.allele[0][1] && !rec->d.allele[1][1] )
+    else if ( args->output_iupac && rec->n_allele>1 )
     {
-        char ial = rec->d.allele[0][0];
-        char jal = rec->d.allele[1][0];
-        rec->d.allele[1][0] = gt2iupac(ial,jal);
+        int ialt, alen = 0, mlen = 0;
+        for (i=0; i<rec->n_allele; i++)
+        {
+            int l = strlen(rec->d.allele[i]);
+            for (j=0; j<l; j++)
+                if ( iupac2bitmask(rec->d.allele[i][j]) < 0 ) break;
+            if ( j<l ) continue;    // symbolic allele, breakpoint or invalid character in the allele
+
+            if ( l > mlen )
+            {
+                hts_expand(uint8_t,l,args->miupac_bitmask,args->iupac_bitmask);
+                for (j=mlen; j<l; j++) args->iupac_bitmask[j] = 0;
+                mlen = l;
+            }
+            if ( i>0 && l>alen )
+            {
+                alen = l;
+                ialt = i;
+            }
+            for (j=0; j<l; j++)
+                args->iupac_bitmask[j] |= iupac2bitmask(rec->d.allele[i][j]);
+        }
+        if ( alen > 0 )
+            for (j=0; j<alen; j++) rec->d.allele[ialt][j] = bitmask2iupac(args->iupac_bitmask[j]);
+        else
+            ialt = 1;
     }
 
-    if ( rec->n_allele==1 && ialt!=-1 ) return; // non-missing reference
+    if ( rec->n_allele==1 && ialt!=-1 )
+    {
+        // non-missing reference
+        if ( args->absent_allele ) freeze_ref(args,rec);
+        return;
+    }
     if ( ialt==-1 )
     {
         char alleles[4];
@@ -544,15 +685,34 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         ialt = 1;
     }
 
+    // For some variant types POS+REF refer to the base *before* the event; in such case set trim_beg
+    int trim_beg = 0;
+    int var_type = bcf_get_variant_type(rec,ialt);
+    int var_len  = rec->d.var[ialt].n;
+    if ( var_type & VCF_INDEL )
+    {
+        // normally indel starts one base after, but not if the first base of the fa reference is deleted
+        if ( rec->d.allele[0][0] == rec->d.allele[ialt][0] )
+            trim_beg = 1;
+        else
+            trim_beg = 0;
+    }
+    else if ( (var_type & VCF_OTHER) && !strcasecmp(rec->d.allele[ialt],"<DEL>") )
+    {
+        trim_beg = 1;
+        var_len  = 1 - rec->rlen;
+    }
+    else if ( (var_type & VCF_OTHER) && !strncasecmp(rec->d.allele[ialt],"<INS",4) ) trim_beg = 1;
+
     // Overlapping variant?
     if ( rec->pos <= args->fa_frz_pos )
     {
         // Can be still OK iff this is an insertion (and which does not follow another insertion, see #888).
         // This still may not be enough for more complicated cases with multiple duplicate positions
         // and other types in between. In such case let the user normalize the VCF and remove duplicates.
+
         int overlap = 0;
-        if ( rec->pos < args->fa_frz_pos || !(bcf_get_variant_type(rec,ialt) & VCF_INDEL) ) overlap = 1;
-        else if ( rec->d.var[ialt].n <= 0 || args->prev_is_insert ) overlap = 1;
+        if ( rec->pos < args->fa_frz_pos || !trim_beg || var_len==0 || args->prev_is_insert ) overlap = 1;
 
         if ( overlap )
         {
@@ -562,6 +722,9 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         
     }
 
+    char *alt_allele = rec->d.allele[ialt];
+    int rmme_alt = 0;
+
     int len_diff = 0, alen = 0;
     int idx = rec->pos - args->fa_ori_pos + args->fa_mod_off;
     if ( idx<0 )
@@ -572,10 +735,10 @@ static void apply_variant(args_t *args, bcf1_t *rec)
     if ( rec->rlen > args->fa_buf.l - idx )
     {
         rec->rlen = args->fa_buf.l - idx;
-        alen = strlen(rec->d.allele[ialt]);
+        alen = strlen(alt_allele);
         if ( alen > rec->rlen )
         {
-            rec->d.allele[ialt][rec->rlen] = 0;
+            alt_allele[rec->rlen] = 0;
             fprintf(bcftools_stderr,"Warning: trimming variant starting at %s:%"PRId64"\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
         }
     }
@@ -583,14 +746,44 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         error("FIXME: %s:%"PRId64" .. idx=%d, ori_pos=%d, len=%"PRIu64", off=%d\n",bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1,idx,args->fa_ori_pos,(uint64_t)args->fa_buf.l,args->fa_mod_off);
 
     // sanity check the reference base
-    if ( rec->d.allele[ialt][0]=='<' )
+    if ( alt_allele[0]=='<' )
     {
-        if ( strcasecmp(rec->d.allele[ialt], "<DEL>") )
-            error("Symbolic alleles other than <DEL> are currently not supported: %s at %s:%"PRId64"\n",rec->d.allele[ialt],bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
-        assert( rec->d.allele[0][1]==0 );           // todo: for now expecting strlen(REF) = 1
-        len_diff = 1-rec->rlen;
-        rec->d.allele[ialt] = rec->d.allele[0];     // according to VCF spec, REF must precede the event
-        alen = strlen(rec->d.allele[ialt]);
+        // TODO: symbolic deletions probably need more work above with PICK_SHORT|PICK_LONG
+
+        if ( strcasecmp(alt_allele,"<DEL>") && strcasecmp(alt_allele,"<*>") && strcasecmp(alt_allele,"<NON_REF>") )
+            error("Symbolic alleles other than <DEL>, <*> or <NON_REF> are currently not supported, e.g. %s at %s:%"PRId64".\n"
+                  "Please use filtering expressions to exclude such sites, for example by running with: -e 'ALT~\"<.*>\"'\n",
+                alt_allele,bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
+        if ( !strcasecmp(alt_allele,"<DEL>") )
+        {
+            static int multibase_ref_del_warned = 0;
+            if ( rec->d.allele[0][1]!=0 && !multibase_ref_del_warned )
+            {
+                fprintf(bcftools_stderr,
+                    "Warning: one REF base is expected with <DEL>, assuming the actual deletion starts at POS+1 at %s:%"PRId64".\n"
+                    "         (This warning is printed only once.)\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
+                multibase_ref_del_warned = 1;
+            }
+            if ( args->mark_del )   // insert dashes instead of delete sequence
+            {
+                alt_allele = mark_del(rec->d.allele[0], rec->rlen, NULL, args->mark_del);
+                alen = rec->rlen;
+                len_diff = 0;
+                rmme_alt = 1;
+            }
+            else
+            {
+                len_diff = 1-rec->rlen;
+                alt_allele = rec->d.allele[0];     // according to VCF spec, the first REF base must precede the event
+                alen = 1;
+            }
+        }
+        else
+        {
+            // <*>  or <NON_REF> .. gVCF, evidence for the reference allele throughout the whole block
+            freeze_ref(args,rec);
+            return;
+        }
     }
     else if ( strncasecmp(rec->d.allele[0],args->fa_buf.s+idx,rec->rlen) )
     {
@@ -616,39 +809,63 @@ static void apply_variant(args_t *args, bcf1_t *rec)
             }
             error(
                     "The fasta sequence does not match the REF allele at %s:%"PRId64":\n"
-                    "   .vcf: [%s] <- (REF)\n" 
-                    "   .vcf: [%s] <- (ALT)\n" 
-                    "   .fa:  [%s]%c%s\n",
-                    bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1, rec->d.allele[0], rec->d.allele[ialt], args->fa_buf.s+idx,
+                    "   REF .vcf: [%s]\n"
+                    "   ALT .vcf: [%s]\n"
+                    "   REF .fa : [%s]%c%s\n",
+                    bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1, rec->d.allele[0], alt_allele, args->fa_buf.s+idx,
                     tmp?tmp:' ',tmp?args->fa_buf.s+idx+rec->rlen+1:""
                  );
         }
-        alen = strlen(rec->d.allele[ialt]);
+        alen = strlen(alt_allele);
         len_diff = alen - rec->rlen;
+
+        if ( args->mark_del && len_diff<0 ) 
+        {
+            alt_allele = mark_del(rec->d.allele[0], rec->rlen, alt_allele, args->mark_del);
+            alen = rec->rlen;
+            len_diff = 0;
+            rmme_alt = 1;
+        }
     }
     else
     {
-        alen = strlen(rec->d.allele[ialt]);
+        alen = strlen(alt_allele);
         len_diff = alen - rec->rlen;
+
+        if ( args->mark_del && len_diff<0 ) 
+        {
+            alt_allele = mark_del(rec->d.allele[0], rec->rlen, alt_allele, args->mark_del);
+            alen = rec->rlen;
+            len_diff = 0;
+            rmme_alt = 1;
+        }
     }
 
-    if ( args->fa_case )
-        for (i=0; i<alen; i++) rec->d.allele[ialt][i] = toupper(rec->d.allele[ialt][i]);
+    args->fa_case = toupper(args->fa_buf.s[idx])==args->fa_buf.s[idx] ? TO_UPPER : TO_LOWER;
+    if ( args->fa_case==TO_UPPER )
+        for (i=0; i<alen; i++) alt_allele[i] = toupper(alt_allele[i]);
     else
-        for (i=0; i<alen; i++) rec->d.allele[ialt][i] = tolower(rec->d.allele[ialt][i]);
+        for (i=0; i<alen; i++) alt_allele[i] = tolower(alt_allele[i]);
+
+    if ( args->mark_ins && len_diff>0 )
+        mark_ins(rec->d.allele[0], alt_allele, args->mark_ins);
+    if ( args->mark_snv )
+        mark_snv(rec->d.allele[0], alt_allele, args->mark_snv);
 
     if ( len_diff <= 0 )
     {
         // deletion or same size event
-        for (i=0; i<alen; i++)
-            args->fa_buf.s[idx+i] = rec->d.allele[ialt][i];
+        assert( args->fa_buf.l >= idx+rec->rlen );
+        args->prev_base = args->fa_buf.s[idx+rec->rlen-1];
+        args->prev_base_pos = rec->pos + rec->rlen - 1;
+        args->prev_is_insert = 0;
+        args->fa_frz_mod = idx + alen;
+
+        for (i=trim_beg; i<alen; i++)
+            args->fa_buf.s[idx+i] = alt_allele[i];
 
         if ( len_diff )
             memmove(args->fa_buf.s+idx+alen,args->fa_buf.s+idx+rec->rlen,args->fa_buf.l-idx-rec->rlen);
-
-        args->prev_base = rec->d.allele[0][rec->rlen - 1];
-        args->prev_base_pos = rec->pos + rec->rlen - 1;
-        args->prev_is_insert = 0;
     }
     else
     {
@@ -665,14 +882,16 @@ static void apply_variant(args_t *args, bcf1_t *rec)
         //      1   C   T
         //      1   C   CAA
         int ibeg = 0;
-        while ( ibeg<alen && rec->d.allele[0][ibeg]==rec->d.allele[ialt][ibeg] && rec->pos + ibeg <= args->prev_base_pos  ) ibeg++;
+        while ( ibeg<alen && rec->d.allele[0][ibeg]==alt_allele[ibeg] && rec->pos + ibeg <= args->prev_base_pos  ) ibeg++;
         for (i=ibeg; i<alen; i++)
-            args->fa_buf.s[idx+i] = rec->d.allele[ialt][i];
+            args->fa_buf.s[idx+i] = alt_allele[i];
+
+        args->fa_frz_mod = idx + alen - ibeg + 1;
     }
     if (args->chain && len_diff != 0)
     {
         // If first nucleotide of both REF and ALT are the same... (indels typically include the nucleotide before the variant)
-        if ( strncasecmp(rec->d.allele[0],rec->d.allele[ialt],1) == 0)
+        if ( strncasecmp(rec->d.allele[0],alt_allele,1) == 0)
         {
             // ...extend the block by 1 bp: start is 1 bp further and alleles are 1 bp shorter
             push_chain_gap(args->chain, rec->pos + 1, rec->rlen - 1, rec->pos + 1 + args->fa_mod_off, alen - 1);
@@ -687,6 +906,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
     args->fa_mod_off += len_diff;
     args->fa_frz_pos  = rec->pos + rec->rlen - 1;
     args->napplied++;
+    if ( rmme_alt ) free(alt_allele);
 }
 
 
@@ -694,17 +914,27 @@ static void mask_region(args_t *args, char *seq, int len)
 {
     int start = args->fa_src_pos - len;
     int end   = args->fa_src_pos;
+    int i;
 
-    if ( !regidx_overlap(args->mask, args->chr,start,end, args->itr) ) return;
-
-    int idx_start, idx_end, i;
-    while ( regitr_overlap(args->itr) )
+    for (i=0; i<args->nmask; i++)
     {
-        idx_start = args->itr->beg - start;
-        idx_end   = args->itr->end - start;
-        if ( idx_start < 0 ) idx_start = 0;
-        if ( idx_end >= len ) idx_end = len - 1;
-        for (i=idx_start; i<=idx_end; i++) seq[i] = 'N';
+        mask_t *mask = &args->mask[i];
+        if ( !regidx_overlap(mask->idx, args->chr,start,end, mask->itr) ) continue;
+
+        int idx_start, idx_end, j;
+        while ( regitr_overlap(mask->itr) )
+        {
+            idx_start = mask->itr->beg - start;
+            idx_end   = mask->itr->end - start;
+            if ( idx_start < 0 ) idx_start = 0;
+            if ( idx_end >= len ) idx_end = len - 1;
+            if ( mask->with==MASK_UC )
+                for (j=idx_start; j<=idx_end; j++) seq[j] = toupper(seq[j]);
+            else if ( mask->with==MASK_LC )
+                for (j=idx_start; j<=idx_end; j++) seq[j] = tolower(seq[j]);
+            else
+                for (j=idx_start; j<=idx_end; j++) seq[j] = mask->with;
+        }
     }
 }
 
@@ -722,13 +952,20 @@ static void consensus(args_t *args)
                 print_chain(args);
                 destroy_chain(args);
             }
-            // apply all cached variants
-            while ( args->vcf_rbuf.n )
+            // apply all cached variants and variants that might have been missed because of short fasta (see test/consensus.9.*)
+            bcf1_t **rec_ptr = NULL;
+            while ( args->rid>=0 && (rec_ptr = next_vcf_line(args)) )
             {
-                bcf1_t *rec = args->vcf_buf[args->vcf_rbuf.f];
+                bcf1_t *rec = *rec_ptr;
                 if ( rec->rid!=args->rid || ( args->fa_end_pos && rec->pos > args->fa_end_pos ) ) break;
-                int i = rbuf_shift(&args->vcf_rbuf);
-                apply_variant(args, args->vcf_buf[i]);
+                apply_variant(args, rec);
+            }
+            if ( args->absent_allele )
+            {
+                int pos = 0;
+                if ( args->vcf_rbuf.n && args->vcf_buf[args->vcf_rbuf.f]->rid==args->rid )
+                    pos = args->vcf_buf[args->vcf_rbuf.f]->pos;
+                apply_absent(args, pos);
             }
             flush_fa_buffer(args, 0);
             init_region(args, str.s+1);
@@ -773,7 +1010,11 @@ static void consensus(args_t *args)
             }
             apply_variant(args, rec);
         }
-        if ( !rec_ptr ) flush_fa_buffer(args, 60);
+        if ( !rec_ptr )
+        {
+            if ( args->absent_allele ) apply_absent(args, args->fa_ori_pos - args->fa_mod_off + args->fa_buf.l);
+            flush_fa_buffer(args, 60);
+        }
     }
     bcf1_t **rec_ptr = NULL;
     while ( args->rid>=0 && (rec_ptr = next_vcf_line(args)) )
@@ -789,6 +1030,7 @@ static void consensus(args_t *args)
         print_chain(args);
         destroy_chain(args);
     }
+    if ( args->absent_allele ) apply_absent(args, HTS_POS_MAX);
     flush_fa_buffer(args, 0);
     bgzf_close(fasta);
     free(str.s);
@@ -803,33 +1045,39 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "       --sample (and, optionally, --haplotype) option will apply genotype\n");
     fprintf(bcftools_stderr, "       (or haplotype) calls from FORMAT/GT. The program ignores allelic depth\n");
     fprintf(bcftools_stderr, "       information, such as INFO/AD or FORMAT/AD.\n");
-    fprintf(bcftools_stderr, "Usage:   bcftools consensus [OPTIONS] <file.vcf.gz>\n");
+    fprintf(bcftools_stderr, "Usage: bcftools consensus [OPTIONS] <file.vcf.gz>\n");
     fprintf(bcftools_stderr, "Options:\n");
-    fprintf(bcftools_stderr, "    -c, --chain <file>         write a chain file for liftover\n");
-    fprintf(bcftools_stderr, "    -e, --exclude <expr>       exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(bcftools_stderr, "    -f, --fasta-ref <file>     reference sequence in fasta format\n");
-    fprintf(bcftools_stderr, "    -H, --haplotype <which>    choose which allele to use from the FORMAT/GT field, note\n");
-    fprintf(bcftools_stderr, "                               the codes are case-insensitive:\n");
-    fprintf(bcftools_stderr, "                                   1: first allele from GT, regardless of phasing\n");
-    fprintf(bcftools_stderr, "                                   2: second allele from GT, regardless of phasing\n");
-    fprintf(bcftools_stderr, "                                   R: REF allele in het genotypes\n");
-    fprintf(bcftools_stderr, "                                   A: ALT allele\n");
-    fprintf(bcftools_stderr, "                                   LR,LA: longer allele and REF/ALT if equal length\n");
-    fprintf(bcftools_stderr, "                                   SR,SA: shorter allele and REF/ALT if equal length\n");
-    fprintf(bcftools_stderr, "                                   1pIu,2pIu: first/second allele for phased and IUPAC code for unphased GTs\n");
-    fprintf(bcftools_stderr, "    -i, --include <expr>       select sites for which the expression is true (see man page for details)\n");
-    fprintf(bcftools_stderr, "    -I, --iupac-codes          output variants in the form of IUPAC ambiguity codes\n");
-    fprintf(bcftools_stderr, "    -m, --mask <file>          replace regions with N\n");
-    fprintf(bcftools_stderr, "    -M, --missing <char>       output <char> instead of skipping the missing genotypes\n");
-    fprintf(bcftools_stderr, "    -o, --output <file>        write output to a file [standard output]\n");
-    fprintf(bcftools_stderr, "    -p, --prefix <string>      prefix to add to output sequence names\n");
-    fprintf(bcftools_stderr, "    -s, --sample <name>        apply variants of the given sample\n");
+    fprintf(bcftools_stderr, "    -c, --chain FILE               write a chain file for liftover\n");
+    fprintf(bcftools_stderr, "    -a, --absent CHAR              replace positions absent from VCF with CHAR\n");
+    fprintf(bcftools_stderr, "    -e, --exclude EXPR             exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(bcftools_stderr, "    -f, --fasta-ref FILE           reference sequence in fasta format\n");
+    fprintf(bcftools_stderr, "    -H, --haplotype WHICH          choose which allele to use from the FORMAT/GT field, note\n");
+    fprintf(bcftools_stderr, "                                   the codes are case-insensitive:\n");
+    fprintf(bcftools_stderr, "                                       1: first allele from GT, regardless of phasing\n");
+    fprintf(bcftools_stderr, "                                       2: second allele from GT, regardless of phasing\n");
+    fprintf(bcftools_stderr, "                                       R: REF allele in het genotypes\n");
+    fprintf(bcftools_stderr, "                                       A: ALT allele\n");
+    fprintf(bcftools_stderr, "                                       I: IUPAC code for all genotypes\n");
+    fprintf(bcftools_stderr, "                                       LR,LA: longer allele and REF/ALT if equal length\n");
+    fprintf(bcftools_stderr, "                                       SR,SA: shorter allele and REF/ALT if equal length\n");
+    fprintf(bcftools_stderr, "                                       1pIu,2pIu: first/second allele for phased and IUPAC code for unphased GTs\n");
+    fprintf(bcftools_stderr, "    -i, --include EXPR             select sites for which the expression is true (see man page for details)\n");
+    fprintf(bcftools_stderr, "    -I, --iupac-codes              output variants in the form of IUPAC ambiguity codes\n");
+    fprintf(bcftools_stderr, "        --mark-del CHAR            instead of removing sequence, insert CHAR for deletions\n");
+    fprintf(bcftools_stderr, "        --mark-ins uc|lc           highlight insertions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
+    fprintf(bcftools_stderr, "        --mark-snv uc|lc           highlight substitutions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
+    fprintf(bcftools_stderr, "    -m, --mask FILE                replace regions according to the next --mask-with option. The default is --mask-with N\n");
+    fprintf(bcftools_stderr, "        --mask-with CHAR|uc|lc     replace with CHAR (skips overlapping variants); change to uppercase (uc) or lowercase (lc)\n");
+    fprintf(bcftools_stderr, "    -M, --missing CHAR             output CHAR instead of skipping a missing genotype \"./.\"\n");
+    fprintf(bcftools_stderr, "    -o, --output FILE              write output to a file [standard output]\n");
+    fprintf(bcftools_stderr, "    -p, --prefix STRING            prefix to add to output sequence names\n");
+    fprintf(bcftools_stderr, "    -s, --sample NAME              apply variants of the given sample\n");
     fprintf(bcftools_stderr, "Examples:\n");
     fprintf(bcftools_stderr, "   # Get the consensus for one region. The fasta header lines are then expected\n");
     fprintf(bcftools_stderr, "   # in the form \">chr:from-to\".\n");
     fprintf(bcftools_stderr, "   samtools faidx ref.fa 8:11870-11890 | bcftools consensus in.vcf.gz > out.fa\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_consensus(int argc, char *argv[])
@@ -839,6 +1087,10 @@ int main_consensus(int argc, char *argv[])
 
     static struct option loptions[] = 
     {
+        {"mark-del",required_argument,NULL,1},
+        {"mark-ins",required_argument,NULL,2},
+        {"mark-snv",required_argument,NULL,3},
+        {"mask-with",1,0,4},
         {"exclude",required_argument,NULL,'e'},
         {"include",required_argument,NULL,'i'},
         {"sample",1,0,'s'},
@@ -848,23 +1100,44 @@ int main_consensus(int argc, char *argv[])
         {"fasta-ref",1,0,'f'},
         {"mask",1,0,'m'},
         {"missing",1,0,'M'},
+        {"absent",1,0,'a'},
         {"chain",1,0,'c'},
         {"prefix",required_argument,0,'p'},
         {0,0,0,0}
     };
     int c;
-    while ((c = getopt_long(argc, argv, "h?s:1Ii:e:H:f:o:m:c:M:p:",loptions,NULL)) >= 0) 
+    while ((c = getopt_long(argc, argv, "h?s:1Ii:e:H:f:o:m:c:M:p:a:",loptions,NULL)) >= 0)
     {
         switch (c) 
         {
+            case  1 : args->mark_del = optarg[0]; break;
+            case  2 :
+                if ( !strcasecmp(optarg,"uc") ) args->mark_ins = 'u';
+                else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = 'l';
+                else error("The argument is not recognised: --mark-ins %s\n",optarg);
+                break;
+            case  3 :
+                if ( !strcasecmp(optarg,"uc") ) args->mark_snv = 'u';
+                else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = 'l';
+                else error("The argument is not recognised: --mark-snv %s\n",optarg);
+                break;
             case 'p': args->chr_prefix = optarg; break;
             case 's': args->sample = optarg; break;
             case 'o': args->output_fname = optarg; break;
             case 'I': args->output_iupac = 1; break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e': 
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i': 
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'f': args->ref_fname = optarg; break;
-            case 'm': args->mask_fname = optarg; break;
+            case 'm': add_mask(args,optarg); break;
+            case  4 : add_mask_with(args,optarg); break;
+            case 'a':
+                args->absent_allele = optarg[0];
+                if ( optarg[1]!=0 ) error("Expected single character with -a, got \"%s\"\n", optarg);
+                break;
             case 'M': 
                 args->missing_allele = optarg[0]; 
                 if ( optarg[1]!=0 ) error("Expected single character with -M, got \"%s\"\n", optarg);
@@ -879,6 +1152,7 @@ int main_consensus(int argc, char *argv[])
                 else if ( !strcasecmp(optarg,"LA") ) args->allele |= PICK_LONG|PICK_ALT;
                 else if ( !strcasecmp(optarg,"SR") ) args->allele |= PICK_SHORT|PICK_REF;
                 else if ( !strcasecmp(optarg,"SA") ) args->allele |= PICK_SHORT|PICK_ALT;
+                else if ( !strcasecmp(optarg,"I") ) args->allele |= PICK_IUPAC;
                 else if ( !strcasecmp(optarg,"1pIu") ) args->allele |= PICK_IUPAC, args->haplotype = 1;
                 else if ( !strcasecmp(optarg,"2pIu") ) args->allele |= PICK_IUPAC, args->haplotype = 2;
                 else
diff --git a/bcftools/convert.c b/bcftools/convert.c
index fbf98e0..71dfb51 100644
--- a/bcftools/convert.c
+++ b/bcftools/convert.c
@@ -1,6 +1,6 @@
 /*  convert.c -- functions for converting between VCF/BCF and related formats.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -25,6 +25,7 @@ THE SOFTWARE.  */
 #include <stdio.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
 #include <errno.h>
@@ -40,6 +41,7 @@ THE SOFTWARE.  */
 #include "bcftools.h"
 #include "variantkey.h"
 #include "convert.h"
+#include "filter.h"
 
 #define T_CHROM   1
 #define T_POS     2
@@ -73,6 +75,7 @@ THE SOFTWARE.  */
 #define T_RSX          30   // RSID HEX
 #define T_VKX          31   // VARIANTKEY HEX
 #define T_PBINOM       32
+#define T_NPASS        33
 
 typedef struct _fmt_t
 {
@@ -503,7 +506,7 @@ static void process_tbcsq(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isam
                 type_t val = x[j]; \
                 if ( !val ) continue; \
                 for (i=0; i<nbits; i+=2) \
-                    if ( val & (mask<<i) ) { kputs(csq->str[(j*32+i)/2], &csq->hap1); kputc_(',', &csq->hap1); } \
+                    if ( val & (mask<<i) ) { kputs(csq->str[(j*30+i)/2], &csq->hap1); kputc_(',', &csq->hap1); } \
             } \
         } \
         if ( fmt->subscript<0 || fmt->subscript==2 ) \
@@ -513,7 +516,7 @@ static void process_tbcsq(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isam
                 type_t val = x[j]; \
                 if ( !val ) continue; \
                 for (i=1; i<nbits; i+=2) \
-                    if ( val & (1<<i) ) { kputs(csq->str[(j*32+i)/2], &csq->hap2); kputc_(',', &csq->hap2); } \
+                    if ( val & (1<<i) ) { kputs(csq->str[(j*30+i)/2], &csq->hap2); kputc_(',', &csq->hap2); } \
             } \
         } \
     }
@@ -521,7 +524,7 @@ static void process_tbcsq(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isam
     {
         case BCF_BT_INT8:  BRANCH(uint8_t, 8); break;
         case BCF_BT_INT16: BRANCH(uint16_t,16); break;
-        case BCF_BT_INT32: BRANCH(uint32_t,32); break;
+        case BCF_BT_INT32: BRANCH(uint32_t,30); break;  // 2 bits unused to account for the reserved BCF values
         default: error("Unexpected type: %d\n", fmt->fmt->type); exit(1); break;
     }
     #undef BRANCH
@@ -782,8 +785,8 @@ static void process_gp_to_prob3(convert_t *convert, bcf1_t *line, fmt_t *fmt, in
         int j;
         for (j=0; j<n; j++)
         {
-            if ( ptr[j]==bcf_int32_vector_end ) break;
-            if ( ptr[j]==bcf_int32_missing ) { ptr[j]=0; continue; }
+            if ( bcf_float_is_vector_end(ptr[j]) ) break;
+            if ( bcf_float_is_missing(ptr[j]) ) { ptr[j]=0; continue; }
             if ( ptr[j]<0 || ptr[j]>1 ) error("[%s:%"PRId64":%f] GP value outside range [0,1]; bcftools convert expects the VCF4.3+ spec for the GP field encoding genotype posterior probabilities", bcf_seqname(convert->header,line),(int64_t) line->pos+1,ptr[j]);
             sum+=ptr[j];
         }
@@ -1122,6 +1125,21 @@ static void process_variantkey_hex(convert_t *convert, bcf1_t *line, fmt_t *fmt,
     ksprintf(str, "%016" PRIx64 "", vk);
 }
 
+static void process_npass(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)  // N_PASS(expr) handler: appends to str the number of samples with a non-zero filter flag for this record; isample is unused
+{
+    int i, nsmpl = 0;                       // nsmpl: running count of samples with a non-zero flag
+    filter_t *flt = (filter_t*) fmt->usr;   // filter stored in fmt->usr by register_tag() for T_NPASS
+    const uint8_t *smpl;                    // receives the per-sample flag array from filter_test()
+    filter_test(flt,line,&smpl);            // return value ignored; NOTE(review): smpl is read below without a NULL check - presumably filter_test() always sets it here, confirm
+    for (i=0; i<convert->nsamples; i++)     // NOTE(review): indexed by i, whereas convert_line() indexes its subset array via convert->samples[js] - verify both agree when samples are subset
+        if ( smpl[i] ) nsmpl++;
+    kputd(nsmpl, str);                      // integer count written through kputd(); other integer fields in this file use kputw()
+}
+static void destroy_npass(void *usr)  // installed as fmt->destroy for T_NPASS; usr is the filter_t* kept in fmt->usr
+{
+    filter_destroy((filter_t*)usr);   // hand the stored filter to filter_destroy()
+}
+
 static void process_pbinom(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
     int i;
@@ -1225,11 +1243,17 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
             else if ( !strcmp("VKX",key) ) { fmt->type = T_VKX; }
             else if ( id>=0 && bcf_hdr_idinfo_exists(convert->header,BCF_HL_INFO,id) ) { fmt->type = T_INFO; }
         }
-        if ( fmt->type==T_PBINOM )
+        else if ( fmt->type==T_PBINOM )
         {
             fmt->id = bcf_hdr_id2int(convert->header, BCF_DT_ID, fmt->key);
             if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_FMT, fmt->id)  ) error("No such FORMAT tag defined in the header: %s\n", fmt->key);
         }
+        else if ( fmt->type==T_NPASS )
+        {
+            filter_t *flt = filter_init(convert->header,key);
+            convert->max_unpack |= filter_max_unpack(flt);
+            fmt->usr = (void*) flt;
+        }
     }
 
     switch (fmt->type)
@@ -1266,6 +1290,7 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
         case T_RSX: fmt->handler = &process_rsid_hex; break;
         case T_VKX: fmt->handler = &process_variantkey_hex; break;
         case T_PBINOM: fmt->handler = &process_pbinom; convert->max_unpack |= BCF_UN_FMT; break;
+        case T_NPASS: fmt->handler = &process_npass; fmt->destroy = &destroy_npass; break;
         default: error("TODO: handler for type %d\n", fmt->type);
     }
     if ( key && fmt->type==T_INFO )
@@ -1344,6 +1369,8 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
             register_tag(convert, T_PBINOM, str.s, is_gtf);
             q++;
         }
+        else if ( !strcmp(str.s,"N_PASS") )
+            error("N_PASS() must be placed outside the square brackets\n");
         else
         {
             fmt_t *fmt = register_tag(convert, T_FORMAT, str.s, is_gtf);
@@ -1380,7 +1407,7 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
         else if ( !strcmp(str.s, "_GT_TO_HAP2") ) register_tag(convert, T_GT_TO_HAP2, str.s, is_gtf);
         else if ( !strcmp(str.s, "RSX") ) register_tag(convert, T_RSX, str.s, is_gtf);
         else if ( !strcmp(str.s, "VKX") ) register_tag(convert, T_VKX, str.s, is_gtf);
-        else if ( !strcmp(str.s,"pbinom") ) error("Error: pbinom() is currently supported only with FORMAT tags. (todo)\n");
+        else if ( !strcmp(str.s,"PBINOM") ) error("Error: PBINOM() is currently supported only with FORMAT tags. (todo)\n");
         else if ( !strcmp(str.s, "INFO") )
         {
             if ( *q=='/' )
@@ -1398,6 +1425,22 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
         }
         else if ( !strcmp(str.s, "FORMAT") )
              register_tag(convert, T_FORMAT, NULL, 0);
+        else if ( !strcmp(str.s,"N_PASS") )
+        {
+            if ( *q!='(' ) error("Could not parse the expression: %s\n", convert->format_str);
+            p = ++q;
+            str.l = 0;
+            int nopen = 1;
+            while ( *q && nopen )
+            {
+                if ( *q=='(' ) nopen++;
+                else if ( *q==')' ) nopen--;
+                q++;
+            }
+            if ( q-p==0 || nopen ) error("Could not parse format string: %s\n", convert->format_str);
+            kputsn(p, q-p-1, &str);
+            register_tag(convert, T_NPASS, str.s, is_gtf);
+        }
         else
         {
             fmt_t *fmt = register_tag(convert, T_INFO, str.s, is_gtf);
@@ -1565,7 +1608,8 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
             for (js=0; js<convert->nsamples; js++)
             {
                 // Skip samples when filtering was requested
-                if ( convert->subset_samples && *convert->subset_samples && !(*convert->subset_samples)[js] ) continue;
+                int ks = convert->samples[js];
+                if ( convert->subset_samples && *convert->subset_samples && !(*convert->subset_samples)[ks] ) continue;
 
                 // Here comes a hack designed for TBCSQ. When running on large files,
                 // such as 1000GP, there are too many empty fields in the output and
@@ -1574,7 +1618,6 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
                 // brackets here. This may be changed in future, time will show...
                 size_t l_start = str->l;
             
-                int ks = convert->samples[js];
                 for (k=i; k<j; k++)
                 {
                     if ( convert->fmt[k].type == T_MASK )
diff --git a/bcftools/convert.c.pysam.c b/bcftools/convert.c.pysam.c
index 8f04911..e3c995f 100644
--- a/bcftools/convert.c.pysam.c
+++ b/bcftools/convert.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  convert.c -- functions for converting between VCF/BCF and related formats.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -27,6 +27,7 @@ THE SOFTWARE.  */
 #include <stdio.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
 #include <errno.h>
@@ -42,6 +43,7 @@ THE SOFTWARE.  */
 #include "bcftools.h"
 #include "variantkey.h"
 #include "convert.h"
+#include "filter.h"
 
 #define T_CHROM   1
 #define T_POS     2
@@ -75,6 +77,7 @@ THE SOFTWARE.  */
 #define T_RSX          30   // RSID HEX
 #define T_VKX          31   // VARIANTKEY HEX
 #define T_PBINOM       32
+#define T_NPASS        33
 
 typedef struct _fmt_t
 {
@@ -270,7 +273,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
             case BCF_BT_INT32: if ( info->v1.i==bcf_int32_missing ) kputc('.', str); else kputw(info->v1.i, str); break;
             case BCF_BT_FLOAT: if ( bcf_float_is_missing(info->v1.f) ) kputc('.', str); else kputd(info->v1.f, str); break;
             case BCF_BT_CHAR:  kputc(info->v1.i, str); break;
-            default: fprintf(bcftools_stderr,"todo: type %d\n", info->type); exit(1); break;
+            default: fprintf(bcftools_stderr,"todo: type %d\n", info->type); bcftools_exit(1); break;
         }
     }
     else if ( fmt->subscript >=0 )
@@ -292,7 +295,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
             case BCF_BT_INT32: BRANCH(int32_t, val==bcf_int32_missing, val==bcf_int32_vector_end, kputw(val, str)); break;
             case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(val), bcf_float_is_vector_end(val), kputd(val, str)); break;
             case BCF_BT_CHAR:  _copy_field((char*)info->vptr, info->vptr_len, fmt->subscript, str); break;
-            default: fprintf(bcftools_stderr,"todo: type %d\n", info->type); exit(1); break;
+            default: fprintf(bcftools_stderr,"todo: type %d\n", info->type); bcftools_exit(1); break;
         }
         #undef BRANCH
     }
@@ -505,7 +508,7 @@ static void process_tbcsq(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isam
                 type_t val = x[j]; \
                 if ( !val ) continue; \
                 for (i=0; i<nbits; i+=2) \
-                    if ( val & (mask<<i) ) { kputs(csq->str[(j*32+i)/2], &csq->hap1); kputc_(',', &csq->hap1); } \
+                    if ( val & (mask<<i) ) { kputs(csq->str[(j*30+i)/2], &csq->hap1); kputc_(',', &csq->hap1); } \
             } \
         } \
         if ( fmt->subscript<0 || fmt->subscript==2 ) \
@@ -515,7 +518,7 @@ static void process_tbcsq(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isam
                 type_t val = x[j]; \
                 if ( !val ) continue; \
                 for (i=1; i<nbits; i+=2) \
-                    if ( val & (1<<i) ) { kputs(csq->str[(j*32+i)/2], &csq->hap2); kputc_(',', &csq->hap2); } \
+                    if ( val & (1<<i) ) { kputs(csq->str[(j*30+i)/2], &csq->hap2); kputc_(',', &csq->hap2); } \
             } \
         } \
     }
@@ -523,8 +526,8 @@ static void process_tbcsq(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isam
     {
         case BCF_BT_INT8:  BRANCH(uint8_t, 8); break;
         case BCF_BT_INT16: BRANCH(uint16_t,16); break;
-        case BCF_BT_INT32: BRANCH(uint32_t,32); break;
-        default: error("Unexpected type: %d\n", fmt->fmt->type); exit(1); break;
+        case BCF_BT_INT32: BRANCH(uint32_t,30); break;  // 2 bits unused to account for the reserved BCF values
+        default: error("Unexpected type: %d\n", fmt->fmt->type); bcftools_exit(1); break;
     }
     #undef BRANCH
 
@@ -784,8 +787,8 @@ static void process_gp_to_prob3(convert_t *convert, bcf1_t *line, fmt_t *fmt, in
         int j;
         for (j=0; j<n; j++)
         {
-            if ( ptr[j]==bcf_int32_vector_end ) break;
-            if ( ptr[j]==bcf_int32_missing ) { ptr[j]=0; continue; }
+            if ( bcf_float_is_vector_end(ptr[j]) ) break;
+            if ( bcf_float_is_missing(ptr[j]) ) { ptr[j]=0; continue; }
             if ( ptr[j]<0 || ptr[j]>1 ) error("[%s:%"PRId64":%f] GP value outside range [0,1]; bcftools convert expects the VCF4.3+ spec for the GP field encoding genotype posterior probabilities", bcf_seqname(convert->header,line),(int64_t) line->pos+1,ptr[j]);
             sum+=ptr[j];
         }
@@ -1124,6 +1127,21 @@ static void process_variantkey_hex(convert_t *convert, bcf1_t *line, fmt_t *fmt,
     ksprintf(str, "%016" PRIx64 "", vk);
 }
 
+static void process_npass(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)  // N_PASS(expr) handler: appends to str the number of samples with a non-zero filter flag for this record; isample is unused
+{
+    int i, nsmpl = 0;                       // nsmpl: running count of samples with a non-zero flag
+    filter_t *flt = (filter_t*) fmt->usr;   // filter stored in fmt->usr by register_tag() for T_NPASS
+    const uint8_t *smpl;                    // receives the per-sample flag array from filter_test()
+    filter_test(flt,line,&smpl);            // return value ignored; NOTE(review): smpl is read below without a NULL check - presumably filter_test() always sets it here, confirm
+    for (i=0; i<convert->nsamples; i++)     // NOTE(review): indexed by i, whereas convert_line() indexes its subset array via convert->samples[js] - verify both agree when samples are subset
+        if ( smpl[i] ) nsmpl++;
+    kputd(nsmpl, str);                      // integer count written through kputd(); other integer fields in this file use kputw()
+}
+static void destroy_npass(void *usr)  // installed as fmt->destroy for T_NPASS; usr is the filter_t* kept in fmt->usr
+{
+    filter_destroy((filter_t*)usr);   // hand the stored filter to filter_destroy()
+}
+
 static void process_pbinom(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isample, kstring_t *str)
 {
     int i;
@@ -1227,11 +1245,17 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
             else if ( !strcmp("VKX",key) ) { fmt->type = T_VKX; }
             else if ( id>=0 && bcf_hdr_idinfo_exists(convert->header,BCF_HL_INFO,id) ) { fmt->type = T_INFO; }
         }
-        if ( fmt->type==T_PBINOM )
+        else if ( fmt->type==T_PBINOM )
         {
             fmt->id = bcf_hdr_id2int(convert->header, BCF_DT_ID, fmt->key);
             if ( !bcf_hdr_idinfo_exists(convert->header,BCF_HL_FMT, fmt->id)  ) error("No such FORMAT tag defined in the header: %s\n", fmt->key);
         }
+        else if ( fmt->type==T_NPASS )
+        {
+            filter_t *flt = filter_init(convert->header,key);
+            convert->max_unpack |= filter_max_unpack(flt);
+            fmt->usr = (void*) flt;
+        }
     }
 
     switch (fmt->type)
@@ -1268,6 +1292,7 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
         case T_RSX: fmt->handler = &process_rsid_hex; break;
         case T_VKX: fmt->handler = &process_variantkey_hex; break;
         case T_PBINOM: fmt->handler = &process_pbinom; convert->max_unpack |= BCF_UN_FMT; break;
+        case T_NPASS: fmt->handler = &process_npass; fmt->destroy = &destroy_npass; break;
         default: error("TODO: handler for type %d\n", fmt->type);
     }
     if ( key && fmt->type==T_INFO )
@@ -1346,6 +1371,8 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
             register_tag(convert, T_PBINOM, str.s, is_gtf);
             q++;
         }
+        else if ( !strcmp(str.s,"N_PASS") )
+            error("N_PASS() must be placed outside the square brackets\n");
         else
         {
             fmt_t *fmt = register_tag(convert, T_FORMAT, str.s, is_gtf);
@@ -1382,7 +1409,7 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
         else if ( !strcmp(str.s, "_GT_TO_HAP2") ) register_tag(convert, T_GT_TO_HAP2, str.s, is_gtf);
         else if ( !strcmp(str.s, "RSX") ) register_tag(convert, T_RSX, str.s, is_gtf);
         else if ( !strcmp(str.s, "VKX") ) register_tag(convert, T_VKX, str.s, is_gtf);
-        else if ( !strcmp(str.s,"pbinom") ) error("Error: pbinom() is currently supported only with FORMAT tags. (todo)\n");
+        else if ( !strcmp(str.s,"PBINOM") ) error("Error: PBINOM() is currently supported only with FORMAT tags. (todo)\n");
         else if ( !strcmp(str.s, "INFO") )
         {
             if ( *q=='/' )
@@ -1400,6 +1427,22 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf)
         }
         else if ( !strcmp(str.s, "FORMAT") )
              register_tag(convert, T_FORMAT, NULL, 0);
+        else if ( !strcmp(str.s,"N_PASS") )
+        {
+            if ( *q!='(' ) error("Could not parse the expression: %s\n", convert->format_str);
+            p = ++q;
+            str.l = 0;
+            int nopen = 1;
+            while ( *q && nopen )
+            {
+                if ( *q=='(' ) nopen++;
+                else if ( *q==')' ) nopen--;
+                q++;
+            }
+            if ( q-p==0 || nopen ) error("Could not parse format string: %s\n", convert->format_str);
+            kputsn(p, q-p-1, &str);
+            register_tag(convert, T_NPASS, str.s, is_gtf);
+        }
         else
         {
             fmt_t *fmt = register_tag(convert, T_INFO, str.s, is_gtf);
@@ -1567,7 +1610,8 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
             for (js=0; js<convert->nsamples; js++)
             {
                 // Skip samples when filtering was requested
-                if ( convert->subset_samples && *convert->subset_samples && !(*convert->subset_samples)[js] ) continue;
+                int ks = convert->samples[js];
+                if ( convert->subset_samples && *convert->subset_samples && !(*convert->subset_samples)[ks] ) continue;
 
                 // Here comes a hack designed for TBCSQ. When running on large files,
                 // such as 1000GP, there are too many empty fields in the output and
@@ -1576,7 +1620,6 @@ int convert_line(convert_t *convert, bcf1_t *line, kstring_t *str)
                 // brackets here. This may be changed in future, time will show...
                 size_t l_start = str->l;
             
-                int ks = convert->samples[js];
                 for (k=i; k<j; k++)
                 {
                     if ( convert->fmt[k].type == T_MASK )
diff --git a/bcftools/csq.c b/bcftools/csq.c
index c9a0132..8e3ee3b 100644
--- a/bcftools/csq.c
+++ b/bcftools/csq.c
@@ -1,9 +1,6 @@
-//$bt csq -f $ref -g $gff -p r -Ou -o /dev/null /lustre/scratch116/vr/projects/g1k/phase3/release/ALL.chr4.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
-
-
 /* The MIT License
 
-   Copyright (c) 2016-2018 Genome Research Ltd.
+   Copyright (c) 2016-2021 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
    
@@ -136,6 +133,7 @@
  
 #include <stdio.h>
 #include <stdlib.h>
+#include <assert.h>
 #include <getopt.h>
 #include <math.h>
 #include <inttypes.h>
@@ -592,8 +590,8 @@ typedef struct _args_t
     char *bcsq_tag;
     int argc, output_type;
     int phase, verbosity, local_csq, record_cmd_line;
-    int ncsq_max, nfmt_bcsq;    // maximum number of csq per site that can be accessed from FORMAT/BCSQ
-    int ncsq_small_warned;
+    int ncsq2_max, nfmt_bcsq;   // maximum number of csq per site that can be accessed from FORMAT/BCSQ (*2 and 1 bit skipped to avoid BCF missing values)
+    int ncsq2_small_warned;
     int brief_predictions;
     
     int rid;                    // current chromosome
@@ -680,11 +678,42 @@ static inline int feature_set_seq(args_t *args, char *chr_beg, char *chr_end)
     int iseq;
     if ( khash_str2int_get(aux->seq2int, chr_beg, &iseq)!=0 )
     {
-        hts_expand(char*, aux->nseq+1, aux->mseq, aux->seq);
-        aux->seq[aux->nseq] = strdup(chr_beg);
-        iseq = khash_str2int_inc(aux->seq2int, aux->seq[aux->nseq]);
-        aux->nseq++;
-        assert( aux->nseq < 1<<29 );  // see gf_gene_t.iseq and ftr_t.iseq
+        // check for possible mismatch in chromosome naming convention such as chrX vs X
+        char *new_chr = NULL;
+        if ( faidx_has_seq(args->fai,chr_beg) )
+            new_chr = strdup(chr_beg);                  // valid chr name, the same in gff and faidx
+        else
+        {
+            int len = strlen(chr_beg);
+            if ( !strncmp("chr",chr_beg,3) && len>3 )
+                new_chr = strdup(chr_beg+3);            // gff has the prefix, faidx does not
+            else
+            {
+                new_chr = malloc(len+4);                // gff does not have the prefix, faidx has; 3 for "chr" + len + 1 for the terminating NUL
+                memcpy(new_chr,"chr",3);
+                memcpy(new_chr+3,chr_beg,len);
+                new_chr[len+3] = 0;                     // last byte of the len+4 buffer
+            }
+            if ( !faidx_has_seq(args->fai,new_chr) )    // modification did not help, this sequence is not in fai
+            {
+                static int unkwn_chr_warned = 0;        // the warning is printed at most once per process
+                if ( !unkwn_chr_warned && args->verbosity>0 )
+                    fprintf(stderr,"Warning: GFF chromosome \"%s\" not part of the reference genome\n",chr_beg);
+                unkwn_chr_warned = 1;
+                free(new_chr);
+                new_chr = strdup(chr_beg);              // use the original sequence name
+            }
+        }
+        if ( khash_str2int_get(aux->seq2int, new_chr, &iseq)!=0 )   // the (possibly renamed) sequence is not registered yet
+        {
+            hts_expand(char*, aux->nseq+1, aux->mseq, aux->seq);
+            aux->seq[aux->nseq] = new_chr;              // aux->seq takes ownership of new_chr
+            iseq = khash_str2int_inc(aux->seq2int, aux->seq[aux->nseq]);
+            aux->nseq++;
+            assert( aux->nseq < 1<<29 );  // see gf_gene_t.iseq and ftr_t.iseq
+        }
+        else
+            free(new_chr);                              // name not stored here, release the temporary copy
     }
     chr_end[1] = c;
     return iseq;
@@ -1140,7 +1169,8 @@ void tscript_init_cds(args_t *args)
                         tscript_ok = 0;
                         break;
                     }
-                    error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d)\n",args->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len);
+                    error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d). Use the --force option to proceed anyway (at your own risk).\n",
+                        args->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len);
                 }
                 len += tr->cds[i]->len; 
             }
@@ -1178,7 +1208,8 @@ void tscript_init_cds(args_t *args)
                         tscript_ok = 0;
                         break;
                     }
-                    error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d)\n",args->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len);
+                    error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d). Use the --force option to proceed anyway (at your own risk).\n",
+                        args->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len);
                 }
                 len += tr->cds[i]->len;
             }
@@ -1196,8 +1227,17 @@ void tscript_init_cds(args_t *args)
             gf_cds_t *a = tr->cds[i-1];
             gf_cds_t *b = tr->cds[i];
             if ( a->beg + a->len - 1 >= b->beg ) 
-                error("Error: CDS overlap in the transcript %"PRIu32": %"PRIu32"-%"PRIu32" and %"PRIu32"-%"PRIu32"\n", 
-                    kh_key(aux->id2tr, k), a->beg+1,a->beg+a->len, b->beg+1,b->beg+b->len);
+            {
+                if ( args->force )
+                {
+                    fprintf(stderr,"Warning: GFF contains overlapping CDS %s: %"PRIu32"-%"PRIu32" and %"PRIu32"-%"PRIu32".\n",
+                        args->tscript_ids.str[tr->id], a->beg+1,a->beg+a->len, b->beg+1,b->beg+b->len);
+                }
+                else
+                    error("Error: CDS overlap in the transcript %s: %"PRIu32"-%"PRIu32" and %"PRIu32"-%"PRIu32", is this intended (e.g. ribosomal slippage)?\n"
+                          "       Use the --force option to override (at your own risk).\n", 
+                            args->tscript_ids.str[tr->id], a->beg+1,a->beg+a->len, b->beg+1,b->beg+b->len);
+            }
         }
         if ( len%3 != 0 )
         {
@@ -1337,9 +1377,22 @@ void init_gff(args_t *args)
     khash_str2int_destroy_free(aux->ignored_biotypes);
 }
 
+static inline int ncsq2_to_nfmt(int ncsq2)     // number of FORMAT values needed to hold ncsq2 flag bits
+{
+    return 1 + (ncsq2 - 1) / 30;                // 30 bits are used per value, see icsq2_to_bit()
+}
+static inline void icsq2_to_bit(int icsq2, int *ival, int *ibit)   // split a flag index into value index and bit position
+{
+    *ival = icsq2 / 30;     // which value holds the flag
+    *ibit = icsq2 % 30;     // bit within that value; 0..29 for a non-negative icsq2
+}
+
 void init_data(args_t *args)
 {
-    args->nfmt_bcsq = 1 + (args->ncsq_max - 1) / 32; 
+    args->nfmt_bcsq = ncsq2_to_nfmt(args->ncsq2_max);
+
+    args->fai = fai_load(args->fa_fname);
+    if ( !args->fai ) error("Failed to load the fai index: %s\n", args->fa_fname);
 
     if ( args->verbosity > 0 ) fprintf(stderr,"Parsing %s ...\n", args->gff_fname);
     init_gff(args);
@@ -1349,9 +1402,6 @@ void init_data(args_t *args)
     if ( args->filter_str )
         args->filter = filter_init(args->hdr, args->filter_str);
 
-    args->fai = fai_load(args->fa_fname);
-    if ( !args->fai ) error("Failed to load the fai index: %s\n", args->fa_fname);
-
     args->pos2vbuf  = kh_init(pos2vbuf);
     args->active_tr = khp_init(trhp);
     args->hap = (hap_t*) calloc(1,sizeof(hap_t));
@@ -1395,7 +1445,7 @@ void init_data(args_t *args)
     }
     else
     {
-        args->out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
+        args->out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode2(args->output_type,args->output_fname));
         if ( args->out_fh == NULL ) error("[%s] Error: cannot write to %s: %s\n", __func__,args->output_fname? args->output_fname : "standard output", strerror(errno));
         if ( args->n_threads > 0)
             hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->sr->p);
@@ -1410,6 +1460,11 @@ void init_data(args_t *args)
 
 void destroy_data(args_t *args)
 {
+    if ( args->ncsq2_small_warned )             // non-zero once a consequence index did not fit; holds the largest such index
+        fprintf(stderr,
+            "Note: Some samples had too many consequences to be represented in %d bytes. If you need to record them all,\n"
+            "      the limit can be increased by running with `--ncsq %d`.\n",ncsq2_to_nfmt(args->ncsq2_max)*4,1+args->ncsq2_small_warned/2);   // bytes = values*4, assumes 32-bit FORMAT values (TODO confirm); the old /8 truncated to 0
+
     regidx_destroy(args->idx_cds);
     regidx_destroy(args->idx_utr);
     regidx_destroy(args->idx_exon);
@@ -2683,13 +2738,13 @@ void kput_vcsq(args_t *args, vcsq_t *csq, kstring_t *str)
 
 void kprint_aa_prediction(args_t *args, int beg, kstring_t *aa, kstring_t *str)
 {
-    if ( !args->brief_predictions )
+    if ( !args->brief_predictions || (int)aa->l - args->brief_predictions < 3 )   // trimming is off, or it would drop fewer than 3 characters
         kputs(aa->s, str);
     else
     {
-        int len = aa->l;
+        int i, len = aa->l;
         if ( aa->s[len-1]=='*' ) len--;
-        kputc(aa->s[0], str);
+        for (i=0; i<len && i<args->brief_predictions; i++) kputc(aa->s[i], str);   // keep at most brief_predictions leading characters
         kputs("..", str);
         kputw(beg+len, str);
     }
@@ -3083,22 +3138,24 @@ static inline void hap_stage_vcf(args_t *args, tscript_t *tr, int ismpl, int iha
     {
         csq_t *csq = node->csq_list + i;
         vrec_t *vrec = csq->vrec;
-        int icsq = 2*csq->idx + ihap;
-        if ( icsq >= args->ncsq_max ) // more than ncsq_max consequences, so can't fit it in FMT
+        int icsq2 = 2*csq->idx + ihap;
+        if ( icsq2 >= args->ncsq2_max ) // more than ncsq2_max consequences, so can't fit it in FMT
         {
-            if ( args->verbosity && (!args->ncsq_small_warned || args->verbosity > 1) )
+            if ( args->verbosity && (!args->ncsq2_small_warned || args->verbosity > 1) )
             {
                 fprintf(stderr,
                     "Warning: Too many consequences for sample %s at %s:%"PRId64", keeping the first %d and skipping the rest.\n",
                     args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),(int64_t) vrec->line->pos+1,csq->idx);
-                if ( !args->ncsq_small_warned )
+                if ( !args->ncsq2_small_warned )
                     fprintf(stderr,"         The limit can be increased by setting the --ncsq parameter. This warning is printed only once.\n");
-                args->ncsq_small_warned = 1;
             }
+            if ( args->ncsq2_small_warned < icsq2 ) args->ncsq2_small_warned = icsq2;
             break;
         }
-        if ( vrec->nfmt < 1 + icsq/32 ) vrec->nfmt = 1 + icsq/32;
-        vrec->smpl[ismpl*args->nfmt_bcsq + icsq/32] |= 1 << (icsq % 32);
+        int ival, ibit;
+        icsq2_to_bit(icsq2, &ival,&ibit);
+        if ( vrec->nfmt < 1 + ival ) vrec->nfmt = 1 + ival;
+        vrec->smpl[ismpl*args->nfmt_bcsq + ival] |= 1 << ibit;
     }
 }
 
@@ -3727,22 +3784,26 @@ void csq_stage(args_t *args, csq_t *csq, bcf1_t *rec)
         {
             if ( gt[j]==bcf_gt_missing || gt[j]==bcf_int32_vector_end || !bcf_gt_allele(gt[j]) ) continue;
 
-            int icsq = 2*csq->idx + j;
-            if ( icsq >= args->ncsq_max ) // more than ncsq_max consequences, so can't fit it in FMT
+            int icsq2 = 2*csq->idx + j;
+            if ( icsq2 >= args->ncsq2_max ) // more than ncsq2_max consequences, so can't fit it in FMT
             {
                 int ismpl = args->smpl->idx[i];
-                if ( args->verbosity && (!args->ncsq_small_warned || args->verbosity > 1) )
+                if ( args->verbosity && (!args->ncsq2_small_warned || args->verbosity > 1) )
                 {
                     fprintf(stderr,
                             "Warning: Too many consequences for sample %s at %s:%"PRId64", keeping the first %d and skipping the rest.\n",
-                            args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),(int64_t) vrec->line->pos+1,icsq+1);
-                    if ( !args->ncsq_small_warned )
+                            args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),(int64_t) vrec->line->pos+1,icsq2+1);
+                    if ( !args->ncsq2_small_warned )
                         fprintf(stderr,"         The limit can be increased by setting the --ncsq parameter. This warning is printed only once.\n");
-                    args->ncsq_small_warned = 1;
+                    args->ncsq2_small_warned = 1;
                 }
+                if ( args->ncsq2_small_warned < icsq2 ) args->ncsq2_small_warned = icsq2;   // remember the largest index that did not fit
+                break;
             }
-            if ( vrec->nfmt < 1 + icsq/32 ) vrec->nfmt = 1 + icsq/32;
-            vrec->smpl[i*args->nfmt_bcsq + icsq/32] |= 1 << (icsq % 32);
+            int ival, ibit;
+            icsq2_to_bit(icsq2, &ival,&ibit);
+            if ( vrec->nfmt < 1 + ival ) vrec->nfmt = 1 + ival;
+            vrec->smpl[i*args->nfmt_bcsq + ival] |= 1 << ibit;
         }
     }
 }
@@ -4041,39 +4102,39 @@ static const char *usage(void)
     return 
         "\n"
         "About: Haplotype-aware consequence caller.\n"
-        "Usage: bcftools csq [options] in.vcf\n"
+        "Usage: bcftools csq [OPTIONS] in.vcf\n"
         "\n"
         "Required options:\n"
-        "   -f, --fasta-ref <file>          reference file in fasta format\n"
-        "   -g, --gff-annot <file>          gff3 annotation file\n"
+        "   -f, --fasta-ref FILE            reference file in fasta format\n"
+        "   -g, --gff-annot FILE            gff3 annotation file\n"
         "\n"
         "CSQ options:\n"
-        "   -b, --brief-predictions         annotate with abbreviated protein-changing predictions\n"
-        "   -c, --custom-tag <string>       use this tag instead of the default BCSQ\n"
+        "   -B, --trim-protein-seq INT      abbreviate protein-changing predictions to max INT aminoacids\n" 
+        "   -c, --custom-tag STRING         use this tag instead of the default BCSQ\n"
         "   -l, --local-csq                 localized predictions, consider only one VCF record at a time\n"
-        "   -n, --ncsq <int>                maximum number of consequences to consider per site [16]\n"
-        "   -p, --phase <a|m|r|R|s>         how to handle unphased heterozygous genotypes: [r]\n"
+        "   -n, --ncsq INT                  maximum number of per-haplotype consequences to consider for each site [15]\n"
+        "   -p, --phase a|m|r|R|s           how to handle unphased heterozygous genotypes: [r]\n"
         "                                     a: take GTs as is, create haplotypes regardless of phase (0/1 -> 0|1)\n"
         "                                     m: merge *all* GTs into a single haplotype (0/1 -> 1, 1/2 -> 1)\n"
         "                                     r: require phased GTs, throw an error on unphased het GTs\n"
         "                                     R: create non-reference haplotypes if possible (0/1 -> 1|1, 1/2 -> 1|2)\n"
         "                                     s: skip unphased hets\n"
         "Options:\n"
-        "   -e, --exclude <expr>            exclude sites for which the expression is true\n"
+        "   -e, --exclude EXPR              exclude sites for which the expression is true\n"
         "       --force                     run even if some sanity checks fail\n"
-        "   -i, --include <expr>            select sites for which the expression is true\n"
+        "   -i, --include EXPR              select sites for which the expression is true\n"
         "       --no-version                do not append version and command line to the header\n"
-        "   -o, --output <file>             write output to a file [standard output]\n"
-        "   -O, --output-type <b|u|z|v|t>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF\n"
+        "   -o, --output FILE               write output to a file [standard output]\n"
+        "   -O, --output-type b|u|z|v|t     b: compressed BCF, u: uncompressed BCF, z: compressed VCF\n"
         "                                   v: uncompressed VCF, t: plain tab-delimited text output [v]\n"
-        "   -r, --regions <region>          restrict to comma-separated list of regions\n"
-        "   -R, --regions-file <file>       restrict to regions listed in a file\n"
-        "   -s, --samples <-|list>          samples to include or \"-\" to apply all variants and ignore samples\n"
-        "   -S, --samples-file <file>       samples to include\n"
-        "   -t, --targets <region>          similar to -r but streams rather than index-jumps\n"
-        "   -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n"
-        "       --threads <int>             use multithreading with <int> worker threads [0]\n"
-        "   -v, --verbose <int>             verbosity level 0-2 [1]\n"
+        "   -r, --regions REGION            restrict to comma-separated list of regions\n"
+        "   -R, --regions-file FILE         restrict to regions listed in a file\n"
+        "   -s, --samples -|LIST            samples to include or \"-\" to apply all variants and ignore samples\n"
+        "   -S, --samples-file FILE         samples to include\n"
+        "   -t, --targets REGION            similar to -r but streams rather than index-jumps\n"
+        "   -T, --targets-file FILE         similar to -R but streams rather than index-jumps\n"
+        "       --threads INT               use multithreading with <int> worker threads [0]\n"
+        "   -v, --verbose INT               verbosity level 0-2 [1]\n"
         "\n"
         "Example:\n"
         "   bcftools csq -f hs37d5.fa -g Homo_sapiens.GRCh37.82.gff3.gz in.vcf\n"
@@ -4090,7 +4151,7 @@ int main_csq(int argc, char *argv[])
     args->argc = argc; args->argv = argv;
     args->output_type = FT_VCF;
     args->bcsq_tag = "BCSQ";
-    args->ncsq_max = 2*16;
+    args->ncsq2_max = 2*(16-1);      // 1 bit is reserved for BCF missing values
     args->verbosity = 1;
     args->record_cmd_line = 1;
 
@@ -4100,7 +4161,8 @@ int main_csq(int argc, char *argv[])
         {"threads",required_argument,NULL,2},
         {"help",0,0,'h'},
         {"ncsq",1,0,'n'},
-        {"brief-predictions",0,0,'b'},
+        {"brief-predictions",no_argument,0,'b'},
+        {"trim-protein-seq",required_argument,0,'B'},
         {"custom-tag",1,0,'c'},
         {"local-csq",0,0,'l'},
         {"gff-annot",1,0,'g'},
@@ -4123,7 +4185,7 @@ int main_csq(int argc, char *argv[])
     };
     int c, targets_is_file = 0, regions_is_file = 0; 
     char *targets_list = NULL, *regions_list = NULL, *tmp;
-    while ((c = getopt_long(argc, argv, "?hr:R:t:T:i:e:f:o:O:g:s:S:p:qc:ln:bv:",loptions,NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "?hr:R:t:T:i:e:f:o:O:g:s:S:p:qc:ln:bB:v:",loptions,NULL)) >= 0)
     {
         switch (c) 
         {
@@ -4133,7 +4195,14 @@ int main_csq(int argc, char *argv[])
                 if ( *tmp ) error("Could not parse argument: --threads  %s\n", optarg);
                 break;
             case  3 : args->record_cmd_line = 0; break;
-            case 'b': args->brief_predictions = 1; break;
+            case 'b':
+                    args->brief_predictions = 1;
+                    fprintf(stderr,"Warning: the -b option will be removed in future versions. Please use -B 1 instead.\n");
+                    break;
+            case 'B': 
+                    args->brief_predictions = strtol(optarg,&tmp,10);
+                    if ( *tmp || args->brief_predictions<1 ) error("Could not parse argument: --trim-protein-seq %s\n", optarg);
+                    break;
             case 'l': args->local_csq = 1; break;
             case 'c': args->bcsq_tag = optarg; break;
             case 'q': error("Error: the -q option has been deprecated, use -v, --verbose instead.\n"); break;
@@ -4155,8 +4224,8 @@ int main_csq(int argc, char *argv[])
             case 'f': args->fa_fname = optarg; break;
             case 'g': args->gff_fname = optarg; break;
             case 'n': 
-                args->ncsq_max = 2 * atoi(optarg);
-                if ( args->ncsq_max <=0 ) error("Expected positive integer with -n, got %s\n", optarg);
+                args->ncsq2_max = 2 * atoi(optarg);
+                if ( args->ncsq2_max <= 0 ) error("Expected positive integer with -n, got %s\n", optarg);
                 break;
             case 'o': args->output_fname = optarg; break;
             case 'O':
@@ -4169,8 +4238,12 @@ int main_csq(int argc, char *argv[])
                           default: error("The output type \"%s\" not recognised\n", optarg);
                       }
                       break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'r': regions_list = optarg; break;
             case 'R': regions_list = optarg; regions_is_file = 1; break;
             case 's': args->sample_list = optarg; break;
diff --git a/bcftools/csq.c.pysam.c b/bcftools/csq.c.pysam.c
index e0c3001..e7f6a70 100644
--- a/bcftools/csq.c.pysam.c
+++ b/bcftools/csq.c.pysam.c
@@ -1,11 +1,8 @@
 #include "bcftools.pysam.h"
 
-//$bt csq -f $ref -g $gff -p r -Ou -o /dev/null /lustre/scratch116/vr/projects/g1k/phase3/release/ALL.chr4.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz
-
-
 /* The MIT License
 
-   Copyright (c) 2016-2018 Genome Research Ltd.
+   Copyright (c) 2016-2021 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
    
@@ -138,6 +135,7 @@
  
 #include <stdio.h>
 #include <stdlib.h>
+#include <assert.h>
 #include <getopt.h>
 #include <math.h>
 #include <inttypes.h>
@@ -594,8 +592,8 @@ typedef struct _args_t
     char *bcsq_tag;
     int argc, output_type;
     int phase, verbosity, local_csq, record_cmd_line;
-    int ncsq_max, nfmt_bcsq;    // maximum number of csq per site that can be accessed from FORMAT/BCSQ
-    int ncsq_small_warned;
+    int ncsq2_max, nfmt_bcsq;   // maximum number of csq per site that can be accessed from FORMAT/BCSQ (*2 and 1 bit skipped to avoid BCF missing values)
+    int ncsq2_small_warned;
     int brief_predictions;
     
     int rid;                    // current chromosome
@@ -682,11 +680,42 @@ static inline int feature_set_seq(args_t *args, char *chr_beg, char *chr_end)
     int iseq;
     if ( khash_str2int_get(aux->seq2int, chr_beg, &iseq)!=0 )
     {
-        hts_expand(char*, aux->nseq+1, aux->mseq, aux->seq);
-        aux->seq[aux->nseq] = strdup(chr_beg);
-        iseq = khash_str2int_inc(aux->seq2int, aux->seq[aux->nseq]);
-        aux->nseq++;
-        assert( aux->nseq < 1<<29 );  // see gf_gene_t.iseq and ftr_t.iseq
+        // check for possible mismatch in chromosome naming convention such as chrX vs X
+        char *new_chr = NULL;
+        if ( faidx_has_seq(args->fai,chr_beg) )
+            new_chr = strdup(chr_beg);                  // valid chr name, the same in gff and faidx
+        else
+        {
+            int len = strlen(chr_beg);
+            if ( !strncmp("chr",chr_beg,3) && len>3 )
+                new_chr = strdup(chr_beg+3);            // gff has the prefix, faidx does not
+            else
+            {
+                new_chr = malloc(len+4);                // gff does not have the prefix, faidx has; 3 for "chr" + len + 1 for the terminating NUL
+                memcpy(new_chr,"chr",3);
+                memcpy(new_chr+3,chr_beg,len);
+                new_chr[len+3] = 0;                     // last byte of the len+4 buffer
+            }
+            if ( !faidx_has_seq(args->fai,new_chr) )    // modification did not help, this sequence is not in fai
+            {
+                static int unkwn_chr_warned = 0;        // the warning is printed at most once per process
+                if ( !unkwn_chr_warned && args->verbosity>0 )
+                    fprintf(bcftools_stderr,"Warning: GFF chromosome \"%s\" not part of the reference genome\n",chr_beg);
+                unkwn_chr_warned = 1;
+                free(new_chr);
+                new_chr = strdup(chr_beg);              // use the original sequence name
+            }
+        }
+        if ( khash_str2int_get(aux->seq2int, new_chr, &iseq)!=0 )   // the (possibly renamed) sequence is not registered yet
+        {
+            hts_expand(char*, aux->nseq+1, aux->mseq, aux->seq);
+            aux->seq[aux->nseq] = new_chr;              // aux->seq takes ownership of new_chr
+            iseq = khash_str2int_inc(aux->seq2int, aux->seq[aux->nseq]);
+            aux->nseq++;
+            assert( aux->nseq < 1<<29 );  // see gf_gene_t.iseq and ftr_t.iseq
+        }
+        else
+            free(new_chr);                              // name not stored here, release the temporary copy
     }
     chr_end[1] = c;
     return iseq;
@@ -1142,7 +1171,8 @@ void tscript_init_cds(args_t *args)
                         tscript_ok = 0;
                         break;
                     }
-                    error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d)\n",args->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len);
+                    error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d). Use the --force option to proceed anyway (at your own risk).\n",
+                        args->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len);
                 }
                 len += tr->cds[i]->len; 
             }
@@ -1180,7 +1210,8 @@ void tscript_init_cds(args_t *args)
                         tscript_ok = 0;
                         break;
                     }
-                    error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d)\n",args->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len);
+                    error("Error: GFF3 assumption failed for transcript %s, CDS=%d: phase!=len%%3 (phase=%d, len=%d). Use the --force option to proceed anyway (at your own risk).\n",
+                        args->tscript_ids.str[tr->id],tr->cds[i]->beg+1,phase,len);
                 }
                 len += tr->cds[i]->len;
             }
@@ -1198,8 +1229,17 @@ void tscript_init_cds(args_t *args)
             gf_cds_t *a = tr->cds[i-1];
             gf_cds_t *b = tr->cds[i];
             if ( a->beg + a->len - 1 >= b->beg ) 
-                error("Error: CDS overlap in the transcript %"PRIu32": %"PRIu32"-%"PRIu32" and %"PRIu32"-%"PRIu32"\n", 
-                    kh_key(aux->id2tr, k), a->beg+1,a->beg+a->len, b->beg+1,b->beg+b->len);
+            {
+                if ( args->force )
+                {
+                    fprintf(bcftools_stderr,"Warning: GFF contains overlapping CDS %s: %"PRIu32"-%"PRIu32" and %"PRIu32"-%"PRIu32".\n",
+                        args->tscript_ids.str[tr->id], a->beg+1,a->beg+a->len, b->beg+1,b->beg+b->len);
+                }
+                else
+                    error("Error: CDS overlap in the transcript %s: %"PRIu32"-%"PRIu32" and %"PRIu32"-%"PRIu32", is this intended (e.g. ribosomal slippage)?\n"
+                          "       Use the --force option to override (at your own risk).\n", 
+                            args->tscript_ids.str[tr->id], a->beg+1,a->beg+a->len, b->beg+1,b->beg+b->len);
+            }
         }
         if ( len%3 != 0 )
         {
@@ -1339,9 +1379,22 @@ void init_gff(args_t *args)
     khash_str2int_destroy_free(aux->ignored_biotypes);
 }
 
+static inline int ncsq2_to_nfmt(int ncsq2)     // number of FORMAT values needed to hold ncsq2 flag bits
+{
+    return 1 + (ncsq2 - 1) / 30;                // 30 bits are used per value, see icsq2_to_bit()
+}
+static inline void icsq2_to_bit(int icsq2, int *ival, int *ibit)   // split a flag index into value index and bit position
+{
+    *ival = icsq2 / 30;     // which value holds the flag
+    *ibit = icsq2 % 30;     // bit within that value; 0..29 for a non-negative icsq2
+}
+
 void init_data(args_t *args)
 {
-    args->nfmt_bcsq = 1 + (args->ncsq_max - 1) / 32; 
+    args->nfmt_bcsq = ncsq2_to_nfmt(args->ncsq2_max);
+
+    args->fai = fai_load(args->fa_fname);
+    if ( !args->fai ) error("Failed to load the fai index: %s\n", args->fa_fname);
 
     if ( args->verbosity > 0 ) fprintf(bcftools_stderr,"Parsing %s ...\n", args->gff_fname);
     init_gff(args);
@@ -1351,9 +1404,6 @@ void init_data(args_t *args)
     if ( args->filter_str )
         args->filter = filter_init(args->hdr, args->filter_str);
 
-    args->fai = fai_load(args->fa_fname);
-    if ( !args->fai ) error("Failed to load the fai index: %s\n", args->fa_fname);
-
     args->pos2vbuf  = kh_init(pos2vbuf);
     args->active_tr = khp_init(trhp);
     args->hap = (hap_t*) calloc(1,sizeof(hap_t));
@@ -1397,7 +1447,7 @@ void init_data(args_t *args)
     }
     else
     {
-        args->out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
+        args->out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode2(args->output_type,args->output_fname));
         if ( args->out_fh == NULL ) error("[%s] Error: cannot write to %s: %s\n", __func__,args->output_fname? args->output_fname : "standard output", strerror(errno));
         if ( args->n_threads > 0)
             hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->sr->p);
@@ -1412,6 +1462,11 @@ void init_data(args_t *args)
 
 void destroy_data(args_t *args)
 {
+    if ( args->ncsq2_small_warned )             // non-zero once a consequence index did not fit; holds the largest such index
+        fprintf(bcftools_stderr,
+            "Note: Some samples had too many consequences to be represented in %d bytes. If you need to record them all,\n"
+            "      the limit can be increased by running with `--ncsq %d`.\n",ncsq2_to_nfmt(args->ncsq2_max)*4,1+args->ncsq2_small_warned/2);   // bytes = values*4, assumes 32-bit FORMAT values (TODO confirm); the old /8 truncated to 0
+
     regidx_destroy(args->idx_cds);
     regidx_destroy(args->idx_utr);
     regidx_destroy(args->idx_exon);
@@ -2685,13 +2740,13 @@ void kput_vcsq(args_t *args, vcsq_t *csq, kstring_t *str)
 
 void kprint_aa_prediction(args_t *args, int beg, kstring_t *aa, kstring_t *str)
 {
-    if ( !args->brief_predictions )
+    if ( !args->brief_predictions || (int)aa->l - args->brief_predictions < 3 )   // trimming is off, or it would drop fewer than 3 characters
         kputs(aa->s, str);
     else
     {
-        int len = aa->l;
+        int i, len = aa->l;
         if ( aa->s[len-1]=='*' ) len--;
-        kputc(aa->s[0], str);
+        for (i=0; i<len && i<args->brief_predictions; i++) kputc(aa->s[i], str);   // keep at most brief_predictions leading characters
         kputs("..", str);
         kputw(beg+len, str);
     }
@@ -3085,22 +3140,24 @@ static inline void hap_stage_vcf(args_t *args, tscript_t *tr, int ismpl, int iha
     {
         csq_t *csq = node->csq_list + i;
         vrec_t *vrec = csq->vrec;
-        int icsq = 2*csq->idx + ihap;
-        if ( icsq >= args->ncsq_max ) // more than ncsq_max consequences, so can't fit it in FMT
+        int icsq2 = 2*csq->idx + ihap;
+        if ( icsq2 >= args->ncsq2_max ) // more than ncsq2_max consequences, so can't fit it in FMT
         {
-            if ( args->verbosity && (!args->ncsq_small_warned || args->verbosity > 1) )
+            if ( args->verbosity && (!args->ncsq2_small_warned || args->verbosity > 1) )
             {
                 fprintf(bcftools_stderr,
                     "Warning: Too many consequences for sample %s at %s:%"PRId64", keeping the first %d and skipping the rest.\n",
                     args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),(int64_t) vrec->line->pos+1,csq->idx);
-                if ( !args->ncsq_small_warned )
+                if ( !args->ncsq2_small_warned )
                     fprintf(bcftools_stderr,"         The limit can be increased by setting the --ncsq parameter. This warning is printed only once.\n");
-                args->ncsq_small_warned = 1;
             }
+            if ( args->ncsq2_small_warned < icsq2 ) args->ncsq2_small_warned = icsq2;
             break;
         }
-        if ( vrec->nfmt < 1 + icsq/32 ) vrec->nfmt = 1 + icsq/32;
-        vrec->smpl[ismpl*args->nfmt_bcsq + icsq/32] |= 1 << (icsq % 32);
+        int ival, ibit;
+        icsq2_to_bit(icsq2, &ival,&ibit);
+        if ( vrec->nfmt < 1 + ival ) vrec->nfmt = 1 + ival;
+        vrec->smpl[ismpl*args->nfmt_bcsq + ival] |= 1 << ibit;
     }
 }
 
@@ -3729,22 +3786,26 @@ void csq_stage(args_t *args, csq_t *csq, bcf1_t *rec)
         {
             if ( gt[j]==bcf_gt_missing || gt[j]==bcf_int32_vector_end || !bcf_gt_allele(gt[j]) ) continue;
 
-            int icsq = 2*csq->idx + j;
-            if ( icsq >= args->ncsq_max ) // more than ncsq_max consequences, so can't fit it in FMT
+            int icsq2 = 2*csq->idx + j;
+            if ( icsq2 >= args->ncsq2_max ) // more than ncsq_max consequences, so can't fit it in FMT
             {
                 int ismpl = args->smpl->idx[i];
-                if ( args->verbosity && (!args->ncsq_small_warned || args->verbosity > 1) )
+                if ( args->verbosity && (!args->ncsq2_small_warned || args->verbosity > 1) )
                 {
                     fprintf(bcftools_stderr,
                             "Warning: Too many consequences for sample %s at %s:%"PRId64", keeping the first %d and skipping the rest.\n",
-                            args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),(int64_t) vrec->line->pos+1,icsq+1);
-                    if ( !args->ncsq_small_warned )
+                            args->hdr->samples[ismpl],bcf_hdr_id2name(args->hdr,args->rid),(int64_t) vrec->line->pos+1,icsq2+1);
+                    if ( !args->ncsq2_small_warned )
                         fprintf(bcftools_stderr,"         The limit can be increased by setting the --ncsq parameter. This warning is printed only once.\n");
-                    args->ncsq_small_warned = 1;
+                    args->ncsq2_small_warned = 1;
                 }
+                if ( args->ncsq2_small_warned < icsq2 ) args->ncsq2_small_warned = icsq2;
+                break;
             }
-            if ( vrec->nfmt < 1 + icsq/32 ) vrec->nfmt = 1 + icsq/32;
-            vrec->smpl[i*args->nfmt_bcsq + icsq/32] |= 1 << (icsq % 32);
+            int ival, ibit;
+            icsq2_to_bit(icsq2, &ival,&ibit);
+            if ( vrec->nfmt < 1 + ival ) vrec->nfmt = 1 + ival;
+            vrec->smpl[i*args->nfmt_bcsq + ival] |= 1 << ibit;
         }
     }
 }
@@ -4043,39 +4104,39 @@ static const char *usage(void)
     return 
         "\n"
         "About: Haplotype-aware consequence caller.\n"
-        "Usage: bcftools csq [options] in.vcf\n"
+        "Usage: bcftools csq [OPTIONS] in.vcf\n"
         "\n"
         "Required options:\n"
-        "   -f, --fasta-ref <file>          reference file in fasta format\n"
-        "   -g, --gff-annot <file>          gff3 annotation file\n"
+        "   -f, --fasta-ref FILE            reference file in fasta format\n"
+        "   -g, --gff-annot FILE            gff3 annotation file\n"
         "\n"
         "CSQ options:\n"
-        "   -b, --brief-predictions         annotate with abbreviated protein-changing predictions\n"
-        "   -c, --custom-tag <string>       use this tag instead of the default BCSQ\n"
+        "   -B, --trim-protein-seq INT      abbreviate protein-changing predictions to max INT aminoacids\n" 
+        "   -c, --custom-tag STRING         use this tag instead of the default BCSQ\n"
         "   -l, --local-csq                 localized predictions, consider only one VCF record at a time\n"
-        "   -n, --ncsq <int>                maximum number of consequences to consider per site [16]\n"
-        "   -p, --phase <a|m|r|R|s>         how to handle unphased heterozygous genotypes: [r]\n"
+        "   -n, --ncsq INT                  maximum number of per-haplotype consequences to consider for each site [15]\n"
+        "   -p, --phase a|m|r|R|s           how to handle unphased heterozygous genotypes: [r]\n"
         "                                     a: take GTs as is, create haplotypes regardless of phase (0/1 -> 0|1)\n"
         "                                     m: merge *all* GTs into a single haplotype (0/1 -> 1, 1/2 -> 1)\n"
         "                                     r: require phased GTs, throw an error on unphased het GTs\n"
         "                                     R: create non-reference haplotypes if possible (0/1 -> 1|1, 1/2 -> 1|2)\n"
         "                                     s: skip unphased hets\n"
         "Options:\n"
-        "   -e, --exclude <expr>            exclude sites for which the expression is true\n"
+        "   -e, --exclude EXPR              exclude sites for which the expression is true\n"
         "       --force                     run even if some sanity checks fail\n"
-        "   -i, --include <expr>            select sites for which the expression is true\n"
+        "   -i, --include EXPR              select sites for which the expression is true\n"
         "       --no-version                do not append version and command line to the header\n"
-        "   -o, --output <file>             write output to a file [standard output]\n"
-        "   -O, --output-type <b|u|z|v|t>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF\n"
+        "   -o, --output FILE               write output to a file [standard output]\n"
+        "   -O, --output-type b|u|z|v|t     b: compressed BCF, u: uncompressed BCF, z: compressed VCF\n"
         "                                   v: uncompressed VCF, t: plain tab-delimited text output [v]\n"
-        "   -r, --regions <region>          restrict to comma-separated list of regions\n"
-        "   -R, --regions-file <file>       restrict to regions listed in a file\n"
-        "   -s, --samples <-|list>          samples to include or \"-\" to apply all variants and ignore samples\n"
-        "   -S, --samples-file <file>       samples to include\n"
-        "   -t, --targets <region>          similar to -r but streams rather than index-jumps\n"
-        "   -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n"
-        "       --threads <int>             use multithreading with <int> worker threads [0]\n"
-        "   -v, --verbose <int>             verbosity level 0-2 [1]\n"
+        "   -r, --regions REGION            restrict to comma-separated list of regions\n"
+        "   -R, --regions-file FILE         restrict to regions listed in a file\n"
+        "   -s, --samples -|LIST            samples to include or \"-\" to apply all variants and ignore samples\n"
+        "   -S, --samples-file FILE         samples to include\n"
+        "   -t, --targets REGION            similar to -r but streams rather than index-jumps\n"
+        "   -T, --targets-file FILE         similar to -R but streams rather than index-jumps\n"
+        "       --threads INT               use multithreading with <int> worker threads [0]\n"
+        "   -v, --verbose INT               verbosity level 0-2 [1]\n"
         "\n"
         "Example:\n"
         "   bcftools csq -f hs37d5.fa -g Homo_sapiens.GRCh37.82.gff3.gz in.vcf\n"
@@ -4092,7 +4153,7 @@ int main_csq(int argc, char *argv[])
     args->argc = argc; args->argv = argv;
     args->output_type = FT_VCF;
     args->bcsq_tag = "BCSQ";
-    args->ncsq_max = 2*16;
+    args->ncsq2_max = 2*(16-1);      // 1 bit is reserved for BCF missing values
     args->verbosity = 1;
     args->record_cmd_line = 1;
 
@@ -4102,7 +4163,8 @@ int main_csq(int argc, char *argv[])
         {"threads",required_argument,NULL,2},
         {"help",0,0,'h'},
         {"ncsq",1,0,'n'},
-        {"brief-predictions",0,0,'b'},
+        {"brief-predictions",no_argument,0,'b'},
+        {"trim-protein-seq",required_argument,0,'B'},
         {"custom-tag",1,0,'c'},
         {"local-csq",0,0,'l'},
         {"gff-annot",1,0,'g'},
@@ -4125,7 +4187,7 @@ int main_csq(int argc, char *argv[])
     };
     int c, targets_is_file = 0, regions_is_file = 0; 
     char *targets_list = NULL, *regions_list = NULL, *tmp;
-    while ((c = getopt_long(argc, argv, "?hr:R:t:T:i:e:f:o:O:g:s:S:p:qc:ln:bv:",loptions,NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "?hr:R:t:T:i:e:f:o:O:g:s:S:p:qc:ln:bB:v:",loptions,NULL)) >= 0)
     {
         switch (c) 
         {
@@ -4135,7 +4197,14 @@ int main_csq(int argc, char *argv[])
                 if ( *tmp ) error("Could not parse argument: --threads  %s\n", optarg);
                 break;
             case  3 : args->record_cmd_line = 0; break;
-            case 'b': args->brief_predictions = 1; break;
+            case 'b':
+                    args->brief_predictions = 1;
+                    fprintf(bcftools_stderr,"Warning: the -b option will be removed in future versions. Please use -B 1 instead.\n");
+                    break;
+            case 'B': 
+                    args->brief_predictions = strtol(optarg,&tmp,10);
+                    if ( *tmp || args->brief_predictions<1 ) error("Could not parse argument: --trim-protein-seq %s\n", optarg);
+                    break;
             case 'l': args->local_csq = 1; break;
             case 'c': args->bcsq_tag = optarg; break;
             case 'q': error("Error: the -q option has been deprecated, use -v, --verbose instead.\n"); break;
@@ -4157,8 +4226,8 @@ int main_csq(int argc, char *argv[])
             case 'f': args->fa_fname = optarg; break;
             case 'g': args->gff_fname = optarg; break;
             case 'n': 
-                args->ncsq_max = 2 * atoi(optarg);
-                if ( args->ncsq_max <=0 ) error("Expected positive integer with -n, got %s\n", optarg);
+                args->ncsq2_max = 2 * atoi(optarg);
+                if ( args->ncsq2_max <= 0 ) error("Expected positive integer with -n, got %s\n", optarg);
                 break;
             case 'o': args->output_fname = optarg; break;
             case 'O':
@@ -4171,8 +4240,12 @@ int main_csq(int argc, char *argv[])
                           default: error("The output type \"%s\" not recognised\n", optarg);
                       }
                       break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'r': regions_list = optarg; break;
             case 'R': regions_list = optarg; regions_is_file = 1; break;
             case 's': args->sample_list = optarg; break;
diff --git a/bcftools/dist.c b/bcftools/dist.c
new file mode 100644
index 0000000..094fc73
--- /dev/null
+++ b/bcftools/dist.c
@@ -0,0 +1,124 @@
+/* The MIT License
+
+   Copyright (c) 2016-2020 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3@sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+#include "dist.h"
+
+extern void error(const char *format, ...);
+
+struct _dist_t
+{
+    uint64_t *bins, nvalues;
+    int nbins;
+    int npow;   // the number of orders of magnitude to represent exactly
+    int nexact; // pow(10,npow)
+    int nlevel;
+};
+
+dist_t *dist_init(int npow)
+{
+    dist_t *dist = (dist_t*) calloc(1,sizeof(dist_t));
+    dist->npow   = npow;
+    dist->nexact = pow(10,npow);
+    dist->nlevel = dist->nexact - pow(10,npow-1);
+    return dist;
+}
+
+void dist_destroy(dist_t *dist)
+{
+    if ( !dist ) return;
+    free(dist->bins);
+    free(dist);
+}
+
+int dist_nbins(dist_t *dist)
+{
+    return dist->nbins;
+}
+
+int dist_nvalues(dist_t *dist)
+{
+    return dist->nvalues;
+}
+
+uint32_t dist_insert(dist_t *dist, uint32_t value)
+{
+    int ibin;
+
+    if ( value <= dist->nexact ) 
+        ibin = value;
+    else
+    {
+        int npow  = (int) log10(value);
+        int level = npow - dist->npow + 1;
+        uint32_t step = pow(10, level);
+        ibin = dist->nexact + dist->nlevel*(level-1) + (value - pow(10,npow)) / step;
+    }
+
+    if ( ibin >= dist->nbins )
+    {
+        dist->bins = (uint64_t*) realloc(dist->bins, sizeof(*dist->bins)*(ibin+1));
+        memset(dist->bins + dist->nbins, 0, (ibin+1 - dist->nbins)*sizeof(*dist->bins));
+        dist->nbins = ibin+1;
+    }
+    dist->bins[ibin]++;
+    dist->nvalues++;
+    return ibin;
+}
+uint32_t dist_insert_n(dist_t *dist, uint32_t value, uint32_t cnt)  // insert `value` cnt times; returns its bin index (0 when cnt is 0)
+{
+    if ( !cnt ) return 0;
+    int ibin = dist_insert(dist, value);    // records the first occurrence: bins[ibin]++ and nvalues++
+    dist->bins[ibin] += cnt - 1;
+    dist->nvalues += cnt - 1;               // only the remaining cnt-1; adding cnt would count the first occurrence twice
+    return ibin;
+}
+
+uint64_t dist_get(dist_t *dist, uint32_t idx, uint32_t *beg, uint32_t *end)
+{
+    if ( idx < dist->nexact )
+    {
+        if ( beg ) *beg = idx;
+        if ( end ) *end = idx + 1;
+    }
+    else
+    {
+        int level = (idx - dist->nexact) / dist->nlevel + 1;
+        int bin   = idx - dist->nexact - dist->nlevel*(level-1);
+
+        uint32_t step  = pow(10, level);
+        uint32_t value = pow(10, level + dist->npow - 1) + step*bin;
+
+        if ( beg ) *beg = value;
+        if ( end ) *end = value + step;
+    }
+    return dist->bins[idx];
+}
+
diff --git a/bcftools/dist.c.pysam.c b/bcftools/dist.c.pysam.c
new file mode 100644
index 0000000..f3f0915
--- /dev/null
+++ b/bcftools/dist.c.pysam.c
@@ -0,0 +1,126 @@
+#include "bcftools.pysam.h"
+
+/* The MIT License
+
+   Copyright (c) 2016-2020 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3@sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+#include "dist.h"
+
+extern void error(const char *format, ...);
+
+struct _dist_t
+{
+    uint64_t *bins, nvalues;
+    int nbins;
+    int npow;   // the number of orders of magnitude to represent exactly
+    int nexact; // pow(10,npow)
+    int nlevel;
+};
+
+dist_t *dist_init(int npow)
+{
+    dist_t *dist = (dist_t*) calloc(1,sizeof(dist_t));
+    dist->npow   = npow;
+    dist->nexact = pow(10,npow);
+    dist->nlevel = dist->nexact - pow(10,npow-1);
+    return dist;
+}
+
+void dist_destroy(dist_t *dist)
+{
+    if ( !dist ) return;
+    free(dist->bins);
+    free(dist);
+}
+
+int dist_nbins(dist_t *dist)
+{
+    return dist->nbins;
+}
+
+int dist_nvalues(dist_t *dist)
+{
+    return dist->nvalues;
+}
+
+uint32_t dist_insert(dist_t *dist, uint32_t value)
+{
+    int ibin;
+
+    if ( value <= dist->nexact ) 
+        ibin = value;
+    else
+    {
+        int npow  = (int) log10(value);
+        int level = npow - dist->npow + 1;
+        uint32_t step = pow(10, level);
+        ibin = dist->nexact + dist->nlevel*(level-1) + (value - pow(10,npow)) / step;
+    }
+
+    if ( ibin >= dist->nbins )
+    {
+        dist->bins = (uint64_t*) realloc(dist->bins, sizeof(*dist->bins)*(ibin+1));
+        memset(dist->bins + dist->nbins, 0, (ibin+1 - dist->nbins)*sizeof(*dist->bins));
+        dist->nbins = ibin+1;
+    }
+    dist->bins[ibin]++;
+    dist->nvalues++;
+    return ibin;
+}
+uint32_t dist_insert_n(dist_t *dist, uint32_t value, uint32_t cnt)  // insert `value` cnt times; returns its bin index (0 when cnt is 0)
+{
+    if ( !cnt ) return 0;
+    int ibin = dist_insert(dist, value);    // records the first occurrence: bins[ibin]++ and nvalues++
+    dist->bins[ibin] += cnt - 1;
+    dist->nvalues += cnt - 1;               // only the remaining cnt-1; adding cnt would count the first occurrence twice
+    return ibin;
+}
+
+uint64_t dist_get(dist_t *dist, uint32_t idx, uint32_t *beg, uint32_t *end)
+{
+    if ( idx < dist->nexact )
+    {
+        if ( beg ) *beg = idx;
+        if ( end ) *end = idx + 1;
+    }
+    else
+    {
+        int level = (idx - dist->nexact) / dist->nlevel + 1;
+        int bin   = idx - dist->nexact - dist->nlevel*(level-1);
+
+        uint32_t step  = pow(10, level);
+        uint32_t value = pow(10, level + dist->npow - 1) + step*bin;
+
+        if ( beg ) *beg = value;
+        if ( end ) *end = value + step;
+    }
+    return dist->bins[idx];
+}
+
diff --git a/bcftools/dist.h b/bcftools/dist.h
new file mode 100644
index 0000000..5c9c571
--- /dev/null
+++ b/bcftools/dist.h
@@ -0,0 +1,98 @@
+/* The MIT License
+
+   Copyright (c) 2016-2020 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3@sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+/*
+    Logarithmic binning
+
+    Example of usage:
+
+        // Initialize, make the binning exact up to 10^4, then add a log-step
+        dist_t *dist = dist_init(4);
+
+        // Insert values
+        int i;
+        for (i=0; i<1e6; i++)
+            dist_insert(dist, i);
+
+        // Number of bins used
+        int n = dist_nbins(dist);
+
+        // Now print the distribution
+        uint32_t beg, end;
+        for (i=0; i<n; i++)
+        {
+            // Raw count in the bin. The boundaries beg,end are optional, 
+            // and can be used to plot correctly the density
+            uint64_t cnt = dist_get(dist, i, &beg, &end);
+            if ( !cnt ) continue;
+
+            // Print the interval, count and density
+            printf("%u\t%u\t%"PRIu64"\t%f\n", beg, end, cnt, (double)cnt/(end-beg));
+        }
+
+        // Clean up
+        dist_destroy(dist);
+ */
+
+#ifndef __DIST_H__
+#define __DIST_H__
+
+#include <stdio.h>
+#include <inttypes.h>
+
+typedef struct _dist_t dist_t;
+
+/*
+ *  dist_init() - init bins; values up to 10^npow get one bin each, larger values fall into log-scaled bins
+ */
+dist_t *dist_init(int npow);
+void dist_destroy(dist_t *dist);
+
+/*
+    dist_nbins() - get the number of bins
+ */
+int dist_nbins(dist_t *dist);
+
+/*
+    dist_nvalues() - get the total number of values inserted
+ */
+int dist_nvalues(dist_t *dist);
+
+/*
+    dist_insert()   - insert new value
+    dist_insert_n() - insert new value n times
+ */
+uint32_t dist_insert(dist_t *dist, uint32_t value);
+uint32_t dist_insert_n(dist_t *dist, uint32_t value, uint32_t cnt);
+
+/*
+   dist_get() 
+   @idx:        from the interval [0,dist_nbins()-1]
+   @beg,end:    [beg,end)
+ */
+uint64_t dist_get(dist_t *dist, uint32_t idx, uint32_t *beg, uint32_t *end);
+
+#endif
+
diff --git a/bcftools/em.c b/bcftools/em.c
index a976f22..baa3490 100644
--- a/bcftools/em.c
+++ b/bcftools/em.c
@@ -1,7 +1,7 @@
 /*  em.c -- mathematical functions.
 
     Copyright (C) 2010, 2011 Broad Institute.
-    Portions copyright (C) 2013 Genome Research Ltd.
+    Portions copyright (C) 2013-2014 Genome Research Ltd.
 
     Author: Heng Li <lh3@live.co.uk>
 
diff --git a/bcftools/em.c.pysam.c b/bcftools/em.c.pysam.c
index db27d06..37a3dea 100644
--- a/bcftools/em.c.pysam.c
+++ b/bcftools/em.c.pysam.c
@@ -3,7 +3,7 @@
 /*  em.c -- mathematical functions.
 
     Copyright (C) 2010, 2011 Broad Institute.
-    Portions copyright (C) 2013 Genome Research Ltd.
+    Portions copyright (C) 2013-2014 Genome Research Ltd.
 
     Author: Heng Li <lh3@live.co.uk>
 
diff --git a/bcftools/extsort.c b/bcftools/extsort.c
new file mode 100644
index 0000000..014e03b
--- /dev/null
+++ b/bcftools/extsort.c
@@ -0,0 +1,250 @@
+/*  extsort.c -- sort on disk
+
+   Copyright (C) 2020-2021 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3@sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <stdio.h>
+#include <unistd.h>     // for unlink()
+#include <sys/stat.h>   // for chmod()
+#include <assert.h>
+#include <fcntl.h>
+#ifdef _WIN32
+#include <windows.h>
+#endif
+#include "bcftools.h"
+#include "extsort.h"
+#include "kheap.h"
+
+typedef struct
+{
+    extsort_t *es;  // this is to get access to extsort_cmp_f from kheap
+    int fd;
+    char *fname;
+    void *dat;
+}
+blk_t;
+
+static inline int blk_is_smaller(blk_t **aptr, blk_t **bptr);
+KHEAP_INIT(blk, blk_t*, blk_is_smaller)     /* defines khp_blk_t */
+
+struct _extsort_t
+{
+    size_t dat_size, mem, max_mem;
+    char *tmp_prefix;
+    extsort_cmp_f cmp;
+
+    size_t nbuf, mbuf, nblk;
+    blk_t **blk;
+    void **buf, *tmp_dat;
+    khp_blk_t *bhp;
+};
+
+static inline int blk_is_smaller(blk_t **aptr, blk_t **bptr)
+{
+    blk_t *a = *aptr;
+    blk_t *b = *bptr;
+    int ret = a->es->cmp(&a->dat,&b->dat);
+    if ( ret < 0 ) return 1;
+    return 0;
+}
+
+size_t parse_mem_string(const char *str);
+
+void extsort_set(extsort_t *es, extsort_opt_t key, void *value)
+{
+    if ( key==DAT_SIZE ) { es->dat_size = *((size_t*)value); return; }
+    if ( key==MAX_MEM )
+    {
+        es->max_mem = parse_mem_string(*((const char**)value));
+        if ( es->max_mem <=0 ) error("Could not parse the memory string, expected positive number: %s\n",*((const char**)value));
+        return;
+    }
+    if ( key==TMP_PREFIX ) { es->tmp_prefix = init_tmp_prefix(*((const char**)value)); return; }
+    if ( key==FUNC_CMP ) { es->cmp = *((extsort_cmp_f*)value); return; }
+}
+
+extsort_t *extsort_alloc(void)
+{
+    extsort_t *es = (extsort_t*) calloc(1,sizeof(*es));
+    es->max_mem = 100e6;
+    return es;
+}
+void extsort_init(extsort_t *es)
+{
+    assert( es->cmp );
+    assert( es->dat_size );
+    if ( !es->tmp_prefix ) es->tmp_prefix = init_tmp_prefix(NULL);
+    es->tmp_dat = malloc(es->dat_size);
+}
+
+void extsort_destroy(extsort_t *es)
+{
+    int i;
+    for (i=0; i<es->nblk; i++)
+    {
+        blk_t *blk = es->blk[i];
+        if ( blk->fd!=-1 )
+#ifdef _WIN32
+            _close(blk->fd);
+#else
+            close(blk->fd);
+#endif
+        free(blk->fname);
+        free(blk->dat);
+        free(blk);
+    }
+    free(es->tmp_dat);
+    free(es->tmp_prefix);
+    free(es->blk);
+    khp_destroy(blk, es->bhp);
+    free(es);
+}
+
+static void _buf_flush(extsort_t *es)   // sort the buffered records and spill them into a new temporary block file
+{
+    int i;
+    if ( !es->nbuf ) return;            // nothing buffered, no block is created
+
+    qsort(es->buf, es->nbuf, sizeof(void*), es->cmp);
+
+    es->nblk++;
+    es->blk = (blk_t**) realloc(es->blk, sizeof(blk_t*)*es->nblk);
+    es->blk[es->nblk-1] = (blk_t*) calloc(1,sizeof(blk_t));
+    blk_t *blk = es->blk[es->nblk-1];
+    blk->es    = es;
+    blk->dat   = malloc(es->dat_size);
+    blk->fname = strdup(es->tmp_prefix);
+    #ifdef _WIN32
+        for (i=0; i<100000; i++)
+        {
+            memcpy(blk->fname,es->tmp_prefix,strlen(es->tmp_prefix));   // restore the template, mktemp() overwrites it
+            mktemp(blk->fname);
+            blk->fd = _open(blk->fname, O_RDWR|O_CREAT|O_EXCL|O_BINARY|O_TEMPORARY, 0600);
+            if ( blk->fd==-1 )
+            {
+                if ( errno==EEXIST ) continue;
+                error("Error: failed to open a temporary file %s\n",blk->fname);
+            }
+            break;
+        }
+        if ( blk->fd==-1 ) error("Error: failed to create a unique temporary file name from %s\n",es->tmp_prefix);  // all attempts hit EEXIST
+        if ( _chmod(blk->fname, S_IRUSR|S_IWUSR)!=0 ) error("Error: failed to set permissions of the temporary file %s\n",blk->fname);
+    #else
+        if ( (blk->fd = mkstemp(blk->fname))==-1 )
+            error("Error: failed to open a temporary file %s\n",blk->fname);
+        if ( fchmod(blk->fd,S_IRUSR|S_IWUSR)!=0 ) error("Error: failed to set permissions of the temporary file %s\n",blk->fname);
+        unlink(blk->fname); // should auto delete when closed on linux, the descriptor remains open
+    #endif
+
+    for (i=0; i<es->nbuf; i++)          // write the sorted records one by one and release each buffered record
+    {
+        #ifdef _WIN32
+            if ( _write(blk->fd, es->buf[i], es->dat_size)!=es->dat_size ) error("Error: failed to write %zu bytes to the temporary file %s\n",es->dat_size,blk->fname);
+        #else
+            if ( write(blk->fd, es->buf[i], es->dat_size)!=es->dat_size ) error("Error: failed to write %zu bytes to the temporary file %s\n",es->dat_size,blk->fname);
+        #endif
+        free(es->buf[i]);
+    }
+#ifdef _WIN32
+    if ( _lseek(blk->fd,0,SEEK_SET)!=0 ) error("Error: failed to lseek() to the start of the temporary file %s\n", blk->fname);
+#else
+    if ( lseek(blk->fd,0,SEEK_SET)!=0 ) error("Error: failed to lseek() to the start of the temporary file %s\n", blk->fname);
+#endif
+
+    es->nbuf = 0;                       // the in-memory buffer is empty again
+    es->mem  = 0;
+}
+
+void extsort_push(extsort_t *es, void *dat)     // buffer one record pointer; spills the buffer to disk when max_mem would be exceeded
+{
+    size_t delta = sizeof(void*) + es->dat_size;    // per-record memory cost; size_t avoids narrowing dat_size into an int
+    if ( es->nbuf && es->mem + delta > es->max_mem ) _buf_flush(es);
+    es->nbuf++;
+    es->mem += delta;
+    hts_expand(void*, es->nbuf, es->mbuf, es->buf);
+    es->buf[es->nbuf-1] = dat;
+}
+
+// return number of elements read
+static ssize_t _blk_read(extsort_t *es, blk_t *blk)
+{
+    ssize_t ret = 0;
+    if ( blk->fd==-1 ) return ret;
+#ifdef _WIN32
+    ret = _read(blk->fd, blk->dat, es->dat_size);
+#else
+    ret = read(blk->fd, blk->dat, es->dat_size);
+#endif
+    if ( ret < 0 ) error("Error: failed to read from the temporary file %s\n", blk->fname);
+    if ( ret == 0 )
+    {
+#ifdef _WIN32
+        if ( _close(blk->fd)!=0 ) error("Error: failed to close the temporary file %s\n", blk->fname);
+#else
+        if ( close(blk->fd)!=0 ) error("Error: failed to close the temporary file %s\n", blk->fname);
+#endif
+        blk->fd = -1;
+        return ret;
+    }
+    if ( ret < es->dat_size ) error("Error: failed to read %zu bytes from the temporary file %s\n",es->dat_size,blk->fname);
+    return ret;
+}
+
+void extsort_sort(extsort_t *es)
+{
+    _buf_flush(es);
+    free(es->buf);
+    es->buf = NULL;
+    es->bhp = khp_init(blk);
+
+    // open all blocks, read one record from each, create a heap
+    int i;
+    for (i=0; i<es->nblk; i++)
+    {
+        blk_t *blk = es->blk[i];
+#ifdef _WIN32
+        if ( _lseek(blk->fd,0,SEEK_SET)!=0 ) error("Error: failed to lseek() to the start of the temporary file %s\n", blk->fname);
+#else
+        if ( lseek(blk->fd,0,SEEK_SET)!=0 ) error("Error: failed to lseek() to the start of the temporary file %s\n", blk->fname);
+#endif
+        int ret = _blk_read(es, blk);
+        if ( ret ) khp_insert(blk, es->bhp, &blk);
+    }
+}
+
+void *extsort_shift(extsort_t *es)
+{
+    if ( !es->bhp->ndat ) return NULL;
+    blk_t *blk = es->bhp->dat[0];
+
+    // swap the pointer which keeps the location of user data so that it is not overwritten by the next read
+    void *tmp = es->tmp_dat; es->tmp_dat = blk->dat; blk->dat = tmp;
+    khp_delete(blk, es->bhp);
+
+    int ret = _blk_read(es, blk);
+    if ( ret ) khp_insert(blk, es->bhp, &blk);
+
+    return es->tmp_dat;
+}
+
diff --git a/bcftools/extsort.c.pysam.c b/bcftools/extsort.c.pysam.c
new file mode 100644
index 0000000..1b410a7
--- /dev/null
+++ b/bcftools/extsort.c.pysam.c
@@ -0,0 +1,252 @@
+#include "bcftools.pysam.h"
+
+/*  extsort.c -- sort on disk
+
+   Copyright (C) 2020-2021 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3@sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#include <stdio.h>
+#include <unistd.h>     // for unlink()
+#include <sys/stat.h>   // for chmod()
+#include <assert.h>
+#include <fcntl.h>
+#ifdef _WIN32
+#include <windows.h>
+#endif
+#include "bcftools.h"
+#include "extsort.h"
+#include "kheap.h"
+
+typedef struct      // one sorted run of records stored in its own temporary file
+{
+    extsort_t *es;  // this is to get access to extsort_cmp_f from kheap
+    int fd;         // descriptor of the temporary file; set to -1 by _blk_read() once the file is exhausted and closed
+    char *fname;    // name of the temporary file, starts as a copy of es->tmp_prefix
+    void *dat;      // buffer of es->dat_size bytes holding the record most recently read from the file
+}
+blk_t;
+
+static inline int blk_is_smaller(blk_t **aptr, blk_t **bptr);
+KHEAP_INIT(blk, blk_t*, blk_is_smaller)     /* defines khp_blk_t */
+
+struct _extsort_t
+{
+    size_t dat_size, mem, max_mem;  // record size in bytes; running estimate of buffered memory; limit which triggers a flush
+    char *tmp_prefix;               // template for temporary file names, as returned by init_tmp_prefix()
+    extsort_cmp_f cmp;              // user comparison function, used by qsort() and by the heap via blk_is_smaller()
+
+    size_t nbuf, mbuf, nblk;        // number of buffered records; allocated size of buf; number of blocks
+    blk_t **blk;                    // the blocks written so far, nblk elements
+    void **buf, *tmp_dat;           // buf: pointers to pushed records; tmp_dat: dat_size-byte buffer returned by extsort_shift()
+    khp_blk_t *bhp;                 // heap of blocks, created in extsort_sort()
+};
+
+static inline int blk_is_smaller(blk_t **aptr, blk_t **bptr)   // heap ordering callback: 1 if a's current record sorts before b's, 0 otherwise
+{
+    blk_t *a = *aptr;
+    blk_t *b = *bptr;
+    int ret = a->es->cmp(&a->dat,&b->dat);  // pass the address of the record pointer, the same calling form qsort() uses on es->buf
+    if ( ret < 0 ) return 1;
+    return 0;
+}
+
+size_t parse_mem_string(const char *str);
+
+void extsort_set(extsort_t *es, extsort_opt_t key, void *value)    // value points to the option value; its type depends on key
+{
+    if ( key==DAT_SIZE ) { es->dat_size = *((size_t*)value); return; }
+    if ( key==MAX_MEM )
+    {
+        es->max_mem = parse_mem_string(*((const char**)value));
+        if ( es->max_mem <=0 ) error("Could not parse the memory string, expected positive number: %s\n",*((const char**)value));    // max_mem is unsigned, so this catches 0 only
+        return;
+    }
+    if ( key==TMP_PREFIX ) { es->tmp_prefix = init_tmp_prefix(*((const char**)value)); return; }
+    if ( key==FUNC_CMP ) { es->cmp = *((extsort_cmp_f*)value); return; }
+}   // any other key falls through and is silently ignored
+
+extsort_t *extsort_alloc(void)  // returns a zero-initialized sorter; options are set with extsort_set() before extsort_init()
+{
+    extsort_t *es = (extsort_t*) calloc(1,sizeof(*es));    // NOTE(review): the result is not checked for NULL
+    es->max_mem = 100e6;    // default memory limit in bytes, used unless MAX_MEM is set
+    return es;
+}
+void extsort_init(extsort_t *es)    // validates the mandatory options and allocates the record buffer returned by extsort_shift()
+{
+    assert( es->cmp );          // FUNC_CMP must have been set
+    assert( es->dat_size );     // DAT_SIZE must have been set
+    if ( !es->tmp_prefix ) es->tmp_prefix = init_tmp_prefix(NULL);  // no TMP_PREFIX given, let init_tmp_prefix() choose one
+    es->tmp_dat = malloc(es->dat_size);
+}
+
+void extsort_destroy(extsort_t *es)     // closes any block files still open and frees the blocks, the heap and es itself
+{
+    int i;
+    for (i=0; i<es->nblk; i++)
+    {
+        blk_t *blk = es->blk[i];
+        if ( blk->fd!=-1 )  // -1 marks a block already closed by _blk_read()
+#ifdef _WIN32
+            _close(blk->fd);
+#else
+            close(blk->fd);
+#endif
+        free(blk->fname);
+        free(blk->dat);
+        free(blk);
+    }
+    free(es->tmp_dat);
+    free(es->tmp_prefix);
+    free(es->blk);      // NOTE(review): es->buf is freed only in extsort_sort(); looks like it leaks if sort was never called - confirm
+    khp_destroy(blk, es->bhp);
+    free(es);
+}
+
+static void _buf_flush(extsort_t *es)   // sorts the buffered records and writes them to a new temporary block file
+{
+    int i;
+    if ( !es->nbuf ) return;    // nothing buffered, no block is created
+
+    qsort(es->buf, es->nbuf, sizeof(void*), es->cmp);   // elements are void*, so cmp receives pointers to the record pointers
+
+    es->nblk++;
+    es->blk = (blk_t**) realloc(es->blk, sizeof(blk_t*)*es->nblk);
+    es->blk[es->nblk-1] = (blk_t*) calloc(1,sizeof(blk_t));
+    blk_t *blk = es->blk[es->nblk-1];
+    blk->es    = es;
+    blk->dat   = malloc(es->dat_size);
+    blk->fname = strdup(es->tmp_prefix);
+    #ifdef _WIN32
+        for (i=0; i<100000; i++)
+        {
+            memcpy(blk->fname,es->tmp_prefix,strlen(es->tmp_prefix));   // restore the template overwritten by the previous mktemp() attempt
+            mktemp(blk->fname);
+            blk->fd = _open(blk->fname, O_RDWR|O_CREAT|O_EXCL|O_BINARY|O_TEMPORARY, 0600);
+            if ( blk->fd==-1 )
+            {
+                if ( errno==EEXIST ) continue;  // name collision, try another name
+                error("Error: failed to open a temporary file %s\n",blk->fname);
+            }
+            break;
+        }
+        if ( blk->fd==-1 ) error("Error: failed to create a unique temporary file name from %s\n",es->tmp_prefix);   // every attempt hit EEXIST; failure is -1, 0 is a valid descriptor
+        if ( _chmod(blk->fname, S_IRUSR|S_IWUSR)!=0 ) error("Error: failed to set permissions of the temporary file %s\n",blk->fname);
+    #else
+        if ( (blk->fd = mkstemp(blk->fname))==-1 )
+            error("Error: failed to open a temporary file %s\n",blk->fname);
+        if ( fchmod(blk->fd,S_IRUSR|S_IWUSR)!=0 ) error("Error: failed to set permissions of the temporary file %s\n",blk->fname);
+        unlink(blk->fname); // should auto delete when closed on linux, the descriptor remains open
+    #endif
+
+    for (i=0; i<es->nbuf; i++)
+    {
+        #ifdef _WIN32
+            if ( _write(blk->fd, es->buf[i], es->dat_size)!=es->dat_size ) error("Error: failed to write %zu bytes to the temporary file %s\n",es->dat_size,blk->fname);
+        #else
+            if ( write(blk->fd, es->buf[i], es->dat_size)!=es->dat_size ) error("Error: failed to write %zu bytes to the temporary file %s\n",es->dat_size,blk->fname);
+        #endif
+        free(es->buf[i]);   // the record passed to extsort_push() is released once it is on disk
+    }
+#ifdef _WIN32
+    if ( _lseek(blk->fd,0,SEEK_SET)!=0 ) error("Error: failed to lseek() to the start of the temporary file %s\n", blk->fname);
+#else
+    if ( lseek(blk->fd,0,SEEK_SET)!=0 ) error("Error: failed to lseek() to the start of the temporary file %s\n", blk->fname);
+#endif
+
+    es->nbuf = 0;   // the buffer array itself is kept and reused by the next extsort_push()
+    es->mem  = 0;
+}
+
+void extsort_push(extsort_t *es, void *dat)     // queues one record; es takes ownership of dat and frees it when the buffer is flushed
+{
+    size_t delta = sizeof(void*) + es->dat_size;    // memory accounted per record; size_t so that a large dat_size cannot truncate or go negative
+    if ( es->nbuf && es->mem + delta > es->max_mem ) _buf_flush(es);    // spill before exceeding the limit, but always keep at least one record
+    es->nbuf++;
+    es->mem += delta;
+    hts_expand(void*, es->nbuf, es->mbuf, es->buf);
+    es->buf[es->nbuf-1] = dat;
+}
+
+// return the number of bytes read into blk->dat: es->dat_size for one record, 0 when the block is exhausted
+static ssize_t _blk_read(extsort_t *es, blk_t *blk)
+{
+    ssize_t ret = 0;
+    if ( blk->fd==-1 ) return ret;  // the file was already exhausted and closed
+#ifdef _WIN32
+    ret = _read(blk->fd, blk->dat, es->dat_size);
+#else
+    ret = read(blk->fd, blk->dat, es->dat_size);
+#endif
+    if ( ret < 0 ) error("Error: failed to read from the temporary file %s\n", blk->fname);
+    if ( ret == 0 )     // end of file: close the descriptor and mark the block as finished
+    {
+#ifdef _WIN32
+        if ( _close(blk->fd)!=0 ) error("Error: failed to close the temporary file %s\n", blk->fname);
+#else
+        if ( close(blk->fd)!=0 ) error("Error: failed to close the temporary file %s\n", blk->fname);
+#endif
+        blk->fd = -1;
+        return ret;
+    }
+    if ( ret < es->dat_size ) error("Error: failed to read %zu bytes from the temporary file %s\n",es->dat_size,blk->fname);   // a partial record is treated as an error, the read is not retried
+    return ret;
+}
+
+void extsort_sort(extsort_t *es)    // ends the push phase and prepares the heap that extsort_shift() reads from
+{
+    _buf_flush(es);     // spill the records that are still buffered in memory as the last block
+    free(es->buf);
+    es->buf = NULL;
+    es->bhp = khp_init(blk);
+
+    // rewind all block files (they are already open), read one record from each, create a heap
+    int i;
+    for (i=0; i<es->nblk; i++)
+    {
+        blk_t *blk = es->blk[i];
+#ifdef _WIN32
+        if ( _lseek(blk->fd,0,SEEK_SET)!=0 ) error("Error: failed to lseek() to the start of the temporary file %s\n", blk->fname);
+#else
+        if ( lseek(blk->fd,0,SEEK_SET)!=0 ) error("Error: failed to lseek() to the start of the temporary file %s\n", blk->fname);
+#endif
+        int ret = _blk_read(es, blk);
+        if ( ret ) khp_insert(blk, es->bhp, &blk);  // a block that yields no data is left out of the heap
+    }
+}
+
+void *extsort_shift(extsort_t *es)  // returns the next record from the heap, or NULL when the heap is empty
+{
+    if ( !es->bhp->ndat ) return NULL;
+    blk_t *blk = es->bhp->dat[0];   // heap root; presumably the block whose record sorts first per blk_is_smaller(), see kheap.h
+
+    // swap the pointer which keeps the location of user data so that it is not overwritten by the next read
+    void *tmp = es->tmp_dat; es->tmp_dat = blk->dat; blk->dat = tmp;
+    khp_delete(blk, es->bhp);
+
+    int ret = _blk_read(es, blk);   // refill the block's buffer with its next record
+    if ( ret ) khp_insert(blk, es->bhp, &blk);  // re-queue the block only while it still has data
+
+    return es->tmp_dat;     // owned by es; the buffer is handed back to a block and reused on the next call
+}
+
diff --git a/bcftools/extsort.h b/bcftools/extsort.h
new file mode 100644
index 0000000..ba6282e
--- /dev/null
+++ b/bcftools/extsort.h
@@ -0,0 +1,56 @@
+/*  ext-sort.h -- sort on disk
+
+   Copyright (C) 2020 Genome Research Ltd.
+
+   Author: Petr Danecek <pd3@sanger.ac.uk>
+   
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+   
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+   
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+
+ */
+
+#ifndef __EXTSORT_H__
+#define __EXTSORT_H__
+
+//todo: return status to all functions
+
+typedef struct _extsort_t extsort_t;
+
+typedef int (*extsort_cmp_f) (const void *aptr, const void *bptr);     // aptr,bptr point to the record pointers (void**), as with qsort() on an array of pointers
+
+// Options accepted by extsort_set(); the first column gives the type the value pointer must point to
+typedef enum
+{
+    DAT_SIZE,       // size_t        .. assuming constant size records for now
+    TMP_PREFIX,     // const char*   .. prefix of temporary files, XXXXXX will be appended
+    MAX_MEM,        // const char*   .. maximum memory to use, e.g. 100MB
+    FUNC_CMP,       // extsort_cmp_f .. sort function
+}
+extsort_opt_t;
+
+#define extsort_set_opt(es,type,key,value) { type tmp = value; extsort_set(es, key, (void*)&tmp); }    // copies value into a temporary of the given type and passes its address
+
+extsort_t *extsort_alloc(void);
+void extsort_set(extsort_t *es, extsort_opt_t key, void *value);
+void extsort_init(extsort_t *es);               // requires DAT_SIZE and FUNC_CMP to be set
+void extsort_push(extsort_t *es, void *dat);    // dat will be freed by extsort later
+void extsort_sort(extsort_t *es);               // call once after the last push and before the first shift
+void *extsort_shift(extsort_t *es);             // next record or NULL when done; the buffer is reused by the following call
+void extsort_destroy(extsort_t *es);
+
+#endif
diff --git a/bcftools/filter.c b/bcftools/filter.c
index ea60036..3c45195 100644
--- a/bcftools/filter.c
+++ b/bcftools/filter.c
@@ -1,6 +1,6 @@
 /*  filter.c -- filter expressions.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -25,6 +25,7 @@ THE SOFTWARE.  */
 #include <ctype.h>
 #include <stdlib.h>
 #include <strings.h>
+#include <assert.h>
 #include <errno.h>
 #include <math.h>
 #include <sys/types.h>
@@ -56,27 +57,6 @@ static int filter_ninit = 0;
 #  define __FUNCTION__ __func__
 #endif
 
-static const uint64_t bcf_double_missing    = 0x7ff0000000000001;
-static const uint64_t bcf_double_vector_end = 0x7ff0000000000002;
-static inline void bcf_double_set(double *ptr, uint64_t value)
-{
-    union { uint64_t i; double d; } u;
-    u.i = value;
-    *ptr = u.d;
-}
-static inline int bcf_double_test(double d, uint64_t value)
-{
-    union { uint64_t i; double d; } u;
-    u.d = d;
-    return u.i==value ? 1 : 0;
-}
-#define bcf_double_set_vector_end(x) bcf_double_set(&(x),bcf_double_vector_end)
-#define bcf_double_set_missing(x)    bcf_double_set(&(x),bcf_double_missing)
-#define bcf_double_is_vector_end(x)  bcf_double_test((x),bcf_double_vector_end)
-#define bcf_double_is_missing(x)     bcf_double_test((x),bcf_double_missing)
-#define bcf_double_is_missing_or_vector_end(x)     (bcf_double_test((x),bcf_double_missing) || bcf_double_test((x),bcf_double_vector_end))
-
-
 typedef struct _token_t
 {
     // read-only values, same for all VCF lines
@@ -89,9 +69,9 @@ typedef struct _token_t
     int hdr_id, tag_type;   // BCF header lookup ID and one of BCF_HL_* types
     int idx;            // 0-based index to VCF vectors,
                         //  -2: list (e.g. [0,1,2] or [1..3] or [1..] or any field[*], which is equivalent to [0..])
-    int *idxs;          // set indexes to 0 to exclude, to 1 to include, and last element negative if unlimited
+    int *idxs;          // set indexes to 0 to exclude, to 1 to include, and last element negative if unlimited; used by VCF retrievers only
     int nidxs, nuidxs;  // size of idxs array and the number of elements set to 1
-    uint8_t *usmpl;     // bitmask of used samples as set by idx
+    uint8_t *usmpl;     // bitmask of used samples as set by idx, set for FORMAT fields, NULL otherwise
     int nsamples;       // number of samples for format fields, 0 for info and other fields
     void (*setter)(filter_t *, bcf1_t *, struct _token_t *);
     int (*func)(filter_t *, bcf1_t *, struct _token_t *rtok, struct _token_t **stack, int nstack);
@@ -158,11 +138,19 @@ struct _filter_t
 #define TOK_PHRED   29
 #define TOK_MEDIAN  30
 #define TOK_STDEV   31
-
-//                      0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-//                        ( ) [ < = > ] ! | &  +  -  *  /  M  m  a  A  O  ~  ^  S  .  l  f  c  p  b  P  i  s
-static int op_prec[] = {0,1,1,5,5,5,5,5,5,2,3, 6, 6, 7, 7, 8, 8, 8, 3, 2, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
-#define TOKEN_STRING "x()[<=>]!|&+-*/MmaAO~^S.lfcpis"
+#define TOK_sMAX    32
+#define TOK_sMIN    33
+#define TOK_sAVG    34
+#define TOK_sMEDIAN 35
+#define TOK_sSTDEV  36
+#define TOK_sSUM    37
+#define TOK_IN      38      // contains, e.g. FILTER~"A" 
+#define TOK_NOT_IN  39      // does not contain, e.g. FILTER!~"A" 
+
+//                      0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
+//                        ( ) [ < = > ] ! | &  +  -  *  /  M  m  a  A  O  ~  ^  S  .  l  f  c  p  b  P  i  s 
+static int op_prec[] = {0,1,1,5,5,5,5,5,5,2,3, 6, 6, 7, 7, 8, 8, 8, 3, 2, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
+#define TOKEN_STRING "x()[<=>]!|&+-*/MmaAO~^S.lfcpis"       // this is only for debugging, not maintained diligently
 
 // Return negative values if it is a function with variable number of arguments
 static int filters_next_token(char **str, int *len)
@@ -184,6 +172,20 @@ static int filters_next_token(char **str, int *len)
         tmp = *str;
     }
 
+    if ( !strncasecmp(tmp,"SMPL_MAX(",9) ) { (*str) += 8; return TOK_sMAX; }
+    if ( !strncasecmp(tmp,"SMPL_MIN(",9) ) { (*str) += 8; return TOK_sMIN; }
+    if ( !strncasecmp(tmp,"SMPL_MEAN(",10) ) { (*str) += 9; return TOK_sAVG; }
+    if ( !strncasecmp(tmp,"SMPL_MEDIAN(",12) ) { (*str) += 11; return TOK_sMEDIAN; }
+    if ( !strncasecmp(tmp,"SMPL_AVG(",9) ) { (*str) += 8; return TOK_sAVG; }
+    if ( !strncasecmp(tmp,"SMPL_STDEV(",11) ) { (*str) += 10; return TOK_sSTDEV; }
+    if ( !strncasecmp(tmp,"SMPL_SUM(",9) ) { (*str) += 8; return TOK_sSUM; }
+    if ( !strncasecmp(tmp,"sMAX(",5) ) { (*str) += 4; return TOK_sMAX; }
+    if ( !strncasecmp(tmp,"sMIN(",5) ) { (*str) += 4; return TOK_sMIN; }
+    if ( !strncasecmp(tmp,"sMEAN(",6) ) { (*str) += 5; return TOK_sAVG; }
+    if ( !strncasecmp(tmp,"sMEDIAN(",8) ) { (*str) += 7; return TOK_sMEDIAN; }
+    if ( !strncasecmp(tmp,"sAVG(",5) ) { (*str) += 4; return TOK_sAVG; }
+    if ( !strncasecmp(tmp,"sSTDEV(",7) ) { (*str) += 6; return TOK_sSTDEV; }
+    if ( !strncasecmp(tmp,"sSUM(",5) ) { (*str) += 4; return TOK_sSUM; }
     if ( !strncasecmp(tmp,"MAX(",4) ) { (*str) += 3; return TOK_MAX; }
     if ( !strncasecmp(tmp,"MIN(",4) ) { (*str) += 3; return TOK_MIN; }
     if ( !strncasecmp(tmp,"MEAN(",5) ) { (*str) += 4; return TOK_AVG; }
@@ -417,7 +419,7 @@ static void filters_cmp_bit_and(token_t *atok, token_t *btok, token_t *rtok, bcf
 static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1_t *line)
 {
     int i;
-    if ( rtok->tok_type==TOK_NE )  // AND logic: none of the filters can match
+    if ( rtok->tok_type==TOK_NOT_IN )
     {
         if ( !line->d.n_flt )
         {
@@ -430,7 +432,7 @@ static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1
         rtok->pass_site = 1;
         return;
     }
-    else if ( rtok->tok_type==TOK_EQ ) // OR logic: at least one of the filters must match
+    else if ( rtok->tok_type==TOK_IN )
     {
         if ( !line->d.n_flt )
         {
@@ -441,8 +443,30 @@ static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1
             if ( atok->hdr_id==line->d.flt[i] ) { rtok->pass_site = 1; return; }
         return;
     }
+    else if ( rtok->tok_type==TOK_NE )  // exact match
+    {
+        if ( !line->d.n_flt )
+        {
+            if ( atok->hdr_id==-1 ) return;   // missing value
+            rtok->pass_site = 1;
+            return; // no filter present, eval to true
+        }
+        if ( line->d.n_flt==1 && atok->hdr_id==line->d.flt[0] ) return;    // exact match, fail iff a single matching value is present
+        rtok->pass_site = 1;
+        return;
+    }
+    else if ( rtok->tok_type==TOK_EQ )  // exact match, pass iff a single matching value is present
+    {
+        if ( !line->d.n_flt )
+        {
+            if ( atok->hdr_id==-1 ) { rtok->pass_site = 1; return; }
+            return; // no filter present, eval to false
+        }
+        if ( line->d.n_flt==1 && atok->hdr_id==line->d.flt[0] ) rtok->pass_site = 1;
+        return;
+    }
     else 
-        error("Only == and != operators are supported for FILTER\n");
+        error("Only ==, !=, ~, and !~ operators are supported for FILTER\n");
     return;
 }
 static void filters_cmp_id(token_t *atok, token_t *btok, token_t *rtok, bcf1_t *line)
@@ -1036,54 +1060,46 @@ static void filters_set_nmissing(filter_t *flt, bcf1_t *line, token_t *tok)
         tok->nvalues = 0;
         return;
     }
-    if ( fmt->type!=BCF_BT_INT8 ) error("TODO: the GT fmt_type is not int8\n");
-
+    
     int j,nmissing = 0;
-    for (i=0; i<line->n_sample; i++)
-    {
-        int8_t *ptr = (int8_t*) (fmt->p + i*fmt->size);
-        for (j=0; j<fmt->n; j++)
-        {
-            if ( ptr[j]==bcf_int8_vector_end ) break;
-            if ( ptr[j]==bcf_gt_missing ) { nmissing++; break; }
-        }
+    #define BRANCH(type_t, is_vector_end) { \
+        for (i=0; i<line->n_sample; i++) \
+        { \
+            type_t *ptr = (type_t *) (fmt->p + i*fmt->size); \
+            for (j=0; j<fmt->n; j++) \
+            { \
+                if ( ptr[j]==is_vector_end ) break; \
+                if ( ptr[j]==bcf_gt_missing ) { nmissing++; break; } \
+            } \
+        } \
+    }
+    switch (fmt->type) {
+        case BCF_BT_INT8:  BRANCH(int8_t,  bcf_int8_vector_end); break;
+        case BCF_BT_INT16: BRANCH(int16_t, bcf_int16_vector_end); break;
+        case BCF_BT_INT32: BRANCH(int32_t, bcf_int32_vector_end); break;
+        default: fprintf(stderr,"todo: type %d\n", fmt->type); exit(1); break;
     }
+    #undef BRANCH
     tok->nvalues = 1;
     tok->values[0] = tok->tag[0]=='N' ? nmissing : (double)nmissing / line->n_sample;
 }
 static int func_npass(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
-    if ( nstack==0 ) error("Error parsing the expresion\n");
+    if ( nstack==0 ) error("Error parsing the expression\n");
     token_t *tok = stack[nstack - 1];
     if ( !tok->nsamples ) error("The function %s works with FORMAT fields\n", rtok->tag);
-
-    rtok->nsamples = tok->nsamples;
-    memcpy(rtok->pass_samples, tok->pass_samples, rtok->nsamples*sizeof(*rtok->pass_samples));
-
     assert(tok->usmpl);
-    if ( !rtok->usmpl )
-    {
-        rtok->usmpl = (uint8_t*) malloc(tok->nsamples*sizeof(*rtok->usmpl));
-        memcpy(rtok->usmpl, tok->usmpl, tok->nsamples*sizeof(*rtok->usmpl));
-    }
 
     int i, npass = 0;
-    for (i=0; i<rtok->nsamples; i++)
+    for (i=0; i<tok->nsamples; i++)
     {
-        if ( !rtok->usmpl[i] ) continue;
-        if ( rtok->pass_samples[i] ) npass++;
+        if ( !tok->usmpl[i] ) continue;
+        if ( tok->pass_samples[i] ) npass++;
     }
-
-    hts_expand(double,rtok->nsamples,rtok->mvalues,rtok->values);
-    double value = rtok->tag[0]=='N' ? npass : (line->n_sample ? 1.0*npass/line->n_sample : 0);
-    rtok->nval1 = 1;
-    rtok->nvalues = rtok->nsamples;
-
-    // Set per-sample status so that `query -i 'F_PASS(GT!="mis" & GQ >= 20) > 0.5'` or +trio-stats
-    // consider only the passing site AND samples. The values for failed samples is set to -1 so
-    // that it can never conflict with valid expressions.
-    for (i=0; i<rtok->nsamples; i++)
-        rtok->values[i] = rtok->pass_samples[i] ? value : -1;
+    hts_expand(double,1,rtok->mvalues,rtok->values);
+    rtok->nsamples = 0;
+    rtok->nvalues = 1;
+    rtok->values[0] = rtok->tag[0]=='N' ? npass : (line->n_sample ? 1.0*npass/line->n_sample : 0);
 
     return 1;
 }
@@ -1165,13 +1181,30 @@ static int func_max(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    double val = -HUGE_VAL;
-    int i, has_value = 0;
-    for (i=0; i<tok->nvalues; i++)
+    double *ptr, val = -HUGE_VAL;
+    int i,j, has_value = 0;
+    if ( tok->nsamples )
     {
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
-        has_value = 1;
-        if ( val < tok->values[i] ) val = tok->values[i];
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) continue;
+            ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+                has_value = 1;
+                if ( val < ptr[j] ) val = ptr[j];
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
+            has_value = 1;
+            if ( val < tok->values[i] ) val = tok->values[i];
+        }
     }
     if ( has_value )
     {
@@ -1180,18 +1213,65 @@ static int func_max(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     }
     return 1;
 }
+static int func_smpl_max(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)     // per-sample maximum of the token on top of the stack
+{
+    token_t *tok = stack[nstack - 1];
+    if ( !tok->nsamples ) return func_max(flt,line,rtok,stack,nstack);     // no per-sample values, use the plain MAX
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;
+    rtok->nval1 = 1;    // the result holds one value per sample
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);    // the result uses the same subset of samples as the argument
+    int i, j, has_value;
+    double val, *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // rtok->values[i] is not written for unused samples
+        val = -HUGE_VAL;
+        has_value = 0;
+        ptr = tok->values + i*tok->nval1;   // values of the i-th sample
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            has_value = 1;
+            if ( val < ptr[j] ) val = ptr[j];
+        }
+        if ( has_value ) rtok->values[i] = val;
+        else bcf_double_set_missing(rtok->values[i]);   // all values of this sample were missing
+    }
+    return 1;   // presumably the number of stack tokens consumed - confirm against the caller
+}
 static int func_min(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    double val = HUGE_VAL;
-    int i, has_value = 0;
-    for (i=0; i<tok->nvalues; i++)
+    double *ptr, val = HUGE_VAL;
+    int i,j, has_value = 0;
+    if ( tok->nsamples )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) continue;
+            ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+                has_value = 1;
+                if ( val > ptr[j] ) val = ptr[j];
+            }
+        }
+    }
+    else
     {
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
-        has_value = 1;
-        if ( val > tok->values[i] ) val = tok->values[i];
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
+            has_value = 1;
+            if ( val > tok->values[i] ) val = tok->values[i];
+        }
     }
     if ( has_value )
     {
@@ -1200,15 +1280,62 @@ static int func_min(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     }
     return 1;
 }
+static int func_smpl_min(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)     // per-sample minimum of the token on top of the stack
+{
+    token_t *tok = stack[nstack - 1];
+    if ( !tok->nsamples ) return func_min(flt,line,rtok,stack,nstack);     // no per-sample values, use the plain MIN
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;
+    rtok->nval1 = 1;    // the result holds one value per sample
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);    // the result uses the same subset of samples as the argument
+    int i, j, has_value;
+    double val, *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // rtok->values[i] is not written for unused samples
+        val = HUGE_VAL;
+        has_value = 0;
+        ptr = tok->values + i*tok->nval1;   // values of the i-th sample
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            has_value = 1;
+            if ( val > ptr[j] ) val = ptr[j];
+        }
+        if ( has_value ) rtok->values[i] = val;
+        else bcf_double_set_missing(rtok->values[i]);   // all values of this sample were missing
+    }
+    return 1;   // presumably the number of stack tokens consumed - confirm against the caller
+}
 static int func_avg(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    double val = 0;
-    int i, n = 0;
-    for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_double_is_missing_or_vector_end(tok->values[i]) ) { val += tok->values[i]; n++; }
+    double *ptr, val = 0;
+    int i,j, n = 0;
+    if ( tok->nsamples )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) continue;
+            ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+                val += ptr[j];
+                n++;
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+            if ( !bcf_double_is_missing_or_vector_end(tok->values[i]) ) { val += tok->values[i]; n++; }
+    }
     if ( n )
     {
         rtok->values[0] = val / n;
@@ -1216,6 +1343,34 @@ static int func_avg(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     }
     return 1;
 }
+static int func_smpl_avg(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)     // per-sample mean of the token on top of the stack
+{
+    token_t *tok = stack[nstack - 1];
+    if ( !tok->nsamples ) return func_avg(flt,line,rtok,stack,nstack);     // no per-sample values, use the plain AVG
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;
+    rtok->nval1 = 1;    // the result holds one value per sample
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);    // the result uses the same subset of samples as the argument
+    int i, j, n;
+    double val, *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // rtok->values[i] is not written for unused samples
+        val = 0;
+        n = 0;  // number of non-missing values of this sample
+        ptr = tok->values + i*tok->nval1;   // values of the i-th sample
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( !bcf_double_is_missing_or_vector_end(ptr[j]) ) { val += ptr[j]; n++; }
+        }
+        if ( n ) rtok->values[i] = val / n;
+        else bcf_double_set_missing(rtok->values[i]);   // all values of this sample were missing
+    }
+    return 1;   // presumably the number of stack tokens consumed - confirm against the caller
+}
 static int compare_doubles(const void *lhs, const void *rhs)
 {
     double arg1 = *(const double*) lhs;
@@ -1229,12 +1384,29 @@ static int func_median(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **sta
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    int i, n = 0;
-    for (i=0; i<tok->nvalues; i++)
+    // sweep through all tok->values and while excluding all missing values reuse the very same array
+    int i,j,k = 0, n = 0;
+    if ( tok->nsamples )
     {
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
-        if ( n < i ) tok->values[n] = tok->values[i];
-        n++;
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) { k += tok->nval1; continue; }
+            for (j=0; j<tok->nval1; k++,j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(tok->values[k]) ) continue;
+                if ( n < k ) tok->values[n] = tok->values[k];
+                n++;
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
+            if ( n < i ) tok->values[n] = tok->values[i];
+            n++;
+        }
     }
     if ( !n ) return 1;
     if ( n==1 ) rtok->values[0] = tok->values[0];
@@ -1246,40 +1418,149 @@ static int func_median(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **sta
     rtok->nvalues = 1;
     return 1;
 }
+static int func_smpl_median(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
+{   // Per-sample median of the top-of-stack token: writes one value per sample into rtok->values.
+    token_t *tok = stack[nstack - 1];
+    if ( !tok->nsamples ) return func_median(flt,line,rtok,stack,nstack);  // no per-sample values: fall back to the site-level median (was func_avg, which returned the mean)
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;     // one result per sample
+    rtok->nval1 = 1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);    // result uses the same sample mask as the input
+    int i, j, n;
+    double *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // sample not in use: rtok->values[i] is left untouched
+        n = 0;
+        ptr = tok->values + i*tok->nval1;   // start of the i-th sample's nval1 values
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            if ( n < j ) ptr[n] = ptr[j];   // compact the non-missing values to the front, in place
+            n++;
+        }
+        if ( n==0 )
+            bcf_double_set_missing(rtok->values[i]);    // nothing to take the median of
+        else if ( n==1 )
+            rtok->values[i] = ptr[0];
+        else
+        {
+            qsort(ptr, n, sizeof(double), compare_doubles);     // reorders this sample's slice of tok->values
+            rtok->values[i] = n % 2 ? ptr[n/2] : (ptr[n/2-1] + ptr[n/2]) * 0.5;    // middle element, or mean of the two middle ones
+        }
+    }
+    return 1;
+}
 static int func_stddev(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    int i, n = 0;
-    for (i=0; i<tok->nvalues; i++)
+    // sweep through all tok->values and while excluding all missing values reuse the very same array
+    int i,j,k = 0, n = 0;   // k: read position in tok->values, n: number of values kept so far
+    if ( tok->nsamples )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) { k += tok->nval1; continue; }    // unused sample: step k over its nval1 slots
+            for (j=0; j<tok->nval1; k++,j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(tok->values[k]) ) continue;
+                if ( n < k ) tok->values[n] = tok->values[k];   // compact kept values into [0,n)
+                n++;
+            }
+        }
+    }
+    else
     {
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
-        if ( n < i ) tok->values[n] = tok->values[i];
-        n++;
+        for (i=0; i<tok->nvalues; i++)      // no per-sample layout: scan the flat value array
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
+            if ( n < i ) tok->values[n] = tok->values[i];
+            n++;
+        }
     }
     if ( !n ) return 1;
     if ( n==1 ) rtok->values[0] = 0;
     else
     {
         double sdev = 0, avg = 0;
-        for (i=0; i<n; i++) avg += tok->values[n];
+        for (i=0; i<n; i++) avg += tok->values[i];     // index with i: the kept values occupy [0,n)
         avg /= n;
-        for (i=0; i<n; i++) sdev += (tok->values[n] - avg) * (tok->values[n] - avg);
+        for (i=0; i<n; i++) sdev += (tok->values[i] - avg) * (tok->values[i] - avg);   // sum of squared deviations; divided by n below
         rtok->values[0] = sqrt(sdev/n);
     }
     rtok->nvalues = 1;
     return 1;
 }
+static int func_smpl_stddev(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
+{   // Per-sample standard deviation of the top-of-stack token: writes one value per sample into rtok->values.
+    token_t *tok = stack[nstack - 1];
+    if ( !tok->nsamples ) return func_stddev(flt,line,rtok,stack,nstack);  // no per-sample values: fall back to the site-level stddev (was func_avg, which returned the mean)
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;     // one result per sample
+    rtok->nval1 = 1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);    // result uses the same sample mask as the input
+    int i, j, n;
+    double *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // sample not in use: rtok->values[i] is left untouched
+        n = 0;
+        ptr = tok->values + i*tok->nval1;   // start of the i-th sample's nval1 values
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            if ( n < j ) ptr[n] = ptr[j];   // compact the non-missing values to the front, in place
+            n++;
+        }
+        if ( n==0 )
+            bcf_double_set_missing(rtok->values[i]);    // no usable values for this sample
+        else if ( n==1 )
+            rtok->values[i] = 0;            // a single value has zero spread
+        else
+        {
+            double sdev = 0, avg = 0;
+            for (j=0; j<n; j++) avg += ptr[j];
+            avg /= n;
+            for (j=0; j<n; j++) sdev += (ptr[j] - avg) * (ptr[j] - avg);
+            rtok->values[i] = sqrt(sdev/n); // divides by n, matching func_stddev
+        }
+    }
+    return 1;
+}
 static int func_sum(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     rtok->nvalues = 0;
     token_t *tok = stack[nstack - 1];
     if ( !tok->nvalues ) return 1;
-    double val = 0;
-    int i, n = 0;
-    for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_double_is_missing_or_vector_end(tok->values[i]) ) { val += tok->values[i]; n++; }
+    double *ptr, val = 0;
+    int i,j, n = 0;
+    if ( tok->nsamples )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) continue;
+            ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+                val += ptr[j];
+                n++;
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+            if ( !bcf_double_is_missing_or_vector_end(tok->values[i]) ) { val += tok->values[i]; n++; }
+    }
     if ( n )
     {
         rtok->values[0] = val;
@@ -1287,39 +1568,104 @@ static int func_sum(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     }
     return 1;
 }
+static int func_smpl_sum(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
+{   // Per-sample sum of the top-of-stack token: writes one value per sample into rtok->values.
+    token_t *tok = stack[nstack - 1];
+    if ( !tok->nsamples ) return func_sum(flt,line,rtok,stack,nstack);  // no per-sample values: fall back to the site-level sum (was func_avg, which returned the mean)
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;     // one result per sample
+    rtok->nval1 = 1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);    // result uses the same sample mask as the input
+    int i, j, has_value;
+    double val, *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // sample not in use: rtok->values[i] is left untouched
+        val = 0;
+        has_value = 0;                      // tells a genuine zero sum apart from "all values missing"
+        ptr = tok->values + i*tok->nval1;   // start of the i-th sample's nval1 values
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            has_value = 1;
+            val += ptr[j];
+        }
+        if ( has_value ) rtok->values[i] = val;
+        else bcf_double_set_missing(rtok->values[i]);   // no usable values for this sample
+    }
+    return 1;
+}
 static int func_abs(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
     if ( tok->is_str ) error("ABS() can be applied only on numeric values\n");
-
+    rtok->nsamples = tok->nsamples;     // the result keeps the input's sample count, value count and per-sample width
     rtok->nvalues = tok->nvalues;
+    rtok->nval1 = tok->nval1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);    // same call is repeated after the empty-input check below
+    if ( tok->usmpl )
+    {
+        if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+        memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);    // copy the used-sample mask to the result
+    }
     if ( !tok->nvalues ) return 1;
     hts_expand(double, rtok->nvalues, rtok->mvalues, rtok->values);
-    int i;
-    for (i=0; i<tok->nvalues; i++)
-        if ( bcf_double_is_missing(tok->values[i]) ) bcf_double_set_missing(rtok->values[i]);
-        else if ( !bcf_double_is_vector_end(tok->values[i]) ) rtok->values[i] = fabs(tok->values[i]);
+    int i,j,k = 0;      // k: flat index into values, advanced across all samples
+    if ( tok->usmpl )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) { k+= tok->nval1; continue; }     // unused sample: step k over its nval1 slots, output left untouched
+            for (j=0; j<tok->nval1; k++,j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(tok->values[k]) ) bcf_double_set_missing(rtok->values[k]);     // vector-end is written out as missing in this branch
+                else rtok->values[k] = fabs(tok->values[k]);
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( tok->usmpl && !tok->usmpl[i] ) continue;   // never true here: tok->usmpl is NULL in this else-branch
+            if ( bcf_double_is_missing(tok->values[i]) ) bcf_double_set_missing(rtok->values[i]);
+            else if ( !bcf_double_is_vector_end(tok->values[i]) ) rtok->values[i] = fabs(tok->values[i]);    // vector-end inputs leave rtok->values[i] unwritten
+        }
+    }
     return 1;
 }
 static int func_count(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
-    int i, cnt = 0;
-    if ( !tok->nsamples )
+    int i,j, cnt = 0;
+    if ( tok->tag && tok->nsamples )
     {
-        if ( tok->is_str )
+        // raw number of values in a FMT tag, e.g. COUNT(FMT/TAG)
+        if ( tok->is_str ) error("todo: Type=String for COUNT on FORMAT fields?\n");
+        for (i=0; i<tok->nsamples; i++)
         {
-            if ( tok->str_value.l ) cnt = 1;
-            for (i=0; i<tok->str_value.l; i++) if ( tok->str_value.s[i]==',' ) cnt++;
+            if ( !tok->usmpl[i] ) continue;
+            double *ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+                if ( !bcf_double_is_missing_or_vector_end(ptr[j]) ) cnt++;
         }
-        else
-            cnt = tok->nvalues;
     }
-    else
+    else if ( tok->nsamples )
     {
+        // number of samples that pass a processed FMT tag
         for (i=0; i<tok->nsamples; i++)
             if ( tok->pass_samples[i] ) cnt++;
     }
+    else if ( tok->is_str )
+    {
+        if ( tok->str_value.l ) cnt = 1;
+        for (i=0; i<tok->str_value.l; i++) if ( tok->str_value.s[i]==',' ) cnt++;
+    }
+    else
+        cnt = tok->nvalues;
 
     rtok->nvalues = 1;
     rtok->values[0] = cnt;
@@ -1531,11 +1877,27 @@ static int func_phred(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stac
     if ( !tok->nvalues ) return 1;
 
     hts_expand(double, rtok->nvalues, rtok->mvalues, rtok->values);
-    int i;
-    for (i=0; i<tok->nvalues; i++)
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) bcf_double_set_missing(rtok->values[i]);
-        else rtok->values[i] = -4.34294481903*log(tok->values[i]);
-
+    int i,j,k = 0;
+    if ( tok->usmpl )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) { k+= tok->nval1; continue; }
+            for (j=0; j<tok->nval1; k++,j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(tok->values[k]) ) bcf_double_set_missing(rtok->values[k]);
+                else rtok->values[k] = -4.34294481903*log(tok->values[k]);
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) bcf_double_set_missing(rtok->values[i]);
+            else rtok->values[i] = -4.34294481903*log(tok->values[i]);
+        }
+    }
     return 1;
 }
 inline static void tok_init_values(token_t *atok, token_t *btok, token_t *rtok)
@@ -1555,7 +1917,8 @@ inline static void tok_init_samples(token_t *atok, token_t *btok, token_t *rtok)
         for (i=0; i<atok->nsamples; i++) rtok->usmpl[i] |= atok->usmpl[i];
         for (i=0; i<btok->nsamples; i++) rtok->usmpl[i] |= btok->usmpl[i];
     }
-    memset(rtok->pass_samples, 0, rtok->nsamples);
+    if (rtok->nsamples)
+        memset(rtok->pass_samples, 0, rtok->nsamples);
 }
 
 #define VECTOR_ARITHMETICS(atok,btok,_rtok,AOP) \
@@ -1580,22 +1943,37 @@ inline static void tok_init_samples(token_t *atok, token_t *btok, token_t *rtok)
                 rtok->values[i] = atok->values[i] AOP btok->values[i]; \
             } \
         } \
+        else if ( atok->nsamples ) \
+        { \
+            assert( btok->nvalues==1 ); \
+            if ( !bcf_double_is_missing_or_vector_end(btok->values[0]) ) \
+            { \
+                for (i=0; i<atok->nvalues; i++) \
+                { \
+                    if ( bcf_double_is_missing_or_vector_end(atok->values[i]) ) \
+                    { \
+                        bcf_double_set_missing(rtok->values[i]); \
+                        continue; \
+                    } \
+                    has_values = 1; \
+                    rtok->values[i] = atok->values[i] AOP btok->values[0]; \
+                } \
+            } \
+        } \
         else \
         { \
-            token_t *xtok = atok->nsamples ? atok : btok; \
-            token_t *ytok = atok->nsamples ? btok : atok; \
-            assert( ytok->nvalues==1 ); \
-            if ( !bcf_double_is_missing_or_vector_end(ytok->values[0]) ) \
+            assert( atok->nvalues==1 ); \
+            if ( !bcf_double_is_missing_or_vector_end(atok->values[0]) ) \
             { \
-                for (i=0; i<xtok->nvalues; i++) \
+                for (i=0; i<btok->nvalues; i++) \
                 { \
-                    if ( bcf_double_is_missing_or_vector_end(xtok->values[i]) ) \
+                    if ( bcf_double_is_missing_or_vector_end(btok->values[i]) ) \
                     { \
                         bcf_double_set_missing(rtok->values[i]); \
                         continue; \
                     } \
                     has_values = 1; \
-                    rtok->values[i] = xtok->values[i] AOP ytok->values[0]; \
+                    rtok->values[i] = atok->values[0] AOP btok->values[i]; \
                 } \
             } \
         } \
@@ -1711,14 +2089,6 @@ static int vector_logic_and(filter_t *filter, bcf1_t *line, token_t *rtok, token
     return 2;
 }
 
-#define CMP_MISSING(atok,btok,CMP_OP,ret) \
-{ \
-    if ( (atok)->nsamples || (btok)->nsamples ) error("todo: Querying of missing values in FORMAT\n"); \
-    token_t *tok = (atok)->is_missing ? (btok) : (atok); \
-    (ret) = ( tok->nvalues CMP_OP 1 ) ? 0 : 1; \
-    tok->nvalues = 1; \
-}
-
 #define CMP_VECTORS(atok,btok,_rtok,CMP_OP,missing_logic) \
 { \
     token_t *rtok = _rtok; \
@@ -1821,31 +2191,56 @@ static int vector_logic_and(filter_t *filter, bcf1_t *line, token_t *rtok, token
                 } \
             } \
         } \
-        else \
+        else if ( atok->nsamples )\
+        { \
+            for (i=0; i<atok->nsamples; i++) \
+            { \
+                if ( !rtok->usmpl[i] ) continue; \
+                double *aptr = atok->values + i*atok->nval1; \
+                double *bptr = btok->values + i*btok->nval1; \
+                for (j=0; j<atok->nval1; j++) \
+                { \
+                    int miss = bcf_double_is_missing_or_vector_end(aptr[j]) ? 1 : 0; \
+                    if ( miss && !missing_logic[0] ) continue; /* any is missing => result is false */ \
+                    for (k=0; k<btok->nvalues; k++) \
+                    { \
+                        int nmiss = miss + (bcf_double_is_missing_or_vector_end(bptr[k]) ? 1 : 0); \
+                        if ( nmiss ) \
+                        { \
+                            if ( missing_logic[nmiss] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = atok->nval1; break; } \
+                        } \
+                        else if ( aptr[j] > 16777216 || bptr[k] > 16777216 ) /* Ugly, see #871 */ \
+                        { \
+                            if ( aptr[j] CMP_OP bptr[k] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = atok->nval1; break; } \
+                        } \
+                        else if ( (float)aptr[j] CMP_OP (float)bptr[k] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = atok->nval1; break; } \
+                    } \
+                } \
+            } \
+        } \
+        else /* btok->nsamples */ \
         { \
-            token_t *xtok = atok->nsamples ? atok : btok; \
-            token_t *ytok = atok->nsamples ? btok : atok; \
-            for (i=0; i<xtok->nsamples; i++) \
+            for (i=0; i<btok->nsamples; i++) \
             { \
                 if ( !rtok->usmpl[i] ) continue; \
-                double *xptr = xtok->values + i*xtok->nval1; \
-                double *yptr = ytok->values + i*ytok->nval1; \
-                for (j=0; j<xtok->nval1; j++) \
+                double *aptr = atok->values + i*atok->nval1; \
+                double *bptr = btok->values + i*btok->nval1; \
+                for (j=0; j<btok->nval1; j++) \
                 { \
-                    int miss = bcf_double_is_missing_or_vector_end(xptr[j]) ? 1 : 0; \
+                    int miss = bcf_double_is_missing_or_vector_end(bptr[j]) ? 1 : 0; \
                     if ( miss && !missing_logic[0] ) continue; /* any is missing => result is false */ \
-                    for (k=0; k<ytok->nvalues; k++) \
+                    for (k=0; k<atok->nvalues; k++) \
                     { \
-                        int nmiss = miss + (bcf_double_is_missing_or_vector_end(yptr[k]) ? 1 : 0); \
+                        int nmiss = miss + (bcf_double_is_missing_or_vector_end(aptr[k]) ? 1 : 0); \
                         if ( nmiss ) \
                         { \
-                            if ( missing_logic[nmiss] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = xtok->nval1; break; } \
+                            if ( missing_logic[nmiss] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = btok->nval1; break; } \
                         } \
-                        else if ( xptr[j] > 16777216 || yptr[k] > 16777216 ) /* Ugly, see #871 */ \
+                        else if ( bptr[j] > 16777216 || aptr[k] > 16777216 ) /* Ugly, see #871 */ \
                         { \
-                            if ( xptr[j] CMP_OP yptr[k] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = xtok->nval1; break; } \
+                            if ( aptr[k] CMP_OP bptr[j] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = btok->nval1; break; } \
                         } \
-                        else if ( (float)xptr[j] CMP_OP (float)yptr[k] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = xtok->nval1; break; } \
+                        else if ( (float)aptr[k] CMP_OP (float)bptr[j] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = btok->nval1; break; } \
                     } \
                 } \
             } \
@@ -2344,7 +2739,8 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
         {
             int is_info = bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_INFO,tok->hdr_id) ? 1 : 0;
             is_fmt = bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_FMT,tok->hdr_id) ? 1 : 0;
-            if ( is_info && is_fmt ) error("Both INFO/%s and FORMAT/%s exist, which one do you want?\n", tmp.s,tmp.s);
+            if ( is_info && is_fmt )
+                error("Error: ambiguous filtering expression, both INFO/%s and FORMAT/%s are defined in the VCF header.\n" , tmp.s,tmp.s);
         }
         if ( is_fmt==-1 ) is_fmt = 0;
     }
@@ -2833,6 +3229,7 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
     // Additionally, treat "." as missing value rather than a string in numeric equalities; that
     // @file is only used with ID; etc.
     // This code is fragile: improve me.
+    static int comma_separator_warned = 0;
     int i;
     for (i=0; i<nout; i++)
     {
@@ -2883,6 +3280,19 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
             if ( regcomp(out[j].regex, out[j].key, cflags) )
                 error("Could not compile the regex expression \"%s\": %s\n", out[j].key,filter->str);
         }
+        if ( out[i].is_str && out[i].tok_type==TOK_VAL && out[i].key && strchr(out[i].key,',') )
+        {
+            int print_note = 0;
+            if ( out[i+1].tok_type==TOK_EQ || (out[i+1].is_str && out[i+2].tok_type==TOK_EQ) ) print_note = 1;
+            else if ( out[i+1].tok_type==TOK_NE || (out[i+1].is_str && out[i+2].tok_type==TOK_NE) ) print_note = 1;
+            if ( print_note && !comma_separator_warned )
+            {
+                comma_separator_warned = 1;
+                fprintf(stderr,
+                    "Warning: comma is interpreted as a separator and OR logic is used in string comparisons.\n"
+                    "         (Search the manual for \"Comma in strings\" to learn more.)\n");
+            }
+        }
         if ( out[i].tok_type!=TOK_VAL ) continue;
         if ( !out[i].tag ) continue;
         if ( out[i].setter==filters_set_type )
@@ -2939,11 +3349,11 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
             if ( i+1==nout ) error("Could not parse the expression: %s\n", filter->str);
             int itok = i, ival;
             if ( out[i+1].tok_type==TOK_EQ || out[i+1].tok_type==TOK_NE ) ival = i - 1;
-            else if ( out[i+1].tok_type==TOK_LIKE ) out[i+1].tok_type = TOK_EQ, ival = i - 1;
-            else if ( out[i+1].tok_type==TOK_NLIKE ) out[i+1].tok_type = TOK_NE, ival = i - 1;
+            else if ( out[i+1].tok_type==TOK_LIKE ) out[i+1].tok_type = TOK_IN, ival = i - 1;
+            else if ( out[i+1].tok_type==TOK_NLIKE ) out[i+1].tok_type = TOK_NOT_IN, ival = i - 1;
             else if ( out[i+2].tok_type==TOK_EQ || out[i+2].tok_type==TOK_NE ) ival = ++i;
-            else if ( out[i+2].tok_type==TOK_LIKE ) out[i+2].tok_type = TOK_EQ, ival = ++i;
-            else if ( out[i+2].tok_type==TOK_NLIKE ) out[i+2].tok_type = TOK_NE, ival = ++i;
+            else if ( out[i+2].tok_type==TOK_LIKE ) out[i+2].tok_type = TOK_IN, ival = ++i;
+            else if ( out[i+2].tok_type==TOK_NLIKE ) out[i+2].tok_type = TOK_NOT_IN, ival = ++i;
             else error("[%s:%d %s] Could not parse the expression: %s\n",  __FILE__,__LINE__,__FUNCTION__, filter->str);
             if ( out[ival].tok_type!=TOK_VAL || !out[ival].key )
                 error("[%s:%d %s] Could not parse the expression, an unquoted string value perhaps? %s\n", __FILE__,__LINE__,__FUNCTION__, filter->str);
@@ -2976,6 +3386,12 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
         else if ( out[i].tok_type==TOK_PHRED ) { out[i].func = func_phred; out[i].tok_type = TOK_FUNC; }
         else if ( out[i].tok_type==TOK_BINOM ) { out[i].func = func_binom; out[i].tok_type = TOK_FUNC; }
         else if ( out[i].tok_type==TOK_PERLSUB ) { out[i].func = perl_exec; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sMAX ) { out[i].func = func_smpl_max; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sMIN ) { out[i].func = func_smpl_min; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sAVG ) { out[i].func = func_smpl_avg; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sMEDIAN ) { out[i].func = func_smpl_median; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sSTDEV ) { out[i].func = func_smpl_stddev; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sSUM ) { out[i].func = func_smpl_sum; out[i].tok_type = TOK_FUNC; }
         hts_expand0(double,1,out[i].mvalues,out[i].values);
         if ( filter->nsamples )
         {
@@ -3151,3 +3567,32 @@ int filter_max_unpack(filter_t *flt)
 {
     return flt->max_unpack;
 }
+
+const double *filter_get_doubles(filter_t *filter, int *nval, int *nval1)
+{   // Returns the values array of the first token on filter->flt_stack; sizes are reported via *nval and *nval1.
+    token_t *tok = filter->flt_stack[0];
+    if ( tok->nvalues )
+    {
+        *nval  = tok->nvalues;  // total number of values
+        *nval1 = tok->nval1;    // the token's nval1, passed through unchanged
+    }
+    else
+    {
+        if ( !tok->values ) error("fixme in filter_get_doubles(): %s\n", filter->str);     // no buffer to write the fallback value into
+        *nval  = 1;
+        *nval1 = 1;
+        tok->values[0] = filter->flt_stack[0]->pass_site;   // no values: report the token's pass_site flag as a single value (flt_stack[0] is tok)
+    }
+    return tok->values;     // points into the token's own buffer, not a copy
+}
+
+void filter_set_samples(filter_t *filter, const uint8_t *samples)
+{   // Overwrites the usmpl mask of every filter token that has per-sample data with the caller's mask.
+    int i,j;
+    for (i=0; i<filter->nfilters; i++)
+    {
+        if ( !filter->filters[i].nsamples ) continue;   // token carries no per-sample data, nothing to set
+        for (j=0; j<filter->filters[i].nsamples; j++) filter->filters[i].usmpl[j] = samples[j];    // NOTE(review): assumes samples has at least nsamples entries and usmpl is allocated - confirm with callers
+    }
+}
+
diff --git a/bcftools/filter.c.pysam.c b/bcftools/filter.c.pysam.c
index 2d1987a..8832633 100644
--- a/bcftools/filter.c.pysam.c
+++ b/bcftools/filter.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  filter.c -- filter expressions.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -27,6 +27,7 @@ THE SOFTWARE.  */
 #include <ctype.h>
 #include <stdlib.h>
 #include <strings.h>
+#include <assert.h>
 #include <errno.h>
 #include <math.h>
 #include <sys/types.h>
@@ -58,27 +59,6 @@ static int filter_ninit = 0;
 #  define __FUNCTION__ __func__
 #endif
 
-static const uint64_t bcf_double_missing    = 0x7ff0000000000001;
-static const uint64_t bcf_double_vector_end = 0x7ff0000000000002;
-static inline void bcf_double_set(double *ptr, uint64_t value)
-{
-    union { uint64_t i; double d; } u;
-    u.i = value;
-    *ptr = u.d;
-}
-static inline int bcf_double_test(double d, uint64_t value)
-{
-    union { uint64_t i; double d; } u;
-    u.d = d;
-    return u.i==value ? 1 : 0;
-}
-#define bcf_double_set_vector_end(x) bcf_double_set(&(x),bcf_double_vector_end)
-#define bcf_double_set_missing(x)    bcf_double_set(&(x),bcf_double_missing)
-#define bcf_double_is_vector_end(x)  bcf_double_test((x),bcf_double_vector_end)
-#define bcf_double_is_missing(x)     bcf_double_test((x),bcf_double_missing)
-#define bcf_double_is_missing_or_vector_end(x)     (bcf_double_test((x),bcf_double_missing) || bcf_double_test((x),bcf_double_vector_end))
-
-
 typedef struct _token_t
 {
     // read-only values, same for all VCF lines
@@ -91,9 +71,9 @@ typedef struct _token_t
     int hdr_id, tag_type;   // BCF header lookup ID and one of BCF_HL_* types
     int idx;            // 0-based index to VCF vectors,
                         //  -2: list (e.g. [0,1,2] or [1..3] or [1..] or any field[*], which is equivalent to [0..])
-    int *idxs;          // set indexes to 0 to exclude, to 1 to include, and last element negative if unlimited
+    int *idxs;          // set indexes to 0 to exclude, to 1 to include, and last element negative if unlimited; used by VCF retrievers only
     int nidxs, nuidxs;  // size of idxs array and the number of elements set to 1
-    uint8_t *usmpl;     // bitmask of used samples as set by idx
+    uint8_t *usmpl;     // bitmask of used samples as set by idx, set for FORMAT fields, NULL otherwise
     int nsamples;       // number of samples for format fields, 0 for info and other fields
     void (*setter)(filter_t *, bcf1_t *, struct _token_t *);
     int (*func)(filter_t *, bcf1_t *, struct _token_t *rtok, struct _token_t **stack, int nstack);
@@ -160,11 +140,19 @@ struct _filter_t
 #define TOK_PHRED   29
 #define TOK_MEDIAN  30
 #define TOK_STDEV   31
-
-//                      0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-//                        ( ) [ < = > ] ! | &  +  -  *  /  M  m  a  A  O  ~  ^  S  .  l  f  c  p  b  P  i  s
-static int op_prec[] = {0,1,1,5,5,5,5,5,5,2,3, 6, 6, 7, 7, 8, 8, 8, 3, 2, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
-#define TOKEN_STRING "x()[<=>]!|&+-*/MmaAO~^S.lfcpis"
+#define TOK_sMAX    32
+#define TOK_sMIN    33
+#define TOK_sAVG    34
+#define TOK_sMEDIAN 35
+#define TOK_sSTDEV  36
+#define TOK_sSUM    37
+#define TOK_IN      38      // contains, e.g. FILTER~"A" 
+#define TOK_NOT_IN  39      // does not contain, e.g. FILTER!~"A" 
+
+//                      0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
+//                        ( ) [ < = > ] ! | &  +  -  *  /  M  m  a  A  O  ~  ^  S  .  l  f  c  p  b  P  i  s 
+static int op_prec[] = {0,1,1,5,5,5,5,5,5,2,3, 6, 6, 7, 7, 8, 8, 8, 3, 2, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
+#define TOKEN_STRING "x()[<=>]!|&+-*/MmaAO~^S.lfcpis"       // this is only for debugging, not maintained diligently
 
 // Return negative values if it is a function with variable number of arguments
 static int filters_next_token(char **str, int *len)
@@ -186,6 +174,20 @@ static int filters_next_token(char **str, int *len)
         tmp = *str;
     }
 
+    if ( !strncasecmp(tmp,"SMPL_MAX(",9) ) { (*str) += 8; return TOK_sMAX; }
+    if ( !strncasecmp(tmp,"SMPL_MIN(",9) ) { (*str) += 8; return TOK_sMIN; }
+    if ( !strncasecmp(tmp,"SMPL_MEAN(",10) ) { (*str) += 9; return TOK_sAVG; }
+    if ( !strncasecmp(tmp,"SMPL_MEDIAN(",12) ) { (*str) += 11; return TOK_sMEDIAN; }
+    if ( !strncasecmp(tmp,"SMPL_AVG(",9) ) { (*str) += 8; return TOK_sAVG; }
+    if ( !strncasecmp(tmp,"SMPL_STDEV(",11) ) { (*str) += 10; return TOK_sSTDEV; }
+    if ( !strncasecmp(tmp,"SMPL_SUM(",9) ) { (*str) += 8; return TOK_sSUM; }
+    if ( !strncasecmp(tmp,"sMAX(",5) ) { (*str) += 4; return TOK_sMAX; }
+    if ( !strncasecmp(tmp,"sMIN(",5) ) { (*str) += 4; return TOK_sMIN; }
+    if ( !strncasecmp(tmp,"sMEAN(",6) ) { (*str) += 5; return TOK_sAVG; }
+    if ( !strncasecmp(tmp,"sMEDIAN(",8) ) { (*str) += 7; return TOK_sMEDIAN; }
+    if ( !strncasecmp(tmp,"sAVG(",5) ) { (*str) += 4; return TOK_sAVG; }
+    if ( !strncasecmp(tmp,"sSTDEV(",7) ) { (*str) += 6; return TOK_sSTDEV; }
+    if ( !strncasecmp(tmp,"sSUM(",5) ) { (*str) += 4; return TOK_sSUM; }
     if ( !strncasecmp(tmp,"MAX(",4) ) { (*str) += 3; return TOK_MAX; }
     if ( !strncasecmp(tmp,"MIN(",4) ) { (*str) += 3; return TOK_MIN; }
     if ( !strncasecmp(tmp,"MEAN(",5) ) { (*str) += 4; return TOK_AVG; }
@@ -419,7 +421,7 @@ static void filters_cmp_bit_and(token_t *atok, token_t *btok, token_t *rtok, bcf
 static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1_t *line)
 {
     int i;
-    if ( rtok->tok_type==TOK_NE )  // AND logic: none of the filters can match
+    if ( rtok->tok_type==TOK_NOT_IN )
     {
         if ( !line->d.n_flt )
         {
@@ -432,7 +434,7 @@ static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1
         rtok->pass_site = 1;
         return;
     }
-    else if ( rtok->tok_type==TOK_EQ ) // OR logic: at least one of the filters must match
+    else if ( rtok->tok_type==TOK_IN )
     {
         if ( !line->d.n_flt )
         {
@@ -443,8 +445,30 @@ static void filters_cmp_filter(token_t *atok, token_t *btok, token_t *rtok, bcf1
             if ( atok->hdr_id==line->d.flt[i] ) { rtok->pass_site = 1; return; }
         return;
     }
+    else if ( rtok->tok_type==TOK_NE )  // exact match
+    {
+        if ( !line->d.n_flt )
+        {
+            if ( atok->hdr_id==-1 ) return;   // missing value
+            rtok->pass_site = 1;
+            return; // no filter present, eval to true
+        }
+        if ( line->d.n_flt==1 && atok->hdr_id==line->d.flt[0] ) return;    // exact match, fail iff a single matching value is present
+        rtok->pass_site = 1;
+        return;
+    }
+    else if ( rtok->tok_type==TOK_EQ )  // exact match, pass iff a single matching value is present
+    {
+        if ( !line->d.n_flt )
+        {
+            if ( atok->hdr_id==-1 ) { rtok->pass_site = 1; return; }
+            return; // no filter present, eval to false
+        }
+        if ( line->d.n_flt==1 && atok->hdr_id==line->d.flt[0] ) rtok->pass_site = 1;
+        return;
+    }
     else 
-        error("Only == and != operators are supported for FILTER\n");
+        error("Only ==, !=, ~, and !~ operators are supported for FILTER\n");
     return;
 }
 static void filters_cmp_id(token_t *atok, token_t *btok, token_t *rtok, bcf1_t *line)
@@ -516,7 +540,7 @@ static int bcf_get_info_value(bcf1_t *line, int info_id, int ivec, void *value)
         case BCF_BT_INT16: BRANCH(int16_t, p[j]==bcf_int16_missing, p[j]==bcf_int16_vector_end, int64_t); break;
         case BCF_BT_INT32: BRANCH(int32_t, p[j]==bcf_int32_missing, p[j]==bcf_int32_vector_end, int64_t); break;
         case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(p[j]), bcf_float_is_vector_end(p[j]), double); break;
-        default: fprintf(bcftools_stderr,"todo: type %d\n", info->type); exit(1); break;
+        default: fprintf(bcftools_stderr,"todo: type %d\n", info->type); bcftools_exit(1); break;
     }
     #undef BRANCH
     return -1;  // this shouldn't happen
@@ -1038,54 +1062,46 @@ static void filters_set_nmissing(filter_t *flt, bcf1_t *line, token_t *tok)
         tok->nvalues = 0;
         return;
     }
-    if ( fmt->type!=BCF_BT_INT8 ) error("TODO: the GT fmt_type is not int8\n");
-
+    
     int j,nmissing = 0;
-    for (i=0; i<line->n_sample; i++)
-    {
-        int8_t *ptr = (int8_t*) (fmt->p + i*fmt->size);
-        for (j=0; j<fmt->n; j++)
-        {
-            if ( ptr[j]==bcf_int8_vector_end ) break;
-            if ( ptr[j]==bcf_gt_missing ) { nmissing++; break; }
-        }
+    #define BRANCH(type_t, is_vector_end) { \
+        for (i=0; i<line->n_sample; i++) \
+        { \
+            type_t *ptr = (type_t *) (fmt->p + i*fmt->size); \
+            for (j=0; j<fmt->n; j++) \
+            { \
+                if ( ptr[j]==is_vector_end ) break; \
+                if ( ptr[j]==bcf_gt_missing ) { nmissing++; break; } \
+            } \
+        } \
+    }
+    switch (fmt->type) {
+        case BCF_BT_INT8:  BRANCH(int8_t,  bcf_int8_vector_end); break;
+        case BCF_BT_INT16: BRANCH(int16_t, bcf_int16_vector_end); break;
+        case BCF_BT_INT32: BRANCH(int32_t, bcf_int32_vector_end); break;
+        default: fprintf(bcftools_stderr,"todo: type %d\n", fmt->type); bcftools_exit(1); break;
     }
+    #undef BRANCH
     tok->nvalues = 1;
     tok->values[0] = tok->tag[0]=='N' ? nmissing : (double)nmissing / line->n_sample;
 }
 static int func_npass(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
-    if ( nstack==0 ) error("Error parsing the expresion\n");
+    if ( nstack==0 ) error("Error parsing the expression\n");
     token_t *tok = stack[nstack - 1];
     if ( !tok->nsamples ) error("The function %s works with FORMAT fields\n", rtok->tag);
-
-    rtok->nsamples = tok->nsamples;
-    memcpy(rtok->pass_samples, tok->pass_samples, rtok->nsamples*sizeof(*rtok->pass_samples));
-
     assert(tok->usmpl);
-    if ( !rtok->usmpl )
-    {
-        rtok->usmpl = (uint8_t*) malloc(tok->nsamples*sizeof(*rtok->usmpl));
-        memcpy(rtok->usmpl, tok->usmpl, tok->nsamples*sizeof(*rtok->usmpl));
-    }
 
     int i, npass = 0;
-    for (i=0; i<rtok->nsamples; i++)
+    for (i=0; i<tok->nsamples; i++)     // count directly on the operand; nothing per-sample is copied into rtok
     {
-        if ( !rtok->usmpl[i] ) continue;
-        if ( rtok->pass_samples[i] ) npass++;
+        if ( !tok->usmpl[i] ) continue;     // sample is masked out in tok->usmpl
+        if ( tok->pass_samples[i] ) npass++;
     }
-
-    hts_expand(double,rtok->nsamples,rtok->mvalues,rtok->values);
-    double value = rtok->tag[0]=='N' ? npass : (line->n_sample ? 1.0*npass/line->n_sample : 0);
-    rtok->nval1 = 1;
-    rtok->nvalues = rtok->nsamples;
-
-    // Set per-sample status so that `query -i 'F_PASS(GT!="mis" & GQ >= 20) > 0.5'` or +trio-stats
-    // consider only the passing site AND samples. The values for failed samples is set to -1 so
-    // that it can never conflict with valid expressions.
-    for (i=0; i<rtok->nsamples; i++)
-        rtok->values[i] = rtok->pass_samples[i] ? value : -1;
+    hts_expand(double,1,rtok->mvalues,rtok->values);
+    rtok->nsamples = 0;     // the result is a single site-level value, not a per-sample vector
+    rtok->nvalues = 1;
+    rtok->values[0] = rtok->tag[0]=='N' ? npass : (line->n_sample ? 1.0*npass/line->n_sample : 0);   // tag starting with 'N': raw count; otherwise fraction of line->n_sample (0 if there are no samples)
 
     return 1;
 }
@@ -1167,13 +1183,30 @@ static int func_max(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    double val = -HUGE_VAL;
-    int i, has_value = 0;
-    for (i=0; i<tok->nvalues; i++)
+    double *ptr, val = -HUGE_VAL;
+    int i,j, has_value = 0;
+    if ( tok->nsamples )
     {
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
-        has_value = 1;
-        if ( val < tok->values[i] ) val = tok->values[i];
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) continue;
+            ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+                has_value = 1;
+                if ( val < ptr[j] ) val = ptr[j];
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
+            has_value = 1;
+            if ( val < tok->values[i] ) val = tok->values[i];
+        }
     }
     if ( has_value )
     {
@@ -1182,18 +1215,65 @@ static int func_max(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     }
     return 1;
 }
+static int func_smpl_max(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
+{   // Per-sample maximum (installed for TOK_sMAX): writes one value per sample into rtok->values and returns 1
+    token_t *tok = stack[nstack - 1];   // the operand is the top of the evaluation stack
+    if ( !tok->nsamples ) return func_max(flt,line,rtok,stack,nstack);   // no per-sample data: use the plain maximum
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;     // one output value per sample
+    rtok->nval1 = 1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);     // the result carries the same sample mask as the operand
+    int i, j, has_value;
+    double val, *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // masked-out sample: rtok->values[i] is left unwritten
+        val = -HUGE_VAL;
+        has_value = 0;
+        ptr = tok->values + i*tok->nval1;   // start of the i-th sample's run of nval1 values
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            has_value = 1;
+            if ( val < ptr[j] ) val = ptr[j];
+        }
+        if ( has_value ) rtok->values[i] = val;
+        else bcf_double_set_missing(rtok->values[i]);   // every value of this sample was missing/vector-end
+    }
+    return 1;
+}
 static int func_min(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    double val = HUGE_VAL;
-    int i, has_value = 0;
-    for (i=0; i<tok->nvalues; i++)
+    double *ptr, val = HUGE_VAL;
+    int i,j, has_value = 0;
+    if ( tok->nsamples )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) continue;
+            ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+                has_value = 1;
+                if ( val > ptr[j] ) val = ptr[j];
+            }
+        }
+    }
+    else
     {
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
-        has_value = 1;
-        if ( val > tok->values[i] ) val = tok->values[i];
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
+            has_value = 1;
+            if ( val > tok->values[i] ) val = tok->values[i];
+        }
     }
     if ( has_value )
     {
@@ -1202,15 +1282,62 @@ static int func_min(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     }
     return 1;
 }
+static int func_smpl_min(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
+{   // Per-sample minimum (installed for TOK_sMIN): writes one value per sample into rtok->values and returns 1
+    token_t *tok = stack[nstack - 1];   // the operand is the top of the evaluation stack
+    if ( !tok->nsamples ) return func_min(flt,line,rtok,stack,nstack);   // no per-sample data: use the plain minimum
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;     // one output value per sample
+    rtok->nval1 = 1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);     // the result carries the same sample mask as the operand
+    int i, j, has_value;
+    double val, *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // masked-out sample: rtok->values[i] is left unwritten
+        val = HUGE_VAL;
+        has_value = 0;
+        ptr = tok->values + i*tok->nval1;   // start of the i-th sample's run of nval1 values
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            has_value = 1;
+            if ( val > ptr[j] ) val = ptr[j];
+        }
+        if ( has_value ) rtok->values[i] = val;
+        else bcf_double_set_missing(rtok->values[i]);   // every value of this sample was missing/vector-end
+    }
+    return 1;
+}
 static int func_avg(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    double val = 0;
-    int i, n = 0;
-    for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_double_is_missing_or_vector_end(tok->values[i]) ) { val += tok->values[i]; n++; }
+    double *ptr, val = 0;
+    int i,j, n = 0;
+    if ( tok->nsamples )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) continue;
+            ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+                val += ptr[j];
+                n++;
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+            if ( !bcf_double_is_missing_or_vector_end(tok->values[i]) ) { val += tok->values[i]; n++; }
+    }
     if ( n )
     {
         rtok->values[0] = val / n;
@@ -1218,6 +1345,34 @@ static int func_avg(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     }
     return 1;
 }
+static int func_smpl_avg(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
+{   // Per-sample arithmetic mean: writes one value per sample into rtok->values and returns 1
+    token_t *tok = stack[nstack - 1];   // the operand is the top of the evaluation stack
+    if ( !tok->nsamples ) return func_avg(flt,line,rtok,stack,nstack);   // no per-sample data: use the plain average
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;     // one output value per sample
+    rtok->nval1 = 1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);     // the result carries the same sample mask as the operand
+    int i, j, n;
+    double val, *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // masked-out sample: rtok->values[i] is left unwritten
+        val = 0;
+        n = 0;                              // number of non-missing values seen for this sample
+        ptr = tok->values + i*tok->nval1;   // start of the i-th sample's run of nval1 values
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( !bcf_double_is_missing_or_vector_end(ptr[j]) ) { val += ptr[j]; n++; }
+        }
+        if ( n ) rtok->values[i] = val / n;
+        else bcf_double_set_missing(rtok->values[i]);   // every value of this sample was missing/vector-end
+    }
+    return 1;
+}
 static int compare_doubles(const void *lhs, const void *rhs)
 {
     double arg1 = *(const double*) lhs;
@@ -1231,12 +1386,29 @@ static int func_median(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **sta
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    int i, n = 0;
-    for (i=0; i<tok->nvalues; i++)
+    // sweep through all tok->values and while excluding all missing values reuse the very same array
+    int i,j,k = 0, n = 0;
+    if ( tok->nsamples )
     {
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
-        if ( n < i ) tok->values[n] = tok->values[i];
-        n++;
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) { k += tok->nval1; continue; }
+            for (j=0; j<tok->nval1; k++,j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(tok->values[k]) ) continue;
+                if ( n < k ) tok->values[n] = tok->values[k];
+                n++;
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
+            if ( n < i ) tok->values[n] = tok->values[i];
+            n++;
+        }
     }
     if ( !n ) return 1;
     if ( n==1 ) rtok->values[0] = tok->values[0];
@@ -1248,40 +1420,149 @@ static int func_median(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **sta
     rtok->nvalues = 1;
     return 1;
 }
+static int func_smpl_median(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
+{   // Per-sample median: writes one value per sample into rtok->values and returns 1
+    token_t *tok = stack[nstack - 1];   // the operand is the top of the evaluation stack
+    if ( !tok->nsamples ) return func_median(flt,line,rtok,stack,nstack);   // no per-sample data: use the plain median (not the average)
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;     // one output value per sample
+    rtok->nval1 = 1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);     // the result carries the same sample mask as the operand
+    int i, j, n;
+    double *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // masked-out sample: rtok->values[i] is left unwritten
+        n = 0;                              // number of non-missing values kept for this sample
+        ptr = tok->values + i*tok->nval1;   // start of the i-th sample's run of nval1 values
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            if ( n < j ) ptr[n] = ptr[j];   // compact the kept values to the front of the sample's own slice (modifies tok->values)
+            n++;
+        }
+        if ( n==0 )
+            bcf_double_set_missing(rtok->values[i]);    // every value of this sample was missing/vector-end
+        else if ( n==1 )
+            rtok->values[i] = ptr[0];
+        else
+        {
+            qsort(ptr, n, sizeof(double), compare_doubles);     // sort only the n compacted values
+            rtok->values[i] = n % 2 ? ptr[n/2] : (ptr[n/2-1] + ptr[n/2]) * 0.5;     // even n: mean of the two middle values
+        }
+    }
+    return 1;
+}
 static int func_stddev(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
     rtok->nvalues = 0;
     if ( !tok->nvalues ) return 1;
-    int i, n = 0;
-    for (i=0; i<tok->nvalues; i++)
+    // Sweep through tok->values and compact the non-missing values to the front of the very same array
+    int i,j,k = 0, n = 0;   // k: read position in tok->values, n: number of values kept (write position)
+    if ( tok->nsamples )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) { k += tok->nval1; continue; }    // skip the whole run of a masked-out sample
+            for (j=0; j<tok->nval1; k++,j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(tok->values[k]) ) continue;
+                if ( n < k ) tok->values[n] = tok->values[k];
+                n++;
+            }
+        }
+    }
+    else
     {
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
-        if ( n < i ) tok->values[n] = tok->values[i];
-        n++;
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) continue;
+            if ( n < i ) tok->values[n] = tok->values[i];
+            n++;
+        }
     }
     if ( !n ) return 1;
     if ( n==1 ) rtok->values[0] = 0;
     else
     {
         double sdev = 0, avg = 0;
-        for (i=0; i<n; i++) avg += tok->values[n];
+        for (i=0; i<n; i++) avg += tok->values[i];     // sum the n kept values (index i, not the fixed index n)
         avg /= n;
-        for (i=0; i<n; i++) sdev += (tok->values[n] - avg) * (tok->values[n] - avg);
+        for (i=0; i<n; i++) sdev += (tok->values[i] - avg) * (tok->values[i] - avg);   // sum of squared deviations; divided by n below
         rtok->values[0] = sqrt(sdev/n);
     }
     rtok->nvalues = 1;
     return 1;
 }
+static int func_smpl_stddev(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
+{   // Per-sample standard deviation: writes one value per sample into rtok->values and returns 1
+    token_t *tok = stack[nstack - 1];   // the operand is the top of the evaluation stack
+    if ( !tok->nsamples ) return func_stddev(flt,line,rtok,stack,nstack);   // no per-sample data: use the plain stddev (not the average)
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;     // one output value per sample
+    rtok->nval1 = 1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);     // the result carries the same sample mask as the operand
+    int i, j, n;
+    double *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // masked-out sample: rtok->values[i] is left unwritten
+        n = 0;                              // number of non-missing values kept for this sample
+        ptr = tok->values + i*tok->nval1;   // start of the i-th sample's run of nval1 values
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            if ( n < j ) ptr[n] = ptr[j];   // compact the kept values to the front of the sample's own slice (modifies tok->values)
+            n++;
+        }
+        if ( n==0 )
+            bcf_double_set_missing(rtok->values[i]);    // every value of this sample was missing/vector-end
+        else if ( n==1 )
+            rtok->values[i] = 0;            // a single value has zero spread
+        else
+        {
+            double sdev = 0, avg = 0;
+            for (j=0; j<n; j++) avg += ptr[j];
+            avg /= n;
+            for (j=0; j<n; j++) sdev += (ptr[j] - avg) * (ptr[j] - avg);
+            rtok->values[i] = sqrt(sdev/n);     // divides by n, not n-1
+        }
+    }
+    return 1;
+}
 static int func_sum(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     rtok->nvalues = 0;
     token_t *tok = stack[nstack - 1];
     if ( !tok->nvalues ) return 1;
-    double val = 0;
-    int i, n = 0;
-    for (i=0; i<tok->nvalues; i++)
-        if ( !bcf_double_is_missing_or_vector_end(tok->values[i]) ) { val += tok->values[i]; n++; }
+    double *ptr, val = 0;
+    int i,j, n = 0;
+    if ( tok->nsamples )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) continue;
+            ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+                val += ptr[j];
+                n++;
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+            if ( !bcf_double_is_missing_or_vector_end(tok->values[i]) ) { val += tok->values[i]; n++; }
+    }
     if ( n )
     {
         rtok->values[0] = val;
@@ -1289,39 +1570,104 @@ static int func_sum(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack,
     }
     return 1;
 }
+static int func_smpl_sum(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
+{   // Per-sample sum: writes one value per sample into rtok->values and returns 1
+    token_t *tok = stack[nstack - 1];   // the operand is the top of the evaluation stack
+    if ( !tok->nsamples ) return func_sum(flt,line,rtok,stack,nstack);   // no per-sample data: use the plain sum (not the average)
+    rtok->nsamples = tok->nsamples;
+    rtok->nvalues  = tok->nsamples;     // one output value per sample
+    rtok->nval1 = 1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    assert(tok->usmpl);
+    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);     // the result carries the same sample mask as the operand
+    int i, j, has_value;
+    double val, *ptr;
+    for (i=0; i<tok->nsamples; i++)
+    {
+        if ( !rtok->usmpl[i] ) continue;    // masked-out sample: rtok->values[i] is left unwritten
+        val = 0;
+        has_value = 0;
+        ptr = tok->values + i*tok->nval1;   // start of the i-th sample's run of nval1 values
+        for (j=0; j<tok->nval1; j++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(ptr[j]) ) continue;
+            has_value = 1;
+            val += ptr[j];
+        }
+        if ( has_value ) rtok->values[i] = val;
+        else bcf_double_set_missing(rtok->values[i]);   // every value of this sample was missing/vector-end
+    }
+    return 1;
+}
 static int func_abs(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
     if ( tok->is_str ) error("ABS() can be applied only on numeric values\n");
-
+    rtok->nsamples = tok->nsamples;     // the result has the same layout as the operand (nsamples, nvalues, nval1)
     rtok->nvalues = tok->nvalues;
+    rtok->nval1 = tok->nval1;
+    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);
+    if ( tok->usmpl )
+    {
+        if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
+        memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);     // carry the operand's sample mask over to the result
+    }
     if ( !tok->nvalues ) return 1;
     hts_expand(double, rtok->nvalues, rtok->mvalues, rtok->values);
-    int i;
-    for (i=0; i<tok->nvalues; i++)
-        if ( bcf_double_is_missing(tok->values[i]) ) bcf_double_set_missing(rtok->values[i]);
-        else if ( !bcf_double_is_vector_end(tok->values[i]) ) rtok->values[i] = fabs(tok->values[i]);
+    int i,j,k = 0;      // k: flat index into tok->values / rtok->values
+    if ( tok->usmpl )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) { k+= tok->nval1; continue; }     // masked-out sample: skip its run, rtok->values left unwritten
+            for (j=0; j<tok->nval1; k++,j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(tok->values[k]) ) bcf_double_set_missing(rtok->values[k]);   // here vector-end is also turned into missing
+                else rtok->values[k] = fabs(tok->values[k]);
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( tok->usmpl && !tok->usmpl[i] ) continue;   // NOTE(review): tok->usmpl is always NULL in this branch, so this test never fires
+            if ( bcf_double_is_missing(tok->values[i]) ) bcf_double_set_missing(rtok->values[i]);
+            else if ( !bcf_double_is_vector_end(tok->values[i]) ) rtok->values[i] = fabs(tok->values[i]);   // vector-end inputs leave rtok->values[i] unwritten
+        }
+    }
     return 1;
 }
 static int func_count(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
 {
     token_t *tok = stack[nstack - 1];
-    int i, cnt = 0;
-    if ( !tok->nsamples )
+    int i,j, cnt = 0;
+    if ( tok->tag && tok->nsamples )
     {
-        if ( tok->is_str )
+        // raw number of values in a FMT tag, e.g. COUNT(FMT/TAG)
+        if ( tok->is_str ) error("todo: Type=String for COUNT on FORMAT fields?\n");
+        for (i=0; i<tok->nsamples; i++)
         {
-            if ( tok->str_value.l ) cnt = 1;
-            for (i=0; i<tok->str_value.l; i++) if ( tok->str_value.s[i]==',' ) cnt++;
+            if ( !tok->usmpl[i] ) continue;
+            double *ptr = tok->values + i*tok->nval1;
+            for (j=0; j<tok->nval1; j++)
+                if ( !bcf_double_is_missing_or_vector_end(ptr[j]) ) cnt++;
         }
-        else
-            cnt = tok->nvalues;
     }
-    else
+    else if ( tok->nsamples )
     {
+        // number of samples that pass a processed FMT tag
         for (i=0; i<tok->nsamples; i++)
             if ( tok->pass_samples[i] ) cnt++;
     }
+    else if ( tok->is_str )
+    {
+        if ( tok->str_value.l ) cnt = 1;
+        for (i=0; i<tok->str_value.l; i++) if ( tok->str_value.s[i]==',' ) cnt++;
+    }
+    else
+        cnt = tok->nvalues;
 
     rtok->nvalues = 1;
     rtok->values[0] = cnt;
@@ -1533,11 +1879,27 @@ static int func_phred(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stac
     if ( !tok->nvalues ) return 1;
 
     hts_expand(double, rtok->nvalues, rtok->mvalues, rtok->values);
-    int i;
-    for (i=0; i<tok->nvalues; i++)
-        if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) bcf_double_set_missing(rtok->values[i]);
-        else rtok->values[i] = -4.34294481903*log(tok->values[i]);
-
+    int i,j,k = 0;
+    if ( tok->usmpl )
+    {
+        for (i=0; i<tok->nsamples; i++)
+        {
+            if ( !tok->usmpl[i] ) { k+= tok->nval1; continue; }
+            for (j=0; j<tok->nval1; k++,j++)
+            {
+                if ( bcf_double_is_missing_or_vector_end(tok->values[k]) ) bcf_double_set_missing(rtok->values[k]);
+                else rtok->values[k] = -4.34294481903*log(tok->values[k]);
+            }
+        }
+    }
+    else
+    {
+        for (i=0; i<tok->nvalues; i++)
+        {
+            if ( bcf_double_is_missing_or_vector_end(tok->values[i]) ) bcf_double_set_missing(rtok->values[i]);
+            else rtok->values[i] = -4.34294481903*log(tok->values[i]);
+        }
+    }
     return 1;
 }
 inline static void tok_init_values(token_t *atok, token_t *btok, token_t *rtok)
@@ -1557,7 +1919,8 @@ inline static void tok_init_samples(token_t *atok, token_t *btok, token_t *rtok)
         for (i=0; i<atok->nsamples; i++) rtok->usmpl[i] |= atok->usmpl[i];
         for (i=0; i<btok->nsamples; i++) rtok->usmpl[i] |= btok->usmpl[i];
     }
-    memset(rtok->pass_samples, 0, rtok->nsamples);
+    if (rtok->nsamples)
+        memset(rtok->pass_samples, 0, rtok->nsamples);
 }
 
 #define VECTOR_ARITHMETICS(atok,btok,_rtok,AOP) \
@@ -1582,22 +1945,37 @@ inline static void tok_init_samples(token_t *atok, token_t *btok, token_t *rtok)
                 rtok->values[i] = atok->values[i] AOP btok->values[i]; \
             } \
         } \
+        else if ( atok->nsamples ) \
+        { \
+            assert( btok->nvalues==1 ); \
+            if ( !bcf_double_is_missing_or_vector_end(btok->values[0]) ) \
+            { \
+                for (i=0; i<atok->nvalues; i++) \
+                { \
+                    if ( bcf_double_is_missing_or_vector_end(atok->values[i]) ) \
+                    { \
+                        bcf_double_set_missing(rtok->values[i]); \
+                        continue; \
+                    } \
+                    has_values = 1; \
+                    rtok->values[i] = atok->values[i] AOP btok->values[0]; \
+                } \
+            } \
+        } \
         else \
         { \
-            token_t *xtok = atok->nsamples ? atok : btok; \
-            token_t *ytok = atok->nsamples ? btok : atok; \
-            assert( ytok->nvalues==1 ); \
-            if ( !bcf_double_is_missing_or_vector_end(ytok->values[0]) ) \
+            assert( atok->nvalues==1 ); \
+            if ( !bcf_double_is_missing_or_vector_end(atok->values[0]) ) \
             { \
-                for (i=0; i<xtok->nvalues; i++) \
+                for (i=0; i<btok->nvalues; i++) \
                 { \
-                    if ( bcf_double_is_missing_or_vector_end(xtok->values[i]) ) \
+                    if ( bcf_double_is_missing_or_vector_end(btok->values[i]) ) \
                     { \
                         bcf_double_set_missing(rtok->values[i]); \
                         continue; \
                     } \
                     has_values = 1; \
-                    rtok->values[i] = xtok->values[i] AOP ytok->values[0]; \
+                    rtok->values[i] = atok->values[0] AOP btok->values[i]; \
                 } \
             } \
         } \
@@ -1713,14 +2091,6 @@ static int vector_logic_and(filter_t *filter, bcf1_t *line, token_t *rtok, token
     return 2;
 }
 
-#define CMP_MISSING(atok,btok,CMP_OP,ret) \
-{ \
-    if ( (atok)->nsamples || (btok)->nsamples ) error("todo: Querying of missing values in FORMAT\n"); \
-    token_t *tok = (atok)->is_missing ? (btok) : (atok); \
-    (ret) = ( tok->nvalues CMP_OP 1 ) ? 0 : 1; \
-    tok->nvalues = 1; \
-}
-
 #define CMP_VECTORS(atok,btok,_rtok,CMP_OP,missing_logic) \
 { \
     token_t *rtok = _rtok; \
@@ -1823,31 +2193,56 @@ static int vector_logic_and(filter_t *filter, bcf1_t *line, token_t *rtok, token
                 } \
             } \
         } \
-        else \
+        else if ( atok->nsamples )\
+        { \
+            for (i=0; i<atok->nsamples; i++) \
+            { \
+                if ( !rtok->usmpl[i] ) continue; \
+                double *aptr = atok->values + i*atok->nval1; \
+                double *bptr = btok->values + i*btok->nval1; \
+                for (j=0; j<atok->nval1; j++) \
+                { \
+                    int miss = bcf_double_is_missing_or_vector_end(aptr[j]) ? 1 : 0; \
+                    if ( miss && !missing_logic[0] ) continue; /* any is missing => result is false */ \
+                    for (k=0; k<btok->nvalues; k++) \
+                    { \
+                        int nmiss = miss + (bcf_double_is_missing_or_vector_end(bptr[k]) ? 1 : 0); \
+                        if ( nmiss ) \
+                        { \
+                            if ( missing_logic[nmiss] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = atok->nval1; break; } \
+                        } \
+                        else if ( aptr[j] > 16777216 || bptr[k] > 16777216 ) /* Ugly, see #871 */ \
+                        { \
+                            if ( aptr[j] CMP_OP bptr[k] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = atok->nval1; break; } \
+                        } \
+                        else if ( (float)aptr[j] CMP_OP (float)bptr[k] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = atok->nval1; break; } \
+                    } \
+                } \
+            } \
+        } \
+        else /* btok->nsamples */ \
         { \
-            token_t *xtok = atok->nsamples ? atok : btok; \
-            token_t *ytok = atok->nsamples ? btok : atok; \
-            for (i=0; i<xtok->nsamples; i++) \
+            for (i=0; i<btok->nsamples; i++) \
             { \
                 if ( !rtok->usmpl[i] ) continue; \
-                double *xptr = xtok->values + i*xtok->nval1; \
-                double *yptr = ytok->values + i*ytok->nval1; \
-                for (j=0; j<xtok->nval1; j++) \
+                double *aptr = atok->values + i*atok->nval1; \
+                double *bptr = btok->values + i*btok->nval1; \
+                for (j=0; j<btok->nval1; j++) \
                 { \
-                    int miss = bcf_double_is_missing_or_vector_end(xptr[j]) ? 1 : 0; \
+                    int miss = bcf_double_is_missing_or_vector_end(bptr[j]) ? 1 : 0; \
                     if ( miss && !missing_logic[0] ) continue; /* any is missing => result is false */ \
-                    for (k=0; k<ytok->nvalues; k++) \
+                    for (k=0; k<atok->nvalues; k++) \
                     { \
-                        int nmiss = miss + (bcf_double_is_missing_or_vector_end(yptr[k]) ? 1 : 0); \
+                        int nmiss = miss + (bcf_double_is_missing_or_vector_end(aptr[k]) ? 1 : 0); \
                         if ( nmiss ) \
                         { \
-                            if ( missing_logic[nmiss] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = xtok->nval1; break; } \
+                            if ( missing_logic[nmiss] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = btok->nval1; break; } \
                         } \
-                        else if ( xptr[j] > 16777216 || yptr[k] > 16777216 ) /* Ugly, see #871 */ \
+                        else if ( bptr[j] > 16777216 || aptr[k] > 16777216 ) /* Ugly, see #871 */ \
                         { \
-                            if ( xptr[j] CMP_OP yptr[k] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = xtok->nval1; break; } \
+                            if ( aptr[k] CMP_OP bptr[j] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = btok->nval1; break; } \
                         } \
-                        else if ( (float)xptr[j] CMP_OP (float)yptr[k] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = xtok->nval1; break; } \
+                        else if ( (float)aptr[k] CMP_OP (float)bptr[j] ) { rtok->pass_samples[i] = 1; rtok->pass_site = 1; j = btok->nval1; break; } \
                     } \
                 } \
             } \
@@ -2346,7 +2741,8 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
         {
             int is_info = bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_INFO,tok->hdr_id) ? 1 : 0;
             is_fmt = bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_FMT,tok->hdr_id) ? 1 : 0;
-            if ( is_info && is_fmt ) error("Both INFO/%s and FORMAT/%s exist, which one do you want?\n", tmp.s,tmp.s);
+            if ( is_info && is_fmt )
+                error("Error: ambiguous filtering expression, both INFO/%s and FORMAT/%s are defined in the VCF header.\n" , tmp.s,tmp.s);
         }
         if ( is_fmt==-1 ) is_fmt = 0;
     }
@@ -2835,6 +3231,7 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
     // Additionally, treat "." as missing value rather than a string in numeric equalities; that
     // @file is only used with ID; etc.
     // This code is fragile: improve me.
+    static int comma_separator_warned = 0;
     int i;
     for (i=0; i<nout; i++)
     {
@@ -2885,6 +3282,19 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
             if ( regcomp(out[j].regex, out[j].key, cflags) )
                 error("Could not compile the regex expression \"%s\": %s\n", out[j].key,filter->str);
         }
+        if ( out[i].is_str && out[i].tok_type==TOK_VAL && out[i].key && strchr(out[i].key,',') )
+        {
+            int print_note = 0;
+            if ( out[i+1].tok_type==TOK_EQ || (out[i+1].is_str && out[i+2].tok_type==TOK_EQ) ) print_note = 1;
+            else if ( out[i+1].tok_type==TOK_NE || (out[i+1].is_str && out[i+2].tok_type==TOK_NE) ) print_note = 1;
+            if ( print_note && !comma_separator_warned )
+            {
+                comma_separator_warned = 1;
+                fprintf(bcftools_stderr,
+                    "Warning: comma is interpreted as a separator and OR logic is used in string comparisons.\n"
+                    "         (Search the manual for \"Comma in strings\" to learn more.)\n");
+            }
+        }
         if ( out[i].tok_type!=TOK_VAL ) continue;
         if ( !out[i].tag ) continue;
         if ( out[i].setter==filters_set_type )
@@ -2941,11 +3351,11 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
             if ( i+1==nout ) error("Could not parse the expression: %s\n", filter->str);
             int itok = i, ival;
             if ( out[i+1].tok_type==TOK_EQ || out[i+1].tok_type==TOK_NE ) ival = i - 1;
-            else if ( out[i+1].tok_type==TOK_LIKE ) out[i+1].tok_type = TOK_EQ, ival = i - 1;
-            else if ( out[i+1].tok_type==TOK_NLIKE ) out[i+1].tok_type = TOK_NE, ival = i - 1;
+            else if ( out[i+1].tok_type==TOK_LIKE ) out[i+1].tok_type = TOK_IN, ival = i - 1;
+            else if ( out[i+1].tok_type==TOK_NLIKE ) out[i+1].tok_type = TOK_NOT_IN, ival = i - 1;
             else if ( out[i+2].tok_type==TOK_EQ || out[i+2].tok_type==TOK_NE ) ival = ++i;
-            else if ( out[i+2].tok_type==TOK_LIKE ) out[i+2].tok_type = TOK_EQ, ival = ++i;
-            else if ( out[i+2].tok_type==TOK_NLIKE ) out[i+2].tok_type = TOK_NE, ival = ++i;
+            else if ( out[i+2].tok_type==TOK_LIKE ) out[i+2].tok_type = TOK_IN, ival = ++i;
+            else if ( out[i+2].tok_type==TOK_NLIKE ) out[i+2].tok_type = TOK_NOT_IN, ival = ++i;
             else error("[%s:%d %s] Could not parse the expression: %s\n",  __FILE__,__LINE__,__FUNCTION__, filter->str);
             if ( out[ival].tok_type!=TOK_VAL || !out[ival].key )
                 error("[%s:%d %s] Could not parse the expression, an unquoted string value perhaps? %s\n", __FILE__,__LINE__,__FUNCTION__, filter->str);
@@ -2978,6 +3388,12 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
         else if ( out[i].tok_type==TOK_PHRED ) { out[i].func = func_phred; out[i].tok_type = TOK_FUNC; }
         else if ( out[i].tok_type==TOK_BINOM ) { out[i].func = func_binom; out[i].tok_type = TOK_FUNC; }
         else if ( out[i].tok_type==TOK_PERLSUB ) { out[i].func = perl_exec; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sMAX ) { out[i].func = func_smpl_max; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sMIN ) { out[i].func = func_smpl_min; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sAVG ) { out[i].func = func_smpl_avg; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sMEDIAN ) { out[i].func = func_smpl_median; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sSTDEV ) { out[i].func = func_smpl_stddev; out[i].tok_type = TOK_FUNC; }
+        else if ( out[i].tok_type==TOK_sSUM ) { out[i].func = func_smpl_sum; out[i].tok_type = TOK_FUNC; }
         hts_expand0(double,1,out[i].mvalues,out[i].values);
         if ( filter->nsamples )
         {
@@ -3153,3 +3569,32 @@ int filter_max_unpack(filter_t *flt)
 {
     return flt->max_unpack;
 }
+
+const double *filter_get_doubles(filter_t *filter, int *nval, int *nval1)  // Expose the value array of the first token on the filter stack
+{   // Outputs: *nval and *nval1 are always written (from tok->nvalues/tok->nval1, or 1/1 in the fallback branch)
+    token_t *tok = filter->flt_stack[0];    // only flt_stack[0] is ever inspected here
+    if ( tok->nvalues )
+    {
+        *nval  = tok->nvalues;
+        *nval1 = tok->nval1;
+    }
+    else    // the token holds no values: report its pass_site field as a single value instead
+    {
+        if ( !tok->values ) error("fixme in filter_get_doubles(): %s\n", filter->str);  // no buffer to write into; error() presumably does not return - confirm
+        *nval  = 1;
+        *nval1 = 1;
+        tok->values[0] = filter->flt_stack[0]->pass_site;   // flt_stack[0] is the same token as tok
+    }
+    return tok->values;     // pointer into the token's own array, not a copy
+}
+
+void filter_set_samples(filter_t *filter, const uint8_t *samples)  // Copy the caller's per-sample mask into every token that has per-sample slots
+{
+    int i,j;
+    for (i=0; i<filter->nfilters; i++)
+    {
+        if ( !filter->filters[i].nsamples ) continue;   // token has no per-sample slots, nothing to set
+        for (j=0; j<filter->filters[i].nsamples; j++) filter->filters[i].usmpl[j] = samples[j];     // reads samples[0..nsamples-1], so the caller's array must be at least that long
+    }
+}
+
diff --git a/bcftools/filter.h b/bcftools/filter.h
index ccd3fe3..243e3b6 100644
--- a/bcftools/filter.h
+++ b/bcftools/filter.h
@@ -1,6 +1,6 @@
 /*  filter.h -- filter expressions.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -46,6 +46,18 @@ void filter_destroy(filter_t *filter);
   */
 int filter_test(filter_t *filter, bcf1_t *rec, const uint8_t **samples);
 
+/**
+  *  filter_set_samples() - restrict filtering expression to samples.
+  *             Call after filter_init().
+  *  @samples:  use samples set to 1, ignore samples set to 0
+  */
+void filter_set_samples(filter_t *filter, const uint8_t *samples);
+
+/**
+  *  filter_get_doubles() - return a pointer to values from the last filter_test() evaluation
+  */
+const double *filter_get_doubles(filter_t *filter, int *nval, int *nval1);
+
 void filter_expression_info(FILE *fp);
 int filter_max_unpack(filter_t *filter);
 
diff --git a/bcftools/hclust.c b/bcftools/hclust.c
index 692fa54..945c70e 100644
--- a/bcftools/hclust.c
+++ b/bcftools/hclust.c
@@ -27,6 +27,7 @@
 #include <htslib/hts.h>
 #include <htslib/kstring.h>
 #include <stdlib.h>
+#include <assert.h>
 #include "bcftools.h"
 #include "hclust.h"
 
diff --git a/bcftools/hclust.c.pysam.c b/bcftools/hclust.c.pysam.c
index 29da67c..0a90af8 100644
--- a/bcftools/hclust.c.pysam.c
+++ b/bcftools/hclust.c.pysam.c
@@ -29,6 +29,7 @@
 #include <htslib/hts.h>
 #include <htslib/kstring.h>
 #include <stdlib.h>
+#include <assert.h>
 #include "bcftools.h"
 #include "hclust.h"
 
diff --git a/bcftools/htslib-1.10.2/LICENSE b/bcftools/htslib-1.10.2/LICENSE
deleted file mode 100644
index f70e757..0000000
--- a/bcftools/htslib-1.10.2/LICENSE
+++ /dev/null
@@ -1,69 +0,0 @@
-[Files in this distribution outwith the cram/ subdirectory are distributed
-according to the terms of the following MIT/Expat license.]
-
-The MIT/Expat License
-
-Copyright (C) 2012-2019 Genome Research Ltd.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-
-
-[Files within the cram/ subdirectory in this distribution are distributed
-according to the terms of the following Modified 3-Clause BSD license.]
-
-The Modified-BSD License
-
-Copyright (C) 2012-2019 Genome Research Ltd.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-   this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
-3. Neither the names Genome Research Ltd and Wellcome Trust Sanger Institute
-   nor the names of its contributors may be used to endorse or promote products
-   derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR ITS CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-[The use of a range of years within a copyright notice in this distribution
-should be interpreted as being equivalent to a list of years including the
-first and last year specified and all consecutive years between them.
-
-For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009,
-2011-2012" should be interpreted as being identical to a notice that reads
-"Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice
-that reads "Copyright (C) 2005-2012" should be interpreted as being identical
-to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010,
-2011, 2012".]
diff --git a/bcftools/htslib-1.10.2/README b/bcftools/htslib-1.10.2/README
deleted file mode 100644
index 4225bec..0000000
--- a/bcftools/htslib-1.10.2/README
+++ /dev/null
@@ -1,5 +0,0 @@
-HTSlib is an implementation of a unified C library for accessing common file
-formats, such as SAM, CRAM, VCF, and BCF, used for high-throughput sequencing
-data.  It is the core library used by samtools and bcftools.
-
-See INSTALL for building and installation instructions.
diff --git a/bcftools/main.c b/bcftools/main.c
index 2e3e56d..f892711 100644
--- a/bcftools/main.c
+++ b/bcftools/main.c
@@ -1,6 +1,6 @@
 /*  main.c -- main bcftools command front-end.
 
-    Copyright (C) 2012-2018 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -58,7 +58,7 @@ int main_plugin(int argc, char *argv[]);
 #endif
 int main_consensus(int argc, char *argv[]);
 int main_csq(int argc, char *argv[]);
-int bam_mpileup(int argc, char *argv[]);
+int main_mpileup(int argc, char *argv[]);
 int main_sort(int argc, char *argv[]);
 
 typedef struct
@@ -164,7 +164,7 @@ static cmd_t cmds[] =
       .alias = "gtcheck",
       .help  = "check sample concordance, detect sample swaps and contamination"
     },
-    { .func  = bam_mpileup,
+    { .func  = main_mpileup,
         .alias = "mpileup",
         .help  = "multi-way pileup producing genotype likelihoods"
     },
@@ -251,7 +251,7 @@ int main(int argc, char *argv[])
     if (argc < 2) { usage(stderr); return 1; }
 
     if (strcmp(argv[1], "version") == 0 || strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-v") == 0) {
-        printf("bcftools %s\nUsing htslib %s\nCopyright (C) 2019 Genome Research Ltd.\n", bcftools_version(), hts_version());
+        printf("bcftools %s\nUsing htslib %s\nCopyright (C) 2021 Genome Research Ltd.\n", bcftools_version(), hts_version());
 #if USE_GPL
         printf("License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n");
 #else
diff --git a/bcftools/main.c.pysam.c b/bcftools/main.c.pysam.c
index c7cd4b0..bfd0f04 100644
--- a/bcftools/main.c.pysam.c
+++ b/bcftools/main.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  main.c -- main bcftools command front-end.
 
-    Copyright (C) 2012-2018 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -60,7 +60,7 @@ int main_plugin(int argc, char *argv[]);
 #endif
 int main_consensus(int argc, char *argv[]);
 int main_csq(int argc, char *argv[]);
-int bam_mpileup(int argc, char *argv[]);
+int main_mpileup(int argc, char *argv[]);
 int main_sort(int argc, char *argv[]);
 
 typedef struct
@@ -166,7 +166,7 @@ static cmd_t cmds[] =
       .alias = "gtcheck",
       .help  = "check sample concordance, detect sample swaps and contamination"
     },
-    { .func  = bam_mpileup,
+    { .func  = main_mpileup,
         .alias = "mpileup",
         .help  = "multi-way pileup producing genotype likelihoods"
     },
@@ -253,7 +253,7 @@ int bcftools_main(int argc, char *argv[])
     if (argc < 2) { usage(bcftools_stderr); return 1; }
 
     if (strcmp(argv[1], "version") == 0 || strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-v") == 0) {
-        fprintf(bcftools_stdout, "bcftools %s\nUsing htslib %s\nCopyright (C) 2019 Genome Research Ltd.\n", bcftools_version(), hts_version());
+        fprintf(bcftools_stdout, "bcftools %s\nUsing htslib %s\nCopyright (C) 2021 Genome Research Ltd.\n", bcftools_version(), hts_version());
 #if USE_GPL
         fprintf(bcftools_stdout, "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n");
 #else
diff --git a/bcftools/mcall.c b/bcftools/mcall.c
index 325093d..e96d41d 100644
--- a/bcftools/mcall.c
+++ b/bcftools/mcall.c
@@ -1,6 +1,6 @@
 /*  mcall.c -- multiallelic and rare variant calling.
 
-    Copyright (C) 2012-2016 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -22,11 +22,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
+#include <assert.h>
 #include <math.h>
 #include <inttypes.h>
+#include <ctype.h>
 #include <htslib/kfunc.h>
 #include <htslib/khash_str2int.h>
 #include "call.h"
+#include "prob1.h"
 
 // Using priors for GTs does not seem to be mathematically justified. Although
 // it seems effective in removing false calls, it also flips a significant
@@ -38,6 +41,7 @@ THE SOFTWARE.  */
 // genotypes is reported instead.
 #define FLAT_PDG_FOR_MISSING 0
 
+int test16(float *anno16, anno16_t *a);
 
 void qcall_init(call_t *call) { return; }
 void qcall_destroy(call_t *call) { return; }
@@ -249,19 +253,46 @@ static void init_sample_groups(call_t *call)
     if ( !call->sample_groups )
     {
         // standard pooled calling, all samples in the same group
-        grp_t *grps = &call->smpl_grp;
-        grps->ngrp = 1;
-        grps->grp  = (grp1_t*)calloc(grps->ngrp, sizeof(grp1_t));
-        grps->smpl2grp = (int*)calloc(nsmpl,sizeof(int));
+        call->nsmpl_grp = 1;
+        call->smpl_grp  = (smpl_grp_t*)calloc(1,sizeof(*call->smpl_grp));
+        call->smpl_grp[0].nsmpl = nsmpl;
+        call->smpl_grp[0].smpl  = (uint32_t*)calloc(call->smpl_grp[0].nsmpl,sizeof(uint32_t));
+        for (i=0; i<nsmpl; i++)
+            call->smpl_grp[0].smpl[i] = i;
+        return;
+    }
+
+    if ( call->sample_groups_tag )
+    {
+        // Is the tag defined in the header?
+        int tag_id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,call->sample_groups_tag);
+        if ( tag_id==-1 ) error("No such tag \"%s\"\n",call->sample_groups_tag);
+        if ( !bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,tag_id) )  error("No such FORMAT tag \"%s\"\n", call->sample_groups_tag);
+    }
+    else
+    {
+        int tag_id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,"QS");
+        if ( tag_id >= 0 && bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,tag_id) ) call->sample_groups_tag = "QS";
+        else
+        {
+            tag_id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,"AD");
+            if ( tag_id >= 0 && bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,tag_id) ) call->sample_groups_tag = "AD";
+            else error("Error: neither \"AD\" nor \"QS\" FORMAT tag exists and no alternative given with -G\n");
+        }
     }
-    else if ( !strcmp("-",call->sample_groups) )
+
+    // Read samples/groups
+    if ( !strcmp("-",call->sample_groups) )
     {
         // single-sample calling, each sample creates its own group
-        grp_t *grps = &call->smpl_grp;
-        grps->ngrp = nsmpl;
-        grps->grp  = (grp1_t*)calloc(grps->ngrp, sizeof(grp1_t));
-        grps->smpl2grp = (int*)malloc(nsmpl*sizeof(int));
-        for (i=0; i<nsmpl; i++) grps->smpl2grp[i] = i;
+        call->nsmpl_grp = nsmpl;
+        call->smpl_grp  = (smpl_grp_t*)calloc(nsmpl,sizeof(*call->smpl_grp));
+        for (i=0; i<nsmpl; i++)
+        {
+            call->smpl_grp[i].nsmpl = 1;
+            call->smpl_grp[i].smpl  = (uint32_t*)calloc(call->smpl_grp[i].nsmpl,sizeof(uint32_t));
+            call->smpl_grp[i].smpl[0] = i;
+        }
     }
     else
     {
@@ -269,40 +300,49 @@ static void init_sample_groups(call_t *call)
         char **lines = hts_readlist(call->sample_groups, 1, &nlines);
         if ( !lines ) error("Could not read the file: %s\n", call->sample_groups);
 
-        uint32_t *smpl2grp1 = (uint32_t*)calloc(nsmpl,sizeof(uint32_t));
+        uint32_t *smpl2grp = (uint32_t*)calloc(nsmpl,sizeof(uint32_t));
+        uint32_t *grp2n = (uint32_t*)calloc(nsmpl,sizeof(uint32_t));
         void *grp2idx = khash_str2int_init();
 
-        grp_t *grps = &call->smpl_grp;
+        call->nsmpl_grp = 0;
         for (i=0; i<nlines; i++)
         {
             char *ptr = lines[i];
-            while ( *ptr && *ptr!='\t' ) ptr++;
+            while ( *ptr && !isspace(*ptr) ) ptr++;
             if ( !*ptr ) error("Could not parse the line in %s, expected a sample name followed by tab and a population name: %s\n",call->sample_groups,lines[i]);
-            *ptr = 0;
+            char *tmp = ptr;
+            while ( *ptr && isspace(*ptr) ) ptr++;
+            if ( !*ptr ) error("Could not parse the line in %s, expected a sample name followed by tab and a population name: %s\n",call->sample_groups,lines[i]);
+            *tmp = 0;
             int ismpl = bcf_hdr_id2int(call->hdr, BCF_DT_SAMPLE, lines[i]); 
             if ( ismpl<0 ) continue;
-            if ( smpl2grp1[ismpl] ) error("Error: the sample \"%s\" is listed twice in %s\n", lines[i],call->sample_groups);
+            if ( smpl2grp[ismpl] ) error("Error: the sample \"%s\" is listed twice in %s\n", lines[i],call->sample_groups);
             if ( !khash_str2int_has_key(grp2idx,ptr+1) )
             {
-                khash_str2int_inc(grp2idx, ptr+1);
-                grps->ngrp++;
+                khash_str2int_set(grp2idx, ptr+1, call->nsmpl_grp);
+                call->nsmpl_grp++;
             }
-            int igrp;
-            if ( khash_str2int_get(grp2idx, ptr+1, &igrp)==0 )
-                smpl2grp1[ismpl] = igrp+1;
-            else
+            int igrp = -1;
+            if ( khash_str2int_get(grp2idx, ptr+1, &igrp)!=0 )
                 error("This should not happen, fixme: %s\n",ptr+1);
+            grp2n[igrp]++;
+            smpl2grp[ismpl] = igrp+1;   // +1 to distinguish unlisted samples
         }
         khash_str2int_destroy(grp2idx);
+        if ( !call->nsmpl_grp ) error("Could not parse the file, no matching samples found: %s\n", call->sample_groups);
 
-        grps->grp = (grp1_t*)calloc(grps->ngrp, sizeof(grp1_t));
-        grps->smpl2grp = (int*)malloc(nsmpl*sizeof(int));
+        call->smpl_grp = (smpl_grp_t*)calloc(call->nsmpl_grp,sizeof(*call->smpl_grp));
         for (i=0; i<nsmpl; i++)
         {
-            if ( !smpl2grp1[i] ) error("Error: The sample \"%s\" is not listed in %s\n",call->hdr->samples[i],call->sample_groups);
-            grps->smpl2grp[i] = smpl2grp1[i] - 1;
+            if ( !smpl2grp[i] ) error("Error: The sample \"%s\" is not listed in %s\n",call->hdr->samples[i],call->sample_groups);
+            int igrp = smpl2grp[i] - 1;
+            if ( !call->smpl_grp[igrp].nsmpl ) 
+                call->smpl_grp[igrp].smpl = (uint32_t*)calloc(grp2n[igrp],sizeof(uint32_t));
+            call->smpl_grp[igrp].smpl[call->smpl_grp[igrp].nsmpl] = i;
+            call->smpl_grp[igrp].nsmpl++;
         }
-        free(smpl2grp1);
+        free(smpl2grp);
+        free(grp2n);
         for (i=0; i<nlines; i++) free(lines[i]);
         free(lines);
     }
@@ -310,15 +350,17 @@ static void init_sample_groups(call_t *call)
 static void destroy_sample_groups(call_t *call)
 {
     int i;
-    grp_t *grps = &call->smpl_grp;
-    for (i=0; i<grps->ngrp; i++)
-        free(grps->grp[i].qsum);
-    free(grps->grp);
-    free(grps->smpl2grp);
+    for (i=0; i<call->nsmpl_grp; i++)   // release each group's buffers before the group array itself
+    {
+        free(call->smpl_grp[i].qsum);   // smpl_grp is calloc'ed in init_sample_groups(), so an unset qsum is NULL and free() ignores it
+        free(call->smpl_grp[i].smpl);   // sample-index list allocated in init_sample_groups()
+    }
+    free(call->smpl_grp);   // the group array; call->nsmpl_grp and the pointer are not reset here
 }
 
 void mcall_init(call_t *call)
 {
+    init_sample_groups(call);
     call_init_pl2p(call);
 
     call->nals_map = 5;
@@ -341,15 +383,15 @@ void mcall_init(call_t *call)
     if ( call->output_tags & CALL_FMT_GQ )
         bcf_hdr_append(call->hdr,"##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Phred-scaled Genotype Quality\">");
     if ( call->output_tags & CALL_FMT_GP )
-        bcf_hdr_append(call->hdr,"##FORMAT=<ID=GP,Number=G,Type=Float,Description=\"Phred-scaled genotype posterior probabilities\">");
+        bcf_hdr_append(call->hdr,"##FORMAT=<ID=GP,Number=G,Type=Float,Description=\"Genotype posterior probabilities in the range 0 to 1\">");
     if ( call->output_tags & (CALL_FMT_GQ|CALL_FMT_GP) )
         call->GQs = (int32_t*) malloc(sizeof(int32_t)*bcf_hdr_nsamples(call->hdr));
-    bcf_hdr_append(call->hdr,"##INFO=<ID=ICB,Number=1,Type=Float,Description=\"Inbreeding Coefficient Binomial test (bigger is better)\">");
-    bcf_hdr_append(call->hdr,"##INFO=<ID=HOB,Number=1,Type=Float,Description=\"Bias in the number of HOMs number (smaller is better)\">");
     bcf_hdr_append(call->hdr,"##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Allele count in genotypes for each ALT allele, in the same order as listed\">");
     bcf_hdr_append(call->hdr,"##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");
     bcf_hdr_append(call->hdr,"##INFO=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases\">");
     bcf_hdr_append(call->hdr,"##INFO=<ID=MQ,Number=1,Type=Integer,Description=\"Average mapping quality\">");
+    if ( call->output_tags & CALL_FMT_PV4 )
+        bcf_hdr_append(call->hdr,"##INFO=<ID=PV4,Number=4,Type=Float,Description=\"P-values for strand bias, baseQ bias, mapQ bias and tail distance bias\">\n");
 
     // init the prior
     if ( call->theta>0 )
@@ -372,8 +414,6 @@ void mcall_init(call_t *call)
         }
         call->theta = log(call->theta);
     }
-
-    init_sample_groups(call);
 }
 
 void mcall_destroy(call_t *call)
@@ -394,7 +434,6 @@ void mcall_destroy(call_t *call)
     free(call->pdg);
     free(call->als);
     free(call->ac);
-    free(call->qsum);
     return;
 }
 
@@ -505,14 +544,14 @@ void set_pdg(double *pl2p, int *PLs, double *pdg, int n_smpl, int n_gt, int unse
 }
 
 // Create mapping between old and new (trimmed) alleles
-void init_allele_trimming_maps(call_t *call, int als, int nals)
+void init_allele_trimming_maps(call_t *call, int nals_ori, int als_out)
 {
-    int i, j;
+    int i, j, nout = 0;
 
     // als_map: old(i) -> new(j)
-    for (i=0, j=0; i<nals; i++)
+    for (i=0; i<nals_ori; i++)
     {
-        if ( als & 1<<i ) call->als_map[i] = j++;
+        if ( als_out & (1<<i) ) call->als_map[i] = nout++;
         else call->als_map[i] = -1;
     }
 
@@ -520,85 +559,16 @@ void init_allele_trimming_maps(call_t *call, int als, int nals)
 
     // pl_map: new(k) -> old(l)
     int k = 0, l = 0;
-    for (i=0; i<nals; i++)
+    for (i=0; i<nals_ori; i++)
     {
         for (j=0; j<=i; j++)
         {
-            if ( (als & 1<<i) && (als & 1<<j) ) call->pl_map[k++] = l;
+            if ( (als_out & (1<<i)) && (als_out & (1<<j)) ) call->pl_map[k++] = l;
             l++;
         }
     }
 }
 
-double binom_dist(int N, double p, int k)
-{
-    int mean = (int) (N*p);
-    if ( mean==k ) return 1.0;
-
-    double log_p = (k-mean)*log(p) + (mean-k)*log(1.0-p);
-    if ( k > N - k ) k = N - k;
-    if ( mean > N - mean ) mean = N - mean;
-
-    if ( k < mean ) { int tmp = k; k = mean; mean = tmp; }
-    double diff = k - mean;
-
-    double val = 1.0;
-    int i;
-    for (i=0; i<diff; i++)
-        val = val * (N-mean-i) / (k-i);
-
-    return exp(log_p)/val;
-}
-
-
-// Inbreeding Coefficient, binomial test
-float calc_ICB(int nref, int nalt, int nhets, int ndiploid)
-{
-    if ( !nref || !nalt || !ndiploid ) return HUGE_VAL;
-
-    double fref = (double)nref/(nref+nalt); // fraction of reference allelels
-    double falt = (double)nalt/(nref+nalt); // non-ref als
-    double q = 2*fref*falt;                 // probability of a het, assuming HWE
-    double mean = q*ndiploid;
-
-    //fprintf(stderr,"\np=%e N=%d k=%d  .. nref=%d nalt=%d nhets=%d ndiploid=%d\n", q,ndiploid,nhets, nref,nalt,nhets,ndiploid);
-
-    // Can we use normal approximation? The second condition is for performance only
-    // and is not well justified.
-    if ( (mean>10 && (1-q)*ndiploid>10 ) || ndiploid>200 )
-    {
-        //fprintf(stderr,"out: mean=%e  p=%e\n", mean,exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q))));
-        return exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q)));
-    }
-
-    return binom_dist(ndiploid, q, nhets);
-}
-
-float calc_HOB(int nref, int nalt, int nhets, int ndiploid)
-{
-    if ( !nref || !nalt || !ndiploid ) return HUGE_VAL;
-
-    double fref = (double)nref/(nref+nalt); // fraction of reference allelels
-    double falt = (double)nalt/(nref+nalt); // non-ref als
-    return fabs((double)nhets/ndiploid - 2*fref*falt);
-}
-
-/**
-  *  log(sum_i exp(a_i))
-  */
-// static inline double logsumexp(double *vals, int nvals)
-// {
-//     int i;
-//     double max_exp = vals[0];
-//     for (i=1; i<nvals; i++)
-//         if ( max_exp < vals[i] ) max_exp = vals[i];
-
-//     double sum = 0;
-//     for (i=0; i<nvals; i++)
-//         sum += exp(vals[i] - max_exp);
-
-//     return log(sum) + max_exp;
-// }
 /** log(exp(a)+exp(b)) */
 static inline double logsumexp2(double a, double b)
 {
@@ -610,7 +580,7 @@ static inline double logsumexp2(double a, double b)
 
 // Macro to set the most likely alleles
 #define UPDATE_MAX_LKs(als,sum) { \
-     if ( max_lk<lk_tot ) { max_lk = lk_tot; max_als = (als); } \
+     if ( max_lk<lk_tot && lk_tot_set ) { max_lk = lk_tot; max_als = (als); } \
      if ( sum ) lk_sum = logsumexp2(lk_tot,lk_sum); \
 }
 
@@ -618,14 +588,13 @@ static inline double logsumexp2(double a, double b)
 
 // Determine the most likely combination of alleles. In this implementation,
 // at most tri-allelic sites are considered. Returns the number of alleles.
-static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
+static int mcall_find_best_alleles(call_t *call, int nals, smpl_grp_t *grp)
 {
-    int j;
     int ia,ib,ic;   // iterators over up to three alleles
     int max_als=0;  // most likely combination of alleles
-    double ref_lk = 0, max_lk = -HUGE_VAL; // likelihood of the reference and of most likely combination of alleles
+    double ref_lk = -HUGE_VAL, max_lk = -HUGE_VAL; // likelihood of the reference and of most likely combination of alleles
     double lk_sum = -HUGE_VAL;    // for normalizing the likelihoods
-    int nsmpl = bcf_hdr_nsamples(call->hdr);
+    int nsmpl = grp->nsmpl;
     int ngts  = nals*(nals+1)/2;
 
     // Single allele
@@ -634,60 +603,45 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
         double lk_tot  = 0;
         int lk_tot_set = 0;
         int iaa = (ia+1)*(ia+2)/2-1;    // index in PL which corresponds to the homozygous "ia/ia" genotype
-        int isample;
-        double *pdg = call->pdg + iaa;
-        for (isample=0; isample<nsmpl; isample++)
+        int ismpl;
+        for (ismpl=0; ismpl<nsmpl; ismpl++)
         {
+            double *pdg = call->pdg + grp->smpl[ismpl]*ngts + iaa;
             if ( *pdg ) { lk_tot += log(*pdg); lk_tot_set = 1; }
-            pdg += ngts;
         }
         if ( ia==0 ) ref_lk = lk_tot;   // likelihood of 0/0 for all samples
         else lk_tot += call->theta; // the prior
         UPDATE_MAX_LKs(1<<ia, ia>0 && lk_tot_set);
     }
 
-    grp_t *grps = &call->smpl_grp;
-
     // Two alleles
     if ( nals>1 )
     {
         for (ia=0; ia<nals; ia++)
         {
-            if ( grps->ngrp==1 && grps->grp[0].qsum[ia]==0 ) continue;
+            if ( grp->qsum[ia]==0 ) continue;
             int iaa = (ia+1)*(ia+2)/2-1;
             for (ib=0; ib<ia; ib++)
             {
-                if ( grps->ngrp==1 && grps->grp[0].qsum[ib]==0 ) continue;
+                if ( grp->qsum[ib]==0 ) continue;
                 double lk_tot  = 0;
                 int lk_tot_set = 0;
-                int ia_cov = 0, ib_cov = 0;
-                for (j=0; j<grps->ngrp; j++)
+                double fa  = grp->qsum[ia]/(grp->qsum[ia] + grp->qsum[ib]);
+                double fb  = grp->qsum[ib]/(grp->qsum[ia] + grp->qsum[ib]);
+                double fa2 = fa*fa;
+                double fb2 = fb*fb;
+                double fab = 2*fa*fb;
+                int is, ibb = (ib+1)*(ib+2)/2-1, iab = iaa - ia + ib;
+                for (is=0; is<nsmpl; is++)
                 {
-                    grp1_t *grp = &grps->grp[j];
-                    if ( grp->qsum[ia] ) ia_cov = 1;
-                    if ( grp->qsum[ib] ) ib_cov = 1;
-                    if ( !grp->qsum[ia] && !grp->qsum[ib] ) { grp->dp = 0; continue; }
-                    grp->dp  = 1;
-                    grp->fa  = grp->qsum[ia]/(grp->qsum[ia]+grp->qsum[ib]);
-                    grp->fb  = grp->qsum[ib]/(grp->qsum[ia]+grp->qsum[ib]);
-                    grp->fa2 = grp->fa*grp->fa;
-                    grp->fb2 = grp->fb*grp->fb;
-                    grp->fab = 2*grp->fa*grp->fb;
-                }
-                if ( !ia_cov || !ib_cov ) continue;
-                int isample, ibb = (ib+1)*(ib+2)/2-1, iab = iaa - ia + ib;
-                double *pdg  = call->pdg;
-                for (isample=0; isample<nsmpl; isample++)
-                {
-                    grp1_t *grp = &grps->grp[grps->smpl2grp[isample]];
-                    if ( !grp->dp ) continue;
+                    int ismpl = grp->smpl[is];
+                    double *pdg = call->pdg + ismpl*ngts;
                     double val = 0;
-                    if ( !call->ploidy || call->ploidy[isample]==2 )
-                        val = grp->fa2*pdg[iaa] + grp->fb2*pdg[ibb] + grp->fab*pdg[iab];
-                    else if ( call->ploidy && call->ploidy[isample]==1 )
-                        val = grp->fa*pdg[iaa] + grp->fb*pdg[ibb];
+                    if ( !call->ploidy || call->ploidy[ismpl]==2 )
+                        val = fa2*pdg[iaa] + fb2*pdg[ibb] + fab*pdg[iab];
+                    else if ( call->ploidy && call->ploidy[ismpl]==1 )
+                        val = fa*pdg[iaa] + fb*pdg[ibb];
                     if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
-                    pdg += ngts;
                 }
                 if ( ia!=0 ) lk_tot += call->theta;    // the prior
                 if ( ib!=0 ) lk_tot += call->theta;
@@ -701,50 +655,38 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
     {
         for (ia=0; ia<nals; ia++)
         {
-            if ( grps->ngrp==1 && grps->grp[0].qsum[ia]==0 ) continue;
+            if ( grp->qsum[ia]==0 ) continue;
             int iaa = (ia+1)*(ia+2)/2-1;
             for (ib=0; ib<ia; ib++)
             {
-                if (  grps->ngrp==1 && grps->grp[0].qsum[ib]==0 ) continue;
+                if ( grp->qsum[ib]==0 ) continue;
                 int ibb = (ib+1)*(ib+2)/2-1;
                 int iab = iaa - ia + ib;
                 for (ic=0; ic<ib; ic++)
                 {
-                    if (  grps->ngrp==1 && grps->grp[0].qsum[ic]==0 ) continue;
+                    if ( grp->qsum[ic]==0 ) continue;
                     double lk_tot  = 0;
-                    int lk_tot_set = 1;
-                    int ia_cov = 0, ib_cov = 0, ic_cov = 0;
-                    for (j=0; j<grps->ngrp; j++)
-                    {
-                        grp1_t *grp = &grps->grp[j];
-                        if ( grp->qsum[ia] ) ia_cov = 1;
-                        if ( grp->qsum[ib] ) ib_cov = 1;
-                        if ( grp->qsum[ic] ) ic_cov = 1;
-                        if ( !grp->qsum[ia] && !grp->qsum[ib] && !grp->qsum[ic] ) { grp->dp = 0; continue; }
-                        grp->dp  = 1;
-                        grp->fa  = grp->qsum[ia]/(grp->qsum[ia]+grp->qsum[ib]+grp->qsum[ic]);
-                        grp->fb  = grp->qsum[ib]/(grp->qsum[ia]+grp->qsum[ib]+grp->qsum[ic]);
-                        grp->fc  = grp->qsum[ic]/(grp->qsum[ia]+grp->qsum[ib]+grp->qsum[ic]);
-                        grp->fa2 = grp->fa*grp->fa;
-                        grp->fb2 = grp->fb*grp->fb;
-                        grp->fc2 = grp->fc*grp->fc;
-                        grp->fab = 2*grp->fa*grp->fb, grp->fac = 2*grp->fa*grp->fc, grp->fbc = 2*grp->fb*grp->fc;
-                    }
-                    if ( !ia_cov || !ib_cov || !ic_cov ) continue;
-                    int isample, icc = (ic+1)*(ic+2)/2-1;
+                    int lk_tot_set = 0;
+
+                    double fa  = grp->qsum[ia]/(grp->qsum[ia] + grp->qsum[ib] + grp->qsum[ic]);
+                    double fb  = grp->qsum[ib]/(grp->qsum[ia] + grp->qsum[ib] + grp->qsum[ic]);
+                    double fc  = grp->qsum[ic]/(grp->qsum[ia] + grp->qsum[ib] + grp->qsum[ic]);
+                    double fa2 = fa*fa;
+                    double fb2 = fb*fb;
+                    double fc2 = fc*fc;
+                    double fab = 2*fa*fb, fac = 2*fa*fc, fbc = 2*fb*fc;
+                    int is, icc = (ic+1)*(ic+2)/2-1;
                     int iac = iaa - ia + ic, ibc = ibb - ib + ic;
-                    double *pdg = call->pdg;
-                    for (isample=0; isample<nsmpl; isample++)
+                    for (is=0; is<nsmpl; is++)
                     {
-                        grp1_t *grp = &grps->grp[grps->smpl2grp[isample]];
-                        if ( !grp->dp ) continue;
+                        int ismpl = grp->smpl[is];
+                        double *pdg = call->pdg + ismpl*ngts;
                         double val = 0;
-                        if ( !call->ploidy || call->ploidy[isample]==2 )
-                            val = grp->fa2*pdg[iaa] + grp->fb2*pdg[ibb] + grp->fc2*pdg[icc] + grp->fab*pdg[iab] + grp->fac*pdg[iac] + grp->fbc*pdg[ibc];
-                        else if ( call->ploidy && call->ploidy[isample]==1 )
-                            val = grp->fa*pdg[iaa] + grp->fb*pdg[ibb] + grp->fc*pdg[icc];
+                        if ( !call->ploidy || call->ploidy[ismpl]==2 )
+                            val = fa2*pdg[iaa] + fb2*pdg[ibb] + fc2*pdg[icc] + fab*pdg[iab] + fac*pdg[iac] + fbc*pdg[ibc];
+                        else if ( call->ploidy && call->ploidy[ismpl]==1 )
+                            val = fa*pdg[iaa] + fb*pdg[ibb] + fc*pdg[icc];
                         if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
-                        pdg += ngts;
                     }
                     if ( ia!=0 ) lk_tot += call->theta;    // the prior
                     if ( ib!=0 ) lk_tot += call->theta;    // the prior
@@ -755,25 +697,26 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
         }
     }
 
-    call->ref_lk = ref_lk;
-    call->lk_sum = lk_sum;
-    *out_als = max_als;
-
     int i, n = 0;
     for (i=0; i<nals; i++) if ( max_als & 1<<i) n++;
 
+    grp->max_lk = max_lk;
+    grp->ref_lk = ref_lk;
+    grp->lk_sum = lk_sum;
+    grp->als  = max_als;
+    grp->nals = n;
+
     return n;
 }
 
-static void mcall_set_ref_genotypes(call_t *call, int nals)
+// Sets GT=0/0 or GT=. if PL=0,0,0
+static void mcall_set_ref_genotypes(call_t *call, int nals_ori)
 {
     int i;
-    int ngts  = nals*(nals+1)/2;
+    int ngts  = nals_ori*(nals_ori+1)/2;            // need this to distinguish between GT=0/0 vs GT=.
     int nsmpl = bcf_hdr_nsamples(call->hdr);
 
-    for (i=0; i<nals; i++) call->ac[i] = 0;
-    call->nhets = 0;
-    call->ndiploid = 0;
+    for (i=0; i<nals_ori; i++) call->ac[i] = 0;     // nals_new<=nals_ori, never mind setting extra 0's
 
     // Set all genotypes to 0/0 or 0
     int *gts    = call->gts;
@@ -799,34 +742,27 @@ static void mcall_set_ref_genotypes(call_t *call, int nals)
     }
 }
 
-static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
+static void mcall_call_genotypes(call_t *call, int nals_ori, smpl_grp_t *grp)
 {
     int ia, ib, i;
-    int ngts  = nals*(nals+1)/2;
-    int nsmpl = bcf_hdr_nsamples(call->hdr);
-    int nout_gts = nout_als*(nout_als+1)/2;
-    hts_expand(float,nout_gts*nsmpl,call->nGPs,call->GPs);
-
-    for (i=0; i<nout_als; i++) call->ac[i] = 0;
-    call->nhets = 0;
-    call->ndiploid = 0;
+    int ngts_ori = nals_ori*(nals_ori+1)/2; 
+    int ngts_new = call->nals_new*(call->nals_new+1)/2;
+    int nsmpl = grp->nsmpl;
 
     #if USE_PRIOR_FOR_GTS
         float prior = exp(call->theta);
     #endif
-    float *gps  = call->GPs - nout_gts;
-    double *pdg = call->pdg - ngts;
-    int *gts  = call->gts - 2;
 
-    int isample;
-    for (isample = 0; isample < nsmpl; isample++)
+    int is;
+    for (is = 0; is < nsmpl; is++)
     {
-        int ploidy = call->ploidy ? call->ploidy[isample] : 2;
-        assert( ploidy>=0 && ploidy<=2 );
+        int ismpl   = grp->smpl[is];
+        double *pdg = call->pdg + ismpl*ngts_ori;
+        float *gps  = call->GPs + ismpl*ngts_new;
+        int *gts    = call->gts + ismpl*2;
 
-        pdg += ngts;
-        gts += 2;
-        gps += nout_gts;
+        int ploidy = call->ploidy ? call->ploidy[ismpl] : 2;
+        assert( ploidy>=0 && ploidy<=2 );
 
         if ( !ploidy )
         {
@@ -838,8 +774,8 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
 
         #if !FLAT_PDG_FOR_MISSING
             // Skip samples with zero depth, they have all pdg's equal to 0
-            for (i=0; i<ngts; i++) if ( pdg[i]!=0.0 ) break;
-            if ( i==ngts )
+            for (i=0; i<ngts_ori; i++) if ( pdg[i]!=0.0 ) break;
+            if ( i==ngts_ori )
             {
                 gts[0] = bcf_gt_missing;
                 gts[1] = ploidy==2 ? bcf_gt_missing : bcf_int32_vector_end;
@@ -848,19 +784,16 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
             }
         #endif
 
-        if ( ploidy==2 ) call->ndiploid++;
-
         // Default fallback for the case all LKs are the same
         gts[0] = bcf_gt_unphased(0);
         gts[1] = ploidy==2 ? bcf_gt_unphased(0) : bcf_int32_vector_end;
 
         // Non-zero depth, determine the most likely genotype
-        grp1_t *grp = &call->smpl_grp.grp[call->smpl_grp.smpl2grp[isample]];
         double best_lk = 0;
-        for (ia=0; ia<nals; ia++)
+        for (ia=0; ia<nals_ori; ia++)
         {
-            if ( !(out_als & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
-            int iaa = (ia+1)*(ia+2)/2-1;            // PL index of the ia/ia genotype
+            if ( !(grp->als & 1<<ia) ) continue;    // ia-th allele not in the final selection, skip
+            int iaa = (ia+1)*(ia+2)/2-1;                // PL index of the ia/ia genotype
             double lk = ploidy==2 ? pdg[iaa]*grp->qsum[ia]*grp->qsum[ia] : pdg[iaa]*grp->qsum[ia];
             #if USE_PRIOR_FOR_GTS
                 if ( ia!=0 ) lk *= prior;
@@ -876,13 +809,13 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
         if ( ploidy==2 )
         {
             gts[1] = gts[0];
-            for (ia=0; ia<nals; ia++)
+            for (ia=0; ia<nals_ori; ia++)
             {
-                if ( !(out_als & 1<<ia) ) continue;
+                if ( !(grp->als & 1<<ia) ) continue;
                 int iaa = (ia+1)*(ia+2)/2-1;
                 for (ib=0; ib<ia; ib++)
                 {
-                    if ( !(out_als & 1<<ib) ) continue;
+                    if ( !(grp->als & 1<<ib) ) continue;
                     int iab = iaa - ia + ib;
                     double lk = 2*pdg[iab]*grp->qsum[ia]*grp->qsum[ib];
                     #if USE_PRIOR_FOR_GTS
@@ -899,7 +832,6 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
                     }
                 }
             }
-            if ( gts[0] != gts[1] ) call->nhets++;
         }
         else
             gts[1] = bcf_int32_vector_end;
@@ -907,55 +839,50 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
         call->ac[ bcf_gt_allele(gts[0]) ]++;
         if ( gts[1]!=bcf_int32_vector_end ) call->ac[ bcf_gt_allele(gts[1]) ]++;
     }
-    if ( call->output_tags & (CALL_FMT_GQ|CALL_FMT_GP) )
+    if ( !(call->output_tags & (CALL_FMT_GQ|CALL_FMT_GP)) ) return;
+    double max, sum;
+    for (is=0; is<nsmpl; is++)
     {
-        double max, sum;
-        for (isample=0; isample<nsmpl; isample++)
-        {
-            gps = call->GPs + isample*nout_gts;
+        int ismpl  = grp->smpl[is];
+        float *gps = call->GPs + ismpl*ngts_new;
 
-            int nmax;
-            if ( call->ploidy )
-            {
-                if ( call->ploidy[isample]==2 ) nmax = nout_gts;
-                else if ( call->ploidy[isample]==1 ) nmax = nout_als;
-                else nmax = 0;
-            }
-            else nmax = nout_gts;
+        int nmax;
+        if ( call->ploidy )
+        {
+            if ( call->ploidy[ismpl]==2 ) nmax = ngts_new;
+            else if ( call->ploidy[ismpl]==1 ) nmax = grp->nals;
+            else nmax = 0;
+        }
+        else nmax = ngts_new;
 
-            max = gps[0];
-            if ( max<0 || nmax==0 )
-            {
-                // no call
-                if ( call->output_tags & CALL_FMT_GP )
-                {
-                    for (i=0; i<nmax; i++) gps[i] = 0;
-                    if ( nmax==0 ) { bcf_float_set_missing(gps[i]); nmax++; }
-                    if ( nmax < nout_gts ) bcf_float_set_vector_end(gps[nmax]);
-                }
-                call->GQs[isample] = 0;
-                continue;
-            }
-            sum = gps[0];
-            for (i=1; i<nmax; i++)
-            {
-                if ( max < gps[i] ) max = gps[i];
-                sum += gps[i];
-            }
-            max = -4.34294*log(1 - max/sum);
-            call->GQs[isample] = max<=INT8_MAX ? max : INT8_MAX;
+        max = gps[0];
+        if ( max<0 || nmax==0 )
+        {
+            // no call
             if ( call->output_tags & CALL_FMT_GP )
             {
-                assert( max );
-                for (i=0; i<nmax; i++) gps[i] = (int)(-4.34294*log(gps[i]/sum));
-                if ( nmax < nout_gts ) bcf_float_set_vector_end(gps[nmax]);
+                for (i=0; i<nmax; i++) gps[i] = 0;
+                if ( nmax==0 ) { bcf_float_set_missing(gps[i]); nmax++; }
+                if ( nmax < ngts_new ) bcf_float_set_vector_end(gps[nmax]);
             }
+            call->GQs[ismpl] = 0;
+            continue;
+        }
+        sum = gps[0];
+        for (i=1; i<nmax; i++)
+        {
+            if ( max < gps[i] ) max = gps[i];
+            sum += gps[i];
+        }
+        max = -4.34294*log(1 - max/sum);
+        call->GQs[ismpl] = max<=INT8_MAX ? max : INT8_MAX;
+        if ( call->output_tags & CALL_FMT_GP )
+        {
+            assert( max );
+            for (i=0; i<nmax; i++) gps[i] = gps[i]/sum;
+            for (; i<ngts_new; i++) bcf_float_set_vector_end(gps[i]);
         }
     }
-    if ( call->output_tags & CALL_FMT_GP )
-        bcf_update_format_float(call->hdr, rec, "GP", call->GPs, nsmpl*nout_gts);
-    if ( call->output_tags & CALL_FMT_GQ )
-        bcf_update_format_int32(call->hdr, rec, "GQ", call->GQs, nsmpl);
 }
 
 
@@ -978,12 +905,13 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
     Individual qualities are calculated as
         GQ(F=i,M=j,K=k) = P(F=i,M=j,K=k) / \sum_{x,y} P(F=i,M=x,K=y)
  */
-static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
+#if 0
+static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int nals_new, int als_new)
 {
     int ia, ib, i;
     int nsmpl    = bcf_hdr_nsamples(call->hdr);
     int ngts     = nals*(nals+1)/2;
-    int nout_gts = nout_als*(nout_als+1)/2;
+    int nout_gts = nals_new*(nals_new+1)/2;
     double *gls  = call->GLs - nout_gts;
     double *pdg  = call->pdg - ngts;
 
@@ -1013,7 +941,7 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
         double best_lk = 0;
         for (ia=0; ia<nals; ia++)
         {
-            if ( !(out_als & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
+            if ( !(als_new & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
             int iaa   = bcf_alleles2gt(ia,ia);      // PL index of the ia/ia genotype
             int idx   = bcf_alleles2gt(call->als_map[ia],call->als_map[ia]);
             double lk = ploidy==2 ? pdg[iaa]*grp->qsum[ia]*grp->qsum[ia] : pdg[iaa]*grp->qsum[ia];
@@ -1029,10 +957,10 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
         {
             for (ia=0; ia<nals; ia++)
             {
-                if ( !(out_als & 1<<ia) ) continue;
+                if ( !(als_new & 1<<ia) ) continue;
                 for (ib=0; ib<ia; ib++)
                 {
-                    if ( !(out_als & 1<<ib) ) continue;
+                    if ( !(als_new & 1<<ib) ) continue;
                     int iab   = bcf_alleles2gt(ia,ib);
                     int idx   = bcf_alleles2gt(call->als_map[ia],call->als_map[ib]);
                     double lk = 2*pdg[iab]*grp->qsum[ia]*grp->qsum[ib];
@@ -1076,8 +1004,8 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
     for (ifm=0; ifm<call->nfams; ifm++)
     {
         family_t *fam = &call->fams[ifm];
-        int ntrio = call->ntrio[fam->type][nout_als];
-        uint16_t *trio = call->trio[fam->type][nout_als];
+        int ntrio = call->ntrio[fam->type][nals_new];
+        uint16_t *trio = call->trio[fam->type][nals_new];
 
         // Unconstrained likelihood
         int uc_itr = 0;
@@ -1225,11 +1153,12 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
         bcf_update_format_int32(call->hdr,rec,"CGT",call->cgts,nsmpl);
     }
 }
+#endif
 
-static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
+static void mcall_trim_and_update_PLs(call_t *call, bcf1_t *rec, int nals_ori, int nals_new)
 {
-    int ngts  = nals*(nals+1)/2;
-    int npls_src = ngts, npls_dst = nout_als*(nout_als+1)/2;     // number of PL values in diploid samples, ori and new
+    int npls_src = nals_ori*(nals_ori+1)/2;
+    int npls_dst = nals_new*(nals_new+1)/2;     // number of PL values in diploid samples, ori and new
     if ( call->all_diploid && npls_src == npls_dst ) return;
 
     int *pls_src = call->PLs, *pls_dst = call->PLs;
@@ -1246,7 +1175,7 @@ static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, in
         }
         else if ( ploidy==1 )
         {
-            for (ia=0; ia<nout_als; ia++)
+            for (ia=0; ia<nals_new; ia++)
             {
                 int isrc = (ia+1)*(ia+2)/2-1;
                 pls_dst[ia] = pls_src[ call->pl_map[isrc] ];
@@ -1256,7 +1185,7 @@ static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, in
         else
         {
             pls_dst[0] = bcf_int32_missing;
-            pls_dst[1] = bcf_int32_vector_end;  // relying on nout_als>1 in mcall()
+            pls_dst[1] = bcf_int32_vector_end;  // relying on nals_new>1 in mcall()
         }
         pls_src += npls_src;
         pls_dst += npls_dst;
@@ -1264,9 +1193,9 @@ static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, in
     bcf_update_format_int32(call->hdr, rec, "PL", call->PLs, npls_dst*nsmpl);
 }
 
-void mcall_trim_numberR(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
+void mcall_trim_and_update_numberR(call_t *call, bcf1_t *rec, int nals_ori, int nals_new)
 {
-    if ( nals==nout_als ) return;
+    if ( nals_ori==nals_new ) return;
 
     int i,j, nret, size = sizeof(float);
 
@@ -1285,17 +1214,17 @@ void mcall_trim_numberR(call_t *call, bcf1_t *rec, int nals, int nout_als, int o
         nret = bcf_get_info_values(call->hdr, rec, key, &tmp_ori, &ntmp_ori, type);
         if ( nret<=0 ) continue;
 
-        if ( nout_als==1 )
+        if ( nals_new==1 )
             bcf_update_info_int32(call->hdr, rec, key, tmp_ori, 1);     // has to be the REF, the order could not change
         else
         {
-            for (j=0; j<nals; j++)
+            for (j=0; j<nals_ori; j++)
             {
                 int k = call->als_map[j];
                 if ( k==-1 ) continue;   // to be dropped
                 memcpy((char *)tmp_new+size*k, (char *)tmp_ori+size*j, size);
             }
-            bcf_update_info_int32(call->hdr, rec, key, tmp_new, nout_als);
+            bcf_update_info_int32(call->hdr, rec, key, tmp_new, nals_new);
         }
     }
 
@@ -1312,21 +1241,21 @@ void mcall_trim_numberR(call_t *call, bcf1_t *rec, int nals, int nout_als, int o
         if (nret<=0) continue;
         int nsmpl = bcf_hdr_nsamples(call->hdr);
 
-        assert( nret==nals*nsmpl );
+        assert( nret==nals_ori*nsmpl );
 
         for (j=0; j<nsmpl; j++)
         {
-            char *ptr_src = (char *)tmp_ori + j*nals*size;
-            char *ptr_dst = (char *)tmp_new + j*nout_als*size;
+            char *ptr_src = (char *)tmp_ori + j*nals_ori*size;
+            char *ptr_dst = (char *)tmp_new + j*nals_new*size;
             int k;
-            for (k=0; k<nals; k++)
+            for (k=0; k<nals_ori; k++)
             {
                 int l = call->als_map[k];
                 if ( l==-1 ) continue;   // to be dropped
                 memcpy(ptr_dst+size*l, ptr_src+size*k, size);
             }
         }
-        bcf_update_format_int32(call->hdr, rec, key, tmp_new, nout_als*nsmpl);
+        bcf_update_format_int32(call->hdr, rec, key, tmp_new, nals_new*nsmpl);
     }
 
     call->PLs    = (int32_t*) tmp_new;
@@ -1441,12 +1370,12 @@ static int mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
     }
     bcf_update_format_int32(call->hdr, rec, "PL", call->itmp, npls_new*nsmpl);
 
-    // update QS
-    int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->smpl_grp.grp[0].qsum, &call->smpl_grp.grp[0].nqsum);
-    hts_expand(float,nals,call->nqsum,call->qsum);
+    // update QS, use temporarily call->GPs to store the values
+    int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->smpl_grp[0].qsum, &call->smpl_grp[0].nqsum);
+    hts_expand(float,nals,call->nGPs,call->GPs);
     for (i=0; i<nals; i++)
-        call->qsum[i] = call->als_map[i]<nqs ? call->smpl_grp.grp[0].qsum[call->als_map[i]] : 0;
-    bcf_update_info_float(call->hdr, rec, "QS", call->qsum, nals);
+        call->GPs[i] = call->als_map[i]<nqs ? call->smpl_grp[0].qsum[call->als_map[i]] : 0;
+    bcf_update_info_float(call->hdr, rec, "QS", call->GPs, nals);
 
     // update any Number=R tags
     void *tmp_ori = call->itmp, *tmp_new = call->PLs;  // reusing PLs storage which is not used at this point
@@ -1487,7 +1416,6 @@ static int mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
     call->itmp   = (int32_t*) tmp_ori;
     call->n_itmp = ntmp_ori;
 
-
     if ( *unseen ) *unseen = nals-1;
     return 0;
 }
@@ -1506,203 +1434,229 @@ int mcall(call_t *call, bcf1_t *rec)
     // Force alleles when calling genotypes given alleles was requested
     if ( call->flag & CALL_CONSTR_ALLELES && mcall_constrain_alleles(call, rec, &unseen)!=0 ) return -2;
 
-    int nsmpl = bcf_hdr_nsamples(call->hdr);
-    int nals  = rec->n_allele;
-    hts_expand(int,nals,call->nac,call->ac);
-    hts_expand(int,nals,call->nals_map,call->als_map);
-    hts_expand(int,nals*(nals+1)/2,call->npl_map,call->pl_map);
+    int nsmpl    = bcf_hdr_nsamples(call->hdr);
+    int nals_ori = rec->n_allele;
+    hts_expand(int,nals_ori,call->nac,call->ac);
+    hts_expand(int,nals_ori,call->nals_map,call->als_map);
+    hts_expand(int,nals_ori*(nals_ori+1)/2,call->npl_map,call->pl_map);
 
     // Get the genotype likelihoods
     call->nPLs = bcf_get_format_int32(call->hdr, rec, "PL", &call->PLs, &call->mPLs);
-    if ( call->nPLs!=nsmpl*nals*(nals+1)/2 && call->nPLs!=nsmpl*nals )  // a mixture of diploid and haploid or haploid only
-        error("Wrong number of PL fields? nals=%d npl=%d\n", nals,call->nPLs);
+    if ( call->nPLs!=nsmpl*nals_ori*(nals_ori+1)/2 && call->nPLs!=nsmpl*nals_ori )  // a mixture of diploid and haploid or haploid only
+        error("Wrong number of PL fields? nals=%d npl=%d\n", nals_ori,call->nPLs);
 
     // Convert PLs to probabilities
-    int ngts = nals*(nals+1)/2;
+    int ngts_ori = nals_ori*(nals_ori+1)/2;
     hts_expand(double, call->nPLs, call->npdg, call->pdg);
-    set_pdg(call->pl2p, call->PLs, call->pdg, nsmpl, ngts, unseen);
+    set_pdg(call->pl2p, call->PLs, call->pdg, nsmpl, ngts_ori, unseen);
 
     // Get sum of qualities, serves as an AF estimate, f_x = QS/N in Eq. 1 in call-m math notes.
-    if ( call->smpl_grp.ngrp == 1  )
+    if ( call->nsmpl_grp == 1  )
     {
-        int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->smpl_grp.grp[0].qsum, &call->smpl_grp.grp[0].nqsum);
+        int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->smpl_grp[0].qsum, &call->smpl_grp[0].nqsum);
         if ( nqs<=0 ) error("The QS annotation not present at %s:%d\n", bcf_seqname(call->hdr,rec),rec->pos+1);
-        if ( nqs < nals )
+        if ( nqs < nals_ori )
         {
             // Some of the listed alleles do not have the corresponding QS field. This is
             // typically ref-only site with <*> in ALT.
-            hts_expand(float,nals,call->smpl_grp.grp[0].nqsum,call->smpl_grp.grp[0].qsum);
-            for (i=nqs; i<nals; i++) call->smpl_grp.grp[0].qsum[i] = 0;
+            hts_expand(float,nals_ori,call->smpl_grp[0].nqsum,call->smpl_grp[0].qsum);
+            for (i=nqs; i<nals_ori; i++) call->smpl_grp[0].qsum[i] = 0;
         }
     }
     else
     {
-        for (j=0; j<call->smpl_grp.ngrp; j++)
+        for (j=0; j<call->nsmpl_grp; j++)
         {
-            hts_expand(float,nals,call->smpl_grp.grp[j].nqsum,call->smpl_grp.grp[j].qsum);
-            memset(call->smpl_grp.grp[j].qsum, 0, sizeof(float)*nals);
+            hts_expand(float,nals_ori,call->smpl_grp[j].nqsum,call->smpl_grp[j].qsum);
+            memset(call->smpl_grp[j].qsum, 0, sizeof(float)*nals_ori);
         }
 
-        int nad = bcf_get_format_int32(call->hdr, rec, "AD", &call->ADs, &call->nADs);
-        if ( nad<1 ) error("Error: FORMAT/AD is required with the -G option, mpileup must be run with -a AD\n");
+        // Use FORMAT/AD or FORMAT/QS
+        int nad = bcf_get_format_int32(call->hdr, rec, call->sample_groups_tag, &call->ADs, &call->nADs);
+        if ( nad<1 ) error("Error: FORMAT/%s is required with the -G option, mpileup must be run with \"-a AD\" or \"-a QS\"\n",call->sample_groups_tag);
         nad /= bcf_hdr_nsamples(call->hdr);
-        hts_expand(float,nals,call->nqsum,call->qsum);
-        float qsum = 0;
-        for (i=0; i<bcf_hdr_nsamples(call->hdr); i++)
+        for (i=0; i<call->nsmpl_grp; i++)
         {
-            int32_t *ptr = call->ADs + i*nad;
-            for (j=0; j<nad; j++)
+            int is;
+            smpl_grp_t *grp = &call->smpl_grp[i];
+            hts_expand(float,nals_ori,grp->nqsum,grp->qsum);
+            for (j=0; j<nals_ori; j++) grp->qsum[j] = 0;
+            for (is=0; is<grp->nsmpl; is++)
             {
-                if ( ptr[j]==bcf_int32_vector_end ) break;
-                if ( ptr[j]==bcf_int32_missing ) call->qsum[j] = 0;
-                else { call->qsum[j] = ptr[j]; qsum += ptr[j]; }
+                int ismpl = grp->smpl[is];
+                int32_t *ptr = call->ADs + ismpl*nad;
+                float sum = 0;
+                for (j=0; j<nad; j++)
+                {
+                    if ( ptr[j]==bcf_int32_vector_end ) break;
+                    if ( ptr[j]!=bcf_int32_missing ) sum += ptr[j];
+                }
+                if ( sum )
+                {
+                    for (j=0; j<nad; j++)
+                    {
+                        if ( ptr[j]==bcf_int32_vector_end ) break;
+                        if ( ptr[j]!=bcf_int32_missing ) grp->qsum[j] += ptr[j]/sum;
+                    }
+                }
             }
-            for (; j<nals; j++) call->qsum[j] = 0;
-            if ( qsum ) 
-                for (j=0; j<nals; j++) call->qsum[j] /= qsum;
-
-            grp1_t *grp = &call->smpl_grp.grp[call->smpl_grp.smpl2grp[i]];
-            for (j=0; j<nals; j++)
-                grp->qsum[j] += call->qsum[j];
         }
     }
 
     // If available, take into account reference panel AFs
     if ( call->prior_AN && bcf_get_info_int32(call->hdr, rec, call->prior_AN ,&call->ac, &call->nac)==1 )
     {
-        int an = call->ac[0];
-        if ( bcf_get_info_int32(call->hdr, rec, call->prior_AC ,&call->ac, &call->nac)==nals-1 )
+        int an = call->ac[0];   // number of alleles total, proceed only if not zero; reuse call->ac
+        if ( an > 0 && bcf_get_info_int32(call->hdr, rec, call->prior_AC ,&call->ac, &call->nac)==nals_ori-1 )    // number of ALT alleles
         {
-            int ac0 = an;   // number of alleles in the reference population
-            for (i=0; i<nals-1; i++)
+            int ac0 = an;       // this will become the number of REFs
+            for (i=0; i<nals_ori-1; i++)
             {
                 if ( call->ac[i]==bcf_int32_vector_end ) break;
                 if ( call->ac[i]==bcf_int32_missing ) continue;
                 ac0 -= call->ac[i];
-                for (j=0; j<call->smpl_grp.ngrp; j++)
-                    call->smpl_grp.grp[j].qsum[i+1] += call->ac[i]*0.5;
+
+                // here an*0.5 is the number of samples in the population and ac*0.5 is the AF weighted by the number of samples
+                for (j=0; j<call->nsmpl_grp; j++)
+                    call->smpl_grp[j].qsum[i+1] = (call->smpl_grp[j].qsum[i+1] + 0.5*call->ac[i]) / (call->smpl_grp[j].nsmpl + 0.5*an);
             }
             if ( ac0<0 ) error("Incorrect %s,%s values at %s:%d\n", call->prior_AN,call->prior_AC,bcf_seqname(call->hdr,rec),rec->pos+1);
-            for (j=0; j<call->smpl_grp.ngrp; j++)
-                call->smpl_grp.grp[j].qsum[0] += ac0*0.5;
-            for (i=0; i<nals; i++)
-            {
-                for (j=0; j<call->smpl_grp.ngrp; j++)
-                    call->smpl_grp.grp[j].qsum[i] /= nsmpl + 0.5*an;
-            }
+            for (j=0; j<call->nsmpl_grp; j++)
+                call->smpl_grp[j].qsum[0] = (call->smpl_grp[j].qsum[0] + 0.5*ac0) / (call->smpl_grp[j].nsmpl + 0.5*an);
         }
     }
 
-    for (j=0; j<call->smpl_grp.ngrp; j++)
+    // normalize so that QS sums to 1 for each group
+    for (j=0; j<call->nsmpl_grp; j++)
     {
-        float qsum_tot = 0;
-        for (i=0; i<nals; i++) qsum_tot += call->smpl_grp.grp[j].qsum[i];
-        if ( qsum_tot ) for (i=0; i<nals; i++) call->smpl_grp.grp[j].qsum[i] /= qsum_tot;
+        float sum = 0;
+        for (i=0; i<nals_ori; i++) sum += call->smpl_grp[j].qsum[i];
+        if ( sum ) for (i=0; i<nals_ori; i++) call->smpl_grp[j].qsum[i] /= sum;
     }
 
     bcf_update_info_int32(call->hdr, rec, "QS", NULL, 0);      // remove QS tag
 
-    // Find the best combination of alleles
-    int out_als, nout;
-    if ( nals > 8*sizeof(out_als) )
+    if ( nals_ori > 8*sizeof(call->als_new) )
     { 
         fprintf(stderr,"Too many alleles at %s:%"PRId64", skipping.\n", bcf_seqname(call->hdr,rec),(int64_t) rec->pos+1);
         return 0; 
     }
-    nout = mcall_find_best_alleles(call, nals, &out_als);
 
-    // Make sure the REF allele is always present
-    if ( !(out_als&1) )
+    // For each group find the best combination of alleles
+    call->als_new = 0;
+    double ref_lk = -HUGE_VAL, lk_sum = -HUGE_VAL, max_qual = -HUGE_VAL;
+    for (j=0; j<call->nsmpl_grp; j++)
     {
-        out_als |= 1;
-        nout++;
+        smpl_grp_t *grp = &call->smpl_grp[j];
+        mcall_find_best_alleles(call, nals_ori, grp);
+        call->als_new |= grp->als;
+        if ( grp->max_lk==-HUGE_VAL ) continue;
+        double qual = -4.343*(grp->ref_lk - logsumexp2(grp->lk_sum,grp->ref_lk));
+        if ( max_qual < qual )
+        {
+            max_qual = qual;
+            lk_sum = grp->lk_sum;
+            ref_lk = grp->ref_lk;
+        }
     }
-    int is_variant = out_als==1 ? 0 : 1;
+
+    // Make sure the REF allele is always present
+    if ( !(call->als_new&1) ) call->als_new |= 1;
+
+    int is_variant = call->als_new==1 ? 0 : 1;
     if ( call->flag & CALL_VARONLY && !is_variant ) return 0;
 
-    // With -A, keep all ALTs except X
-    if ( call->flag & CALL_KEEPALT )
+    call->nals_new = 0;
+    for (i=0; i<nals_ori; i++)
     {
-        nout = 0;
-        for (i=0; i<nals; i++)
-        {
-            if ( i>0 && i==unseen ) continue;
-            out_als |= 1<<i;
-            nout++;
-        }
+        if ( i>0 && i==unseen ) continue;
+        if ( call->flag & CALL_KEEPALT ) call->als_new |= 1<<i;
+        if ( call->als_new & (1<<i) ) call->nals_new++;
     }
 
+    init_allele_trimming_maps(call,nals_ori,call->als_new);
+
     int nAC = 0;
-    if ( out_als==1 )   // only REF allele on output
+    if ( call->als_new==1 )   // only REF allele on output
     {
-        init_allele_trimming_maps(call, 1, nals);
-        mcall_set_ref_genotypes(call,nals);
+        mcall_set_ref_genotypes(call,nals_ori);
         bcf_update_format_int32(call->hdr, rec, "PL", NULL, 0);    // remove PL, useless now
     }
+    else if ( !is_variant )
+    {
+        mcall_set_ref_genotypes(call,nals_ori);     // running with -A, prevent mcall_call_genotypes from putting some ALT back
+        mcall_trim_and_update_PLs(call, rec, nals_ori, call->nals_new);
+    }
     else
     {
         // The most likely set of alleles includes non-reference allele (or was enforced), call genotypes.
         // Note that it is a valid outcome if the called genotypes exclude some of the ALTs.
-        init_allele_trimming_maps(call, out_als, nals);
-        if ( !is_variant )
-            mcall_set_ref_genotypes(call,nals);     // running with -A, prevent mcall_call_genotypes from putting some ALT back
-        else if ( call->flag & CALL_CONSTR_TRIO )
+        int ngts_new = call->nals_new*(call->nals_new+1)/2;
+        hts_expand(float,ngts_new*nsmpl,call->nGPs,call->GPs);
+        for (i=0; i<call->nals_new; i++) call->ac[i] = 0;
+
+        if ( call->flag & CALL_CONSTR_TRIO && call->nals_new>4 )
+        { 
+            fprintf(stderr,"Too many alleles at %s:%"PRId64", skipping.\n", bcf_seqname(call->hdr,rec),(int64_t) rec->pos+1);
+            return 0; 
+        }
+        if ( call->output_tags & (CALL_FMT_GQ|CALL_FMT_GP) )
         {
-            if ( nout>4 ) 
-            { 
-                fprintf(stderr,"Too many alleles at %s:%"PRId64", skipping.\n", bcf_seqname(call->hdr,rec),(int64_t) rec->pos+1);
-                return 0; 
-            }
-            mcall_call_trio_genotypes(call, rec, nals,nout,out_als);
+            memset(call->GPs,0,nsmpl*ngts_new*sizeof(*call->GPs));
+            memset(call->GQs,0,nsmpl*sizeof(*call->GQs));
+        }
+        for (i=0; i<call->nsmpl_grp; i++)
+        {
+            if ( call->flag & CALL_CONSTR_TRIO )
+                error("todo: constrained trio calling temporarily disabled\n");   //mcall_call_trio_genotypes(call,rec,nals,&call->smpl_grp[i]);
+            else
+                mcall_call_genotypes(call,nals_ori,&call->smpl_grp[i]);
         }
-        else
-            mcall_call_genotypes(call,rec,nals,nout,out_als);
 
         // Skip the site if all samples are 0/0. This can happen occasionally.
-        nAC = 0;
-        for (i=1; i<nout; i++) nAC += call->ac[i];
+        for (i=1; i<call->nals_new; i++) nAC += call->ac[i];
         if ( !nAC && call->flag & CALL_VARONLY ) return 0;
-        mcall_trim_PLs(call, rec, nals, nout, out_als);
+
+        if ( call->output_tags & CALL_FMT_GP )
+            bcf_update_format_float(call->hdr, rec, "GP", call->GPs, nsmpl*ngts_new);
+        if ( call->output_tags & CALL_FMT_GQ )
+            bcf_update_format_int32(call->hdr, rec, "GQ", call->GQs, nsmpl);
+
+        mcall_trim_and_update_PLs(call,rec,nals_ori,call->nals_new);
     }
-    if ( nals!=nout ) mcall_trim_numberR(call, rec, nals, nout, out_als);
+    if ( nals_ori!=call->nals_new )
+        mcall_trim_and_update_numberR(call,rec,nals_ori,call->nals_new);
 
-    // Set QUAL and calculate HWE-related annotations
+    // Set QUAL
     if ( nAC )
     {
-        float icb = calc_ICB(call->ac[0],nAC, call->nhets, call->ndiploid);
-        if ( icb != HUGE_VAL ) bcf_update_info_float(call->hdr, rec, "ICB", &icb, 1);
-
-        float hob = calc_HOB(call->ac[0],nAC, call->nhets, call->ndiploid);
-        if ( hob != HUGE_VAL ) bcf_update_info_float(call->hdr, rec, "HOB", &hob, 1);
-
         // Quality of a variant site. fabs() to avoid negative zeros in VCF output when CALL_KEEPALT is set
-        rec->qual = -4.343*(call->ref_lk - logsumexp2(call->lk_sum,call->ref_lk));
+        rec->qual = max_qual;
     }
     else
     {
         // Set the quality of a REF site
-        if ( call->lk_sum==-HUGE_VAL )  // no support from (high quality) reads, so QUAL=1-prior
+        if ( lk_sum!=-HUGE_VAL )  // lk_sum was set, QUAL is derived from lk_sum and ref_lk
+            rec->qual = -4.343*(lk_sum - logsumexp2(lk_sum,ref_lk));
+        else if ( call->ac[0] )   // no support from (high quality) reads, so QUAL=1-prior
             rec->qual = call->theta ? -4.343*call->theta : 0;
         else
-            rec->qual = -4.343*(call->lk_sum - logsumexp2(call->lk_sum,call->ref_lk));
+            bcf_float_set_missing(rec->qual);
     }
 
-    if ( rec->qual>999 ) rec->qual = 999;
-    if ( rec->qual>50 ) rec->qual = rint(rec->qual);
-
     // AC, AN
-    if ( nout>1 ) bcf_update_info_int32(call->hdr, rec, "AC", call->ac+1, nout-1);
+    if ( call->nals_new>1 ) bcf_update_info_int32(call->hdr, rec, "AC", call->ac+1, call->nals_new-1);
     nAC += call->ac[0];
     bcf_update_info_int32(call->hdr, rec, "AN", &nAC, 1);
 
     // Remove unused alleles
-    hts_expand(char*,nout,call->nals,call->als);
-    for (i=0; i<nals; i++)
+    hts_expand(char*,call->nals_new,call->nals,call->als);
+    for (i=0; i<nals_ori; i++)
         if ( call->als_map[i]>=0 ) call->als[call->als_map[i]] = rec->d.allele[i];
-    bcf_update_alleles(call->hdr, rec, (const char**)call->als, nout);
+    bcf_update_alleles(call->hdr, rec, (const char**)call->als, call->nals_new);
     bcf_update_genotypes(call->hdr, rec, call->gts, nsmpl*2);
 
-    // DP4 tag
+    // DP4 and PV4 tags
     if ( bcf_get_info_float(call->hdr, rec, "I16", &call->anno16, &call->n16)==16 )
     {
         int32_t dp[4]; dp[0] = call->anno16[0]; dp[1] = call->anno16[1]; dp[2] = call->anno16[2]; dp[3] = call->anno16[3];
@@ -1710,10 +1664,22 @@ int mcall(call_t *call, bcf1_t *rec)
 
         int32_t mq = (call->anno16[8]+call->anno16[10])/(call->anno16[0]+call->anno16[1]+call->anno16[2]+call->anno16[3]);
         bcf_update_info_int32(call->hdr, rec, "MQ", &mq, 1);
+
+        if ( call->output_tags & CALL_FMT_PV4 )
+        {
+            anno16_t a;
+            float tmpf[4];
+            int is_tested = test16(call->anno16, &a) >= 0 && a.is_tested ? 1 : 0;
+            if ( is_tested ) 
+            {
+                for (i=0; i<4; i++) tmpf[i] = a.p[i];
+                bcf_update_info_float(call->hdr, rec, "PV4", tmpf, 4);
+            }
+        }
     }
 
     bcf_update_info_int32(call->hdr, rec, "I16", NULL, 0);     // remove I16 tag
 
-    return nout;
+    return call->nals_new;
 }
 
diff --git a/bcftools/mcall.c.pysam.c b/bcftools/mcall.c.pysam.c
index 2c2fb37..c2d38a6 100644
--- a/bcftools/mcall.c.pysam.c
+++ b/bcftools/mcall.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  mcall.c -- multiallelic and rare variant calling.
 
-    Copyright (C) 2012-2016 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -24,11 +24,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.  */
 
+#include <assert.h>
 #include <math.h>
 #include <inttypes.h>
+#include <ctype.h>
 #include <htslib/kfunc.h>
 #include <htslib/khash_str2int.h>
 #include "call.h"
+#include "prob1.h"
 
 // Using priors for GTs does not seem to be mathematically justified. Although
 // it seems effective in removing false calls, it also flips a significant
@@ -40,6 +43,7 @@ THE SOFTWARE.  */
 // genotypes is reported instead.
 #define FLAT_PDG_FOR_MISSING 0
 
+int test16(float *anno16, anno16_t *a);
 
 void qcall_init(call_t *call) { return; }
 void qcall_destroy(call_t *call) { return; }
@@ -251,19 +255,46 @@ static void init_sample_groups(call_t *call)
     if ( !call->sample_groups )
     {
         // standard pooled calling, all samples in the same group
-        grp_t *grps = &call->smpl_grp;
-        grps->ngrp = 1;
-        grps->grp  = (grp1_t*)calloc(grps->ngrp, sizeof(grp1_t));
-        grps->smpl2grp = (int*)calloc(nsmpl,sizeof(int));
+        call->nsmpl_grp = 1;
+        call->smpl_grp  = (smpl_grp_t*)calloc(1,sizeof(*call->smpl_grp));
+        call->smpl_grp[0].nsmpl = nsmpl;
+        call->smpl_grp[0].smpl  = (uint32_t*)calloc(call->smpl_grp[0].nsmpl,sizeof(uint32_t));
+        for (i=0; i<nsmpl; i++)
+            call->smpl_grp[0].smpl[i] = i;
+        return;
+    }
+
+    if ( call->sample_groups_tag )
+    {
+        // Is the tag defined in the header?
+        int tag_id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,call->sample_groups_tag);
+        if ( tag_id==-1 ) error("No such tag \"%s\"\n",call->sample_groups_tag);
+        if ( !bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,tag_id) )  error("No such FORMAT tag \"%s\"\n", call->sample_groups_tag);
+    }
+    else
+    {
+        int tag_id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,"QS");
+        if ( tag_id >= 0 && bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,tag_id) ) call->sample_groups_tag = "QS";
+        else
+        {
+            tag_id = bcf_hdr_id2int(call->hdr,BCF_DT_ID,"AD");
+            if ( tag_id >= 0 && bcf_hdr_idinfo_exists(call->hdr,BCF_HL_FMT,tag_id) ) call->sample_groups_tag = "AD";
+            else error("Error: neither \"AD\" nor \"QS\" FORMAT tag exists and no alternative given with -G\n");
+        }
     }
-    else if ( !strcmp("-",call->sample_groups) )
+
+    // Read samples/groups
+    if ( !strcmp("-",call->sample_groups) )
     {
         // single-sample calling, each sample creates its own group
-        grp_t *grps = &call->smpl_grp;
-        grps->ngrp = nsmpl;
-        grps->grp  = (grp1_t*)calloc(grps->ngrp, sizeof(grp1_t));
-        grps->smpl2grp = (int*)malloc(nsmpl*sizeof(int));
-        for (i=0; i<nsmpl; i++) grps->smpl2grp[i] = i;
+        call->nsmpl_grp = nsmpl;
+        call->smpl_grp  = (smpl_grp_t*)calloc(nsmpl,sizeof(*call->smpl_grp));
+        for (i=0; i<nsmpl; i++)
+        {
+            call->smpl_grp[i].nsmpl = 1;
+            call->smpl_grp[i].smpl  = (uint32_t*)calloc(call->smpl_grp[i].nsmpl,sizeof(uint32_t));
+            call->smpl_grp[i].smpl[0] = i;
+        }
     }
     else
     {
@@ -271,40 +302,49 @@ static void init_sample_groups(call_t *call)
         char **lines = hts_readlist(call->sample_groups, 1, &nlines);
         if ( !lines ) error("Could not read the file: %s\n", call->sample_groups);
 
-        uint32_t *smpl2grp1 = (uint32_t*)calloc(nsmpl,sizeof(uint32_t));
+        uint32_t *smpl2grp = (uint32_t*)calloc(nsmpl,sizeof(uint32_t));
+        uint32_t *grp2n = (uint32_t*)calloc(nsmpl,sizeof(uint32_t));
         void *grp2idx = khash_str2int_init();
 
-        grp_t *grps = &call->smpl_grp;
+        call->nsmpl_grp = 0;
         for (i=0; i<nlines; i++)
         {
             char *ptr = lines[i];
-            while ( *ptr && *ptr!='\t' ) ptr++;
+            while ( *ptr && !isspace(*ptr) ) ptr++;
             if ( !*ptr ) error("Could not parse the line in %s, expected a sample name followed by tab and a population name: %s\n",call->sample_groups,lines[i]);
-            *ptr = 0;
+            char *tmp = ptr;
+            while ( *ptr && isspace(*ptr) ) ptr++;
+            if ( !*ptr ) error("Could not parse the line in %s, expected a sample name followed by tab and a population name: %s\n",call->sample_groups,lines[i]);
+            *tmp = 0;
             int ismpl = bcf_hdr_id2int(call->hdr, BCF_DT_SAMPLE, lines[i]); 
             if ( ismpl<0 ) continue;
-            if ( smpl2grp1[ismpl] ) error("Error: the sample \"%s\" is listed twice in %s\n", lines[i],call->sample_groups);
+            if ( smpl2grp[ismpl] ) error("Error: the sample \"%s\" is listed twice in %s\n", lines[i],call->sample_groups);
             if ( !khash_str2int_has_key(grp2idx,ptr+1) )
             {
-                khash_str2int_inc(grp2idx, ptr+1);
-                grps->ngrp++;
+                khash_str2int_set(grp2idx, ptr+1, call->nsmpl_grp);
+                call->nsmpl_grp++;
             }
-            int igrp;
-            if ( khash_str2int_get(grp2idx, ptr+1, &igrp)==0 )
-                smpl2grp1[ismpl] = igrp+1;
-            else
+            int igrp = -1;
+            if ( khash_str2int_get(grp2idx, ptr+1, &igrp)!=0 )
                 error("This should not happen, fixme: %s\n",ptr+1);
+            grp2n[igrp]++;
+            smpl2grp[ismpl] = igrp+1;   // +1 to distinguish unlisted samples
         }
         khash_str2int_destroy(grp2idx);
+        if ( !call->nsmpl_grp ) error("Could not parse the file, no matching samples found: %s\n", call->sample_groups);
 
-        grps->grp = (grp1_t*)calloc(grps->ngrp, sizeof(grp1_t));
-        grps->smpl2grp = (int*)malloc(nsmpl*sizeof(int));
+        call->smpl_grp = (smpl_grp_t*)calloc(call->nsmpl_grp,sizeof(*call->smpl_grp));
         for (i=0; i<nsmpl; i++)
         {
-            if ( !smpl2grp1[i] ) error("Error: The sample \"%s\" is not listed in %s\n",call->hdr->samples[i],call->sample_groups);
-            grps->smpl2grp[i] = smpl2grp1[i] - 1;
+            if ( !smpl2grp[i] ) error("Error: The sample \"%s\" is not listed in %s\n",call->hdr->samples[i],call->sample_groups);
+            int igrp = smpl2grp[i] - 1;
+            if ( !call->smpl_grp[igrp].nsmpl ) 
+                call->smpl_grp[igrp].smpl = (uint32_t*)calloc(grp2n[igrp],sizeof(uint32_t));
+            call->smpl_grp[igrp].smpl[call->smpl_grp[igrp].nsmpl] = i;
+            call->smpl_grp[igrp].nsmpl++;
         }
-        free(smpl2grp1);
+        free(smpl2grp);
+        free(grp2n);
         for (i=0; i<nlines; i++) free(lines[i]);
         free(lines);
     }
@@ -312,15 +352,17 @@ static void init_sample_groups(call_t *call)
 static void destroy_sample_groups(call_t *call)
 {
     int i;
-    grp_t *grps = &call->smpl_grp;
-    for (i=0; i<grps->ngrp; i++)
-        free(grps->grp[i].qsum);
-    free(grps->grp);
-    free(grps->smpl2grp);
+    for (i=0; i<call->nsmpl_grp; i++)
+    {
+        free(call->smpl_grp[i].qsum);
+        free(call->smpl_grp[i].smpl);
+    }
+    free(call->smpl_grp);
 }
 
 void mcall_init(call_t *call)
 {
+    init_sample_groups(call);
     call_init_pl2p(call);
 
     call->nals_map = 5;
@@ -343,15 +385,15 @@ void mcall_init(call_t *call)
     if ( call->output_tags & CALL_FMT_GQ )
         bcf_hdr_append(call->hdr,"##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Phred-scaled Genotype Quality\">");
     if ( call->output_tags & CALL_FMT_GP )
-        bcf_hdr_append(call->hdr,"##FORMAT=<ID=GP,Number=G,Type=Float,Description=\"Phred-scaled genotype posterior probabilities\">");
+        bcf_hdr_append(call->hdr,"##FORMAT=<ID=GP,Number=G,Type=Float,Description=\"Genotype posterior probabilities in the range 0 to 1\">");
     if ( call->output_tags & (CALL_FMT_GQ|CALL_FMT_GP) )
         call->GQs = (int32_t*) malloc(sizeof(int32_t)*bcf_hdr_nsamples(call->hdr));
-    bcf_hdr_append(call->hdr,"##INFO=<ID=ICB,Number=1,Type=Float,Description=\"Inbreeding Coefficient Binomial test (bigger is better)\">");
-    bcf_hdr_append(call->hdr,"##INFO=<ID=HOB,Number=1,Type=Float,Description=\"Bias in the number of HOMs number (smaller is better)\">");
     bcf_hdr_append(call->hdr,"##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Allele count in genotypes for each ALT allele, in the same order as listed\">");
     bcf_hdr_append(call->hdr,"##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");
     bcf_hdr_append(call->hdr,"##INFO=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases\">");
     bcf_hdr_append(call->hdr,"##INFO=<ID=MQ,Number=1,Type=Integer,Description=\"Average mapping quality\">");
+    if ( call->output_tags & CALL_FMT_PV4 )
+        bcf_hdr_append(call->hdr,"##INFO=<ID=PV4,Number=4,Type=Float,Description=\"P-values for strand bias, baseQ bias, mapQ bias and tail distance bias\">\n");
 
     // init the prior
     if ( call->theta>0 )
@@ -374,8 +416,6 @@ void mcall_init(call_t *call)
         }
         call->theta = log(call->theta);
     }
-
-    init_sample_groups(call);
 }
 
 void mcall_destroy(call_t *call)
@@ -396,7 +436,6 @@ void mcall_destroy(call_t *call)
     free(call->pdg);
     free(call->als);
     free(call->ac);
-    free(call->qsum);
     return;
 }
 
@@ -507,14 +546,14 @@ void set_pdg(double *pl2p, int *PLs, double *pdg, int n_smpl, int n_gt, int unse
 }
 
 // Create mapping between old and new (trimmed) alleles
-void init_allele_trimming_maps(call_t *call, int als, int nals)
+void init_allele_trimming_maps(call_t *call, int nals_ori, int als_out)
 {
-    int i, j;
+    int i, j, nout = 0;
 
     // als_map: old(i) -> new(j)
-    for (i=0, j=0; i<nals; i++)
+    for (i=0; i<nals_ori; i++)
     {
-        if ( als & 1<<i ) call->als_map[i] = j++;
+        if ( als_out & (1<<i) ) call->als_map[i] = nout++;
         else call->als_map[i] = -1;
     }
 
@@ -522,85 +561,16 @@ void init_allele_trimming_maps(call_t *call, int als, int nals)
 
     // pl_map: new(k) -> old(l)
     int k = 0, l = 0;
-    for (i=0; i<nals; i++)
+    for (i=0; i<nals_ori; i++)
     {
         for (j=0; j<=i; j++)
         {
-            if ( (als & 1<<i) && (als & 1<<j) ) call->pl_map[k++] = l;
+            if ( (als_out & (1<<i)) && (als_out & (1<<j)) ) call->pl_map[k++] = l;
             l++;
         }
     }
 }
 
-double binom_dist(int N, double p, int k)
-{
-    int mean = (int) (N*p);
-    if ( mean==k ) return 1.0;
-
-    double log_p = (k-mean)*log(p) + (mean-k)*log(1.0-p);
-    if ( k > N - k ) k = N - k;
-    if ( mean > N - mean ) mean = N - mean;
-
-    if ( k < mean ) { int tmp = k; k = mean; mean = tmp; }
-    double diff = k - mean;
-
-    double val = 1.0;
-    int i;
-    for (i=0; i<diff; i++)
-        val = val * (N-mean-i) / (k-i);
-
-    return exp(log_p)/val;
-}
-
-
-// Inbreeding Coefficient, binomial test
-float calc_ICB(int nref, int nalt, int nhets, int ndiploid)
-{
-    if ( !nref || !nalt || !ndiploid ) return HUGE_VAL;
-
-    double fref = (double)nref/(nref+nalt); // fraction of reference allelels
-    double falt = (double)nalt/(nref+nalt); // non-ref als
-    double q = 2*fref*falt;                 // probability of a het, assuming HWE
-    double mean = q*ndiploid;
-
-    //fprintf(bcftools_stderr,"\np=%e N=%d k=%d  .. nref=%d nalt=%d nhets=%d ndiploid=%d\n", q,ndiploid,nhets, nref,nalt,nhets,ndiploid);
-
-    // Can we use normal approximation? The second condition is for performance only
-    // and is not well justified.
-    if ( (mean>10 && (1-q)*ndiploid>10 ) || ndiploid>200 )
-    {
-        //fprintf(bcftools_stderr,"out: mean=%e  p=%e\n", mean,exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q))));
-        return exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q)));
-    }
-
-    return binom_dist(ndiploid, q, nhets);
-}
-
-float calc_HOB(int nref, int nalt, int nhets, int ndiploid)
-{
-    if ( !nref || !nalt || !ndiploid ) return HUGE_VAL;
-
-    double fref = (double)nref/(nref+nalt); // fraction of reference allelels
-    double falt = (double)nalt/(nref+nalt); // non-ref als
-    return fabs((double)nhets/ndiploid - 2*fref*falt);
-}
-
-/**
-  *  log(sum_i exp(a_i))
-  */
-// static inline double logsumexp(double *vals, int nvals)
-// {
-//     int i;
-//     double max_exp = vals[0];
-//     for (i=1; i<nvals; i++)
-//         if ( max_exp < vals[i] ) max_exp = vals[i];
-
-//     double sum = 0;
-//     for (i=0; i<nvals; i++)
-//         sum += exp(vals[i] - max_exp);
-
-//     return log(sum) + max_exp;
-// }
 /** log(exp(a)+exp(b)) */
 static inline double logsumexp2(double a, double b)
 {
@@ -612,7 +582,7 @@ static inline double logsumexp2(double a, double b)
 
 // Macro to set the most likely alleles
 #define UPDATE_MAX_LKs(als,sum) { \
-     if ( max_lk<lk_tot ) { max_lk = lk_tot; max_als = (als); } \
+     if ( max_lk<lk_tot && lk_tot_set ) { max_lk = lk_tot; max_als = (als); } \
      if ( sum ) lk_sum = logsumexp2(lk_tot,lk_sum); \
 }
 
@@ -620,14 +590,13 @@ static inline double logsumexp2(double a, double b)
 
 // Determine the most likely combination of alleles. In this implementation,
 // at most tri-allelic sites are considered. Returns the number of alleles.
-static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
+static int mcall_find_best_alleles(call_t *call, int nals, smpl_grp_t *grp)
 {
-    int j;
     int ia,ib,ic;   // iterators over up to three alleles
     int max_als=0;  // most likely combination of alleles
-    double ref_lk = 0, max_lk = -HUGE_VAL; // likelihood of the reference and of most likely combination of alleles
+    double ref_lk = -HUGE_VAL, max_lk = -HUGE_VAL; // likelihood of the reference and of most likely combination of alleles
     double lk_sum = -HUGE_VAL;    // for normalizing the likelihoods
-    int nsmpl = bcf_hdr_nsamples(call->hdr);
+    int nsmpl = grp->nsmpl;
     int ngts  = nals*(nals+1)/2;
 
     // Single allele
@@ -636,60 +605,45 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
         double lk_tot  = 0;
         int lk_tot_set = 0;
         int iaa = (ia+1)*(ia+2)/2-1;    // index in PL which corresponds to the homozygous "ia/ia" genotype
-        int isample;
-        double *pdg = call->pdg + iaa;
-        for (isample=0; isample<nsmpl; isample++)
+        int ismpl;
+        for (ismpl=0; ismpl<nsmpl; ismpl++)
         {
+            double *pdg = call->pdg + grp->smpl[ismpl]*ngts + iaa;
             if ( *pdg ) { lk_tot += log(*pdg); lk_tot_set = 1; }
-            pdg += ngts;
         }
         if ( ia==0 ) ref_lk = lk_tot;   // likelihood of 0/0 for all samples
         else lk_tot += call->theta; // the prior
         UPDATE_MAX_LKs(1<<ia, ia>0 && lk_tot_set);
     }
 
-    grp_t *grps = &call->smpl_grp;
-
     // Two alleles
     if ( nals>1 )
     {
         for (ia=0; ia<nals; ia++)
         {
-            if ( grps->ngrp==1 && grps->grp[0].qsum[ia]==0 ) continue;
+            if ( grp->qsum[ia]==0 ) continue;
             int iaa = (ia+1)*(ia+2)/2-1;
             for (ib=0; ib<ia; ib++)
             {
-                if ( grps->ngrp==1 && grps->grp[0].qsum[ib]==0 ) continue;
+                if ( grp->qsum[ib]==0 ) continue;
                 double lk_tot  = 0;
                 int lk_tot_set = 0;
-                int ia_cov = 0, ib_cov = 0;
-                for (j=0; j<grps->ngrp; j++)
+                double fa  = grp->qsum[ia]/(grp->qsum[ia] + grp->qsum[ib]);
+                double fb  = grp->qsum[ib]/(grp->qsum[ia] + grp->qsum[ib]);
+                double fa2 = fa*fa;
+                double fb2 = fb*fb;
+                double fab = 2*fa*fb;
+                int is, ibb = (ib+1)*(ib+2)/2-1, iab = iaa - ia + ib;
+                for (is=0; is<nsmpl; is++)
                 {
-                    grp1_t *grp = &grps->grp[j];
-                    if ( grp->qsum[ia] ) ia_cov = 1;
-                    if ( grp->qsum[ib] ) ib_cov = 1;
-                    if ( !grp->qsum[ia] && !grp->qsum[ib] ) { grp->dp = 0; continue; }
-                    grp->dp  = 1;
-                    grp->fa  = grp->qsum[ia]/(grp->qsum[ia]+grp->qsum[ib]);
-                    grp->fb  = grp->qsum[ib]/(grp->qsum[ia]+grp->qsum[ib]);
-                    grp->fa2 = grp->fa*grp->fa;
-                    grp->fb2 = grp->fb*grp->fb;
-                    grp->fab = 2*grp->fa*grp->fb;
-                }
-                if ( !ia_cov || !ib_cov ) continue;
-                int isample, ibb = (ib+1)*(ib+2)/2-1, iab = iaa - ia + ib;
-                double *pdg  = call->pdg;
-                for (isample=0; isample<nsmpl; isample++)
-                {
-                    grp1_t *grp = &grps->grp[grps->smpl2grp[isample]];
-                    if ( !grp->dp ) continue;
+                    int ismpl = grp->smpl[is];
+                    double *pdg = call->pdg + ismpl*ngts;
                     double val = 0;
-                    if ( !call->ploidy || call->ploidy[isample]==2 )
-                        val = grp->fa2*pdg[iaa] + grp->fb2*pdg[ibb] + grp->fab*pdg[iab];
-                    else if ( call->ploidy && call->ploidy[isample]==1 )
-                        val = grp->fa*pdg[iaa] + grp->fb*pdg[ibb];
+                    if ( !call->ploidy || call->ploidy[ismpl]==2 )
+                        val = fa2*pdg[iaa] + fb2*pdg[ibb] + fab*pdg[iab];
+                    else if ( call->ploidy && call->ploidy[ismpl]==1 )
+                        val = fa*pdg[iaa] + fb*pdg[ibb];
                     if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
-                    pdg += ngts;
                 }
                 if ( ia!=0 ) lk_tot += call->theta;    // the prior
                 if ( ib!=0 ) lk_tot += call->theta;
@@ -703,50 +657,38 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
     {
         for (ia=0; ia<nals; ia++)
         {
-            if ( grps->ngrp==1 && grps->grp[0].qsum[ia]==0 ) continue;
+            if ( grp->qsum[ia]==0 ) continue;
             int iaa = (ia+1)*(ia+2)/2-1;
             for (ib=0; ib<ia; ib++)
             {
-                if (  grps->ngrp==1 && grps->grp[0].qsum[ib]==0 ) continue;
+                if ( grp->qsum[ib]==0 ) continue;
                 int ibb = (ib+1)*(ib+2)/2-1;
                 int iab = iaa - ia + ib;
                 for (ic=0; ic<ib; ic++)
                 {
-                    if (  grps->ngrp==1 && grps->grp[0].qsum[ic]==0 ) continue;
+                    if ( grp->qsum[ic]==0 ) continue;
                     double lk_tot  = 0;
-                    int lk_tot_set = 1;
-                    int ia_cov = 0, ib_cov = 0, ic_cov = 0;
-                    for (j=0; j<grps->ngrp; j++)
-                    {
-                        grp1_t *grp = &grps->grp[j];
-                        if ( grp->qsum[ia] ) ia_cov = 1;
-                        if ( grp->qsum[ib] ) ib_cov = 1;
-                        if ( grp->qsum[ic] ) ic_cov = 1;
-                        if ( !grp->qsum[ia] && !grp->qsum[ib] && !grp->qsum[ic] ) { grp->dp = 0; continue; }
-                        grp->dp  = 1;
-                        grp->fa  = grp->qsum[ia]/(grp->qsum[ia]+grp->qsum[ib]+grp->qsum[ic]);
-                        grp->fb  = grp->qsum[ib]/(grp->qsum[ia]+grp->qsum[ib]+grp->qsum[ic]);
-                        grp->fc  = grp->qsum[ic]/(grp->qsum[ia]+grp->qsum[ib]+grp->qsum[ic]);
-                        grp->fa2 = grp->fa*grp->fa;
-                        grp->fb2 = grp->fb*grp->fb;
-                        grp->fc2 = grp->fc*grp->fc;
-                        grp->fab = 2*grp->fa*grp->fb, grp->fac = 2*grp->fa*grp->fc, grp->fbc = 2*grp->fb*grp->fc;
-                    }
-                    if ( !ia_cov || !ib_cov || !ic_cov ) continue;
-                    int isample, icc = (ic+1)*(ic+2)/2-1;
+                    int lk_tot_set = 0;
+
+                    double fa  = grp->qsum[ia]/(grp->qsum[ia] + grp->qsum[ib] + grp->qsum[ic]);
+                    double fb  = grp->qsum[ib]/(grp->qsum[ia] + grp->qsum[ib] + grp->qsum[ic]);
+                    double fc  = grp->qsum[ic]/(grp->qsum[ia] + grp->qsum[ib] + grp->qsum[ic]);
+                    double fa2 = fa*fa;
+                    double fb2 = fb*fb;
+                    double fc2 = fc*fc;
+                    double fab = 2*fa*fb, fac = 2*fa*fc, fbc = 2*fb*fc;
+                    int is, icc = (ic+1)*(ic+2)/2-1;
                     int iac = iaa - ia + ic, ibc = ibb - ib + ic;
-                    double *pdg = call->pdg;
-                    for (isample=0; isample<nsmpl; isample++)
+                    for (is=0; is<nsmpl; is++)
                     {
-                        grp1_t *grp = &grps->grp[grps->smpl2grp[isample]];
-                        if ( !grp->dp ) continue;
+                        int ismpl = grp->smpl[is];
+                        double *pdg = call->pdg + ismpl*ngts;
                         double val = 0;
-                        if ( !call->ploidy || call->ploidy[isample]==2 )
-                            val = grp->fa2*pdg[iaa] + grp->fb2*pdg[ibb] + grp->fc2*pdg[icc] + grp->fab*pdg[iab] + grp->fac*pdg[iac] + grp->fbc*pdg[ibc];
-                        else if ( call->ploidy && call->ploidy[isample]==1 )
-                            val = grp->fa*pdg[iaa] + grp->fb*pdg[ibb] + grp->fc*pdg[icc];
+                        if ( !call->ploidy || call->ploidy[ismpl]==2 )
+                            val = fa2*pdg[iaa] + fb2*pdg[ibb] + fc2*pdg[icc] + fab*pdg[iab] + fac*pdg[iac] + fbc*pdg[ibc];
+                        else if ( call->ploidy && call->ploidy[ismpl]==1 )
+                            val = fa*pdg[iaa] + fb*pdg[ibb] + fc*pdg[icc];
                         if ( val ) { lk_tot += log(val); lk_tot_set = 1; }
-                        pdg += ngts;
                     }
                     if ( ia!=0 ) lk_tot += call->theta;    // the prior
                     if ( ib!=0 ) lk_tot += call->theta;    // the prior
@@ -757,25 +699,26 @@ static int mcall_find_best_alleles(call_t *call, int nals, int *out_als)
         }
     }
 
-    call->ref_lk = ref_lk;
-    call->lk_sum = lk_sum;
-    *out_als = max_als;
-
     int i, n = 0;
     for (i=0; i<nals; i++) if ( max_als & 1<<i) n++;
 
+    grp->max_lk = max_lk;
+    grp->ref_lk = ref_lk;
+    grp->lk_sum = lk_sum;
+    grp->als  = max_als;
+    grp->nals = n;
+
     return n;
 }
 
-static void mcall_set_ref_genotypes(call_t *call, int nals)
+// Sets GT=0/0 or GT=. if PL=0,0,0
+static void mcall_set_ref_genotypes(call_t *call, int nals_ori)
 {
     int i;
-    int ngts  = nals*(nals+1)/2;
+    int ngts  = nals_ori*(nals_ori+1)/2;            // need this to distinguish between GT=0/0 vs GT=.
     int nsmpl = bcf_hdr_nsamples(call->hdr);
 
-    for (i=0; i<nals; i++) call->ac[i] = 0;
-    call->nhets = 0;
-    call->ndiploid = 0;
+    for (i=0; i<nals_ori; i++) call->ac[i] = 0;     // nals_new<=nals_ori, never mind setting extra 0's
 
     // Set all genotypes to 0/0 or 0
     int *gts    = call->gts;
@@ -801,34 +744,27 @@ static void mcall_set_ref_genotypes(call_t *call, int nals)
     }
 }
 
-static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
+static void mcall_call_genotypes(call_t *call, int nals_ori, smpl_grp_t *grp)
 {
     int ia, ib, i;
-    int ngts  = nals*(nals+1)/2;
-    int nsmpl = bcf_hdr_nsamples(call->hdr);
-    int nout_gts = nout_als*(nout_als+1)/2;
-    hts_expand(float,nout_gts*nsmpl,call->nGPs,call->GPs);
-
-    for (i=0; i<nout_als; i++) call->ac[i] = 0;
-    call->nhets = 0;
-    call->ndiploid = 0;
+    int ngts_ori = nals_ori*(nals_ori+1)/2; 
+    int ngts_new = call->nals_new*(call->nals_new+1)/2;
+    int nsmpl = grp->nsmpl;
 
     #if USE_PRIOR_FOR_GTS
         float prior = exp(call->theta);
     #endif
-    float *gps  = call->GPs - nout_gts;
-    double *pdg = call->pdg - ngts;
-    int *gts  = call->gts - 2;
 
-    int isample;
-    for (isample = 0; isample < nsmpl; isample++)
+    int is;
+    for (is = 0; is < nsmpl; is++)
     {
-        int ploidy = call->ploidy ? call->ploidy[isample] : 2;
-        assert( ploidy>=0 && ploidy<=2 );
+        int ismpl   = grp->smpl[is];
+        double *pdg = call->pdg + ismpl*ngts_ori;
+        float *gps  = call->GPs + ismpl*ngts_new;
+        int *gts    = call->gts + ismpl*2;
 
-        pdg += ngts;
-        gts += 2;
-        gps += nout_gts;
+        int ploidy = call->ploidy ? call->ploidy[ismpl] : 2;
+        assert( ploidy>=0 && ploidy<=2 );
 
         if ( !ploidy )
         {
@@ -840,8 +776,8 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
 
         #if !FLAT_PDG_FOR_MISSING
             // Skip samples with zero depth, they have all pdg's equal to 0
-            for (i=0; i<ngts; i++) if ( pdg[i]!=0.0 ) break;
-            if ( i==ngts )
+            for (i=0; i<ngts_ori; i++) if ( pdg[i]!=0.0 ) break;
+            if ( i==ngts_ori )
             {
                 gts[0] = bcf_gt_missing;
                 gts[1] = ploidy==2 ? bcf_gt_missing : bcf_int32_vector_end;
@@ -850,19 +786,16 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
             }
         #endif
 
-        if ( ploidy==2 ) call->ndiploid++;
-
         // Default fallback for the case all LKs are the same
         gts[0] = bcf_gt_unphased(0);
         gts[1] = ploidy==2 ? bcf_gt_unphased(0) : bcf_int32_vector_end;
 
         // Non-zero depth, determine the most likely genotype
-        grp1_t *grp = &call->smpl_grp.grp[call->smpl_grp.smpl2grp[isample]];
         double best_lk = 0;
-        for (ia=0; ia<nals; ia++)
+        for (ia=0; ia<nals_ori; ia++)
         {
-            if ( !(out_als & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
-            int iaa = (ia+1)*(ia+2)/2-1;            // PL index of the ia/ia genotype
+            if ( !(grp->als & 1<<ia) ) continue;    // ia-th allele not in the final selection, skip
+            int iaa = (ia+1)*(ia+2)/2-1;                // PL index of the ia/ia genotype
             double lk = ploidy==2 ? pdg[iaa]*grp->qsum[ia]*grp->qsum[ia] : pdg[iaa]*grp->qsum[ia];
             #if USE_PRIOR_FOR_GTS
                 if ( ia!=0 ) lk *= prior;
@@ -878,13 +811,13 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
         if ( ploidy==2 )
         {
             gts[1] = gts[0];
-            for (ia=0; ia<nals; ia++)
+            for (ia=0; ia<nals_ori; ia++)
             {
-                if ( !(out_als & 1<<ia) ) continue;
+                if ( !(grp->als & 1<<ia) ) continue;
                 int iaa = (ia+1)*(ia+2)/2-1;
                 for (ib=0; ib<ia; ib++)
                 {
-                    if ( !(out_als & 1<<ib) ) continue;
+                    if ( !(grp->als & 1<<ib) ) continue;
                     int iab = iaa - ia + ib;
                     double lk = 2*pdg[iab]*grp->qsum[ia]*grp->qsum[ib];
                     #if USE_PRIOR_FOR_GTS
@@ -901,7 +834,6 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
                     }
                 }
             }
-            if ( gts[0] != gts[1] ) call->nhets++;
         }
         else
             gts[1] = bcf_int32_vector_end;
@@ -909,55 +841,50 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
         call->ac[ bcf_gt_allele(gts[0]) ]++;
         if ( gts[1]!=bcf_int32_vector_end ) call->ac[ bcf_gt_allele(gts[1]) ]++;
     }
-    if ( call->output_tags & (CALL_FMT_GQ|CALL_FMT_GP) )
+    if ( !(call->output_tags & (CALL_FMT_GQ|CALL_FMT_GP)) ) return;
+    double max, sum;
+    for (is=0; is<nsmpl; is++)
     {
-        double max, sum;
-        for (isample=0; isample<nsmpl; isample++)
-        {
-            gps = call->GPs + isample*nout_gts;
+        int ismpl  = grp->smpl[is];
+        float *gps = call->GPs + ismpl*ngts_new;
 
-            int nmax;
-            if ( call->ploidy )
-            {
-                if ( call->ploidy[isample]==2 ) nmax = nout_gts;
-                else if ( call->ploidy[isample]==1 ) nmax = nout_als;
-                else nmax = 0;
-            }
-            else nmax = nout_gts;
+        int nmax;
+        if ( call->ploidy )
+        {
+            if ( call->ploidy[ismpl]==2 ) nmax = ngts_new;
+            else if ( call->ploidy[ismpl]==1 ) nmax = grp->nals;
+            else nmax = 0;
+        }
+        else nmax = ngts_new;
 
-            max = gps[0];
-            if ( max<0 || nmax==0 )
-            {
-                // no call
-                if ( call->output_tags & CALL_FMT_GP )
-                {
-                    for (i=0; i<nmax; i++) gps[i] = 0;
-                    if ( nmax==0 ) { bcf_float_set_missing(gps[i]); nmax++; }
-                    if ( nmax < nout_gts ) bcf_float_set_vector_end(gps[nmax]);
-                }
-                call->GQs[isample] = 0;
-                continue;
-            }
-            sum = gps[0];
-            for (i=1; i<nmax; i++)
-            {
-                if ( max < gps[i] ) max = gps[i];
-                sum += gps[i];
-            }
-            max = -4.34294*log(1 - max/sum);
-            call->GQs[isample] = max<=INT8_MAX ? max : INT8_MAX;
+        max = gps[0];
+        if ( max<0 || nmax==0 )
+        {
+            // no call
             if ( call->output_tags & CALL_FMT_GP )
             {
-                assert( max );
-                for (i=0; i<nmax; i++) gps[i] = (int)(-4.34294*log(gps[i]/sum));
-                if ( nmax < nout_gts ) bcf_float_set_vector_end(gps[nmax]);
+                for (i=0; i<nmax; i++) gps[i] = 0;
+                if ( nmax==0 ) { bcf_float_set_missing(gps[i]); nmax++; }
+                if ( nmax < ngts_new ) bcf_float_set_vector_end(gps[nmax]);
             }
+            call->GQs[ismpl] = 0;
+            continue;
+        }
+        sum = gps[0];
+        for (i=1; i<nmax; i++)
+        {
+            if ( max < gps[i] ) max = gps[i];
+            sum += gps[i];
+        }
+        max = -4.34294*log(1 - max/sum);
+        call->GQs[ismpl] = max<=INT8_MAX ? max : INT8_MAX;
+        if ( call->output_tags & CALL_FMT_GP )
+        {
+            assert( max );
+            for (i=0; i<nmax; i++) gps[i] = gps[i]/sum;
+            for (; i<ngts_new; i++) bcf_float_set_vector_end(gps[i]);
         }
     }
-    if ( call->output_tags & CALL_FMT_GP )
-        bcf_update_format_float(call->hdr, rec, "GP", call->GPs, nsmpl*nout_gts);
-    if ( call->output_tags & CALL_FMT_GQ )
-        bcf_update_format_int32(call->hdr, rec, "GQ", call->GQs, nsmpl);
 }
 
 
@@ -980,12 +907,13 @@ static void mcall_call_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_a
     Individual qualities are calculated as
         GQ(F=i,M=j,K=k) = P(F=i,M=j,K=k) / \sum_{x,y} P(F=i,M=x,K=y)
  */
-static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
+#if 0
+static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int nals_new, int als_new)
 {
     int ia, ib, i;
     int nsmpl    = bcf_hdr_nsamples(call->hdr);
     int ngts     = nals*(nals+1)/2;
-    int nout_gts = nout_als*(nout_als+1)/2;
+    int nout_gts = nals_new*(nals_new+1)/2;
     double *gls  = call->GLs - nout_gts;
     double *pdg  = call->pdg - ngts;
 
@@ -1015,7 +943,7 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
         double best_lk = 0;
         for (ia=0; ia<nals; ia++)
         {
-            if ( !(out_als & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
+            if ( !(als_new & 1<<ia) ) continue;     // ia-th allele not in the final selection, skip
             int iaa   = bcf_alleles2gt(ia,ia);      // PL index of the ia/ia genotype
             int idx   = bcf_alleles2gt(call->als_map[ia],call->als_map[ia]);
             double lk = ploidy==2 ? pdg[iaa]*grp->qsum[ia]*grp->qsum[ia] : pdg[iaa]*grp->qsum[ia];
@@ -1031,10 +959,10 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
         {
             for (ia=0; ia<nals; ia++)
             {
-                if ( !(out_als & 1<<ia) ) continue;
+                if ( !(als_new & 1<<ia) ) continue;
                 for (ib=0; ib<ia; ib++)
                 {
-                    if ( !(out_als & 1<<ib) ) continue;
+                    if ( !(als_new & 1<<ib) ) continue;
                     int iab   = bcf_alleles2gt(ia,ib);
                     int idx   = bcf_alleles2gt(call->als_map[ia],call->als_map[ib]);
                     double lk = 2*pdg[iab]*grp->qsum[ia]*grp->qsum[ib];
@@ -1078,8 +1006,8 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
     for (ifm=0; ifm<call->nfams; ifm++)
     {
         family_t *fam = &call->fams[ifm];
-        int ntrio = call->ntrio[fam->type][nout_als];
-        uint16_t *trio = call->trio[fam->type][nout_als];
+        int ntrio = call->ntrio[fam->type][nals_new];
+        uint16_t *trio = call->trio[fam->type][nals_new];
 
         // Unconstrained likelihood
         int uc_itr = 0;
@@ -1227,11 +1155,12 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
         bcf_update_format_int32(call->hdr,rec,"CGT",call->cgts,nsmpl);
     }
 }
+#endif
 
-static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
+static void mcall_trim_and_update_PLs(call_t *call, bcf1_t *rec, int nals_ori, int nals_new)
 {
-    int ngts  = nals*(nals+1)/2;
-    int npls_src = ngts, npls_dst = nout_als*(nout_als+1)/2;     // number of PL values in diploid samples, ori and new
+    int npls_src = nals_ori*(nals_ori+1)/2;
+    int npls_dst = nals_new*(nals_new+1)/2;     // number of PL values in diploid samples, ori and new
     if ( call->all_diploid && npls_src == npls_dst ) return;
 
     int *pls_src = call->PLs, *pls_dst = call->PLs;
@@ -1248,7 +1177,7 @@ static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, in
         }
         else if ( ploidy==1 )
         {
-            for (ia=0; ia<nout_als; ia++)
+            for (ia=0; ia<nals_new; ia++)
             {
                 int isrc = (ia+1)*(ia+2)/2-1;
                 pls_dst[ia] = pls_src[ call->pl_map[isrc] ];
@@ -1258,7 +1187,7 @@ static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, in
         else
         {
             pls_dst[0] = bcf_int32_missing;
-            pls_dst[1] = bcf_int32_vector_end;  // relying on nout_als>1 in mcall()
+            pls_dst[1] = bcf_int32_vector_end;  // relying on nals_new>1 in mcall()
         }
         pls_src += npls_src;
         pls_dst += npls_dst;
@@ -1266,9 +1195,9 @@ static void mcall_trim_PLs(call_t *call, bcf1_t *rec, int nals, int nout_als, in
     bcf_update_format_int32(call->hdr, rec, "PL", call->PLs, npls_dst*nsmpl);
 }
 
-void mcall_trim_numberR(call_t *call, bcf1_t *rec, int nals, int nout_als, int out_als)
+void mcall_trim_and_update_numberR(call_t *call, bcf1_t *rec, int nals_ori, int nals_new)
 {
-    if ( nals==nout_als ) return;
+    if ( nals_ori==nals_new ) return;
 
     int i,j, nret, size = sizeof(float);
 
@@ -1287,17 +1216,17 @@ void mcall_trim_numberR(call_t *call, bcf1_t *rec, int nals, int nout_als, int o
         nret = bcf_get_info_values(call->hdr, rec, key, &tmp_ori, &ntmp_ori, type);
         if ( nret<=0 ) continue;
 
-        if ( nout_als==1 )
+        if ( nals_new==1 )
             bcf_update_info_int32(call->hdr, rec, key, tmp_ori, 1);     // has to be the REF, the order could not change
         else
         {
-            for (j=0; j<nals; j++)
+            for (j=0; j<nals_ori; j++)
             {
                 int k = call->als_map[j];
                 if ( k==-1 ) continue;   // to be dropped
                 memcpy((char *)tmp_new+size*k, (char *)tmp_ori+size*j, size);
             }
-            bcf_update_info_int32(call->hdr, rec, key, tmp_new, nout_als);
+            bcf_update_info_int32(call->hdr, rec, key, tmp_new, nals_new);
         }
     }
 
@@ -1314,21 +1243,21 @@ void mcall_trim_numberR(call_t *call, bcf1_t *rec, int nals, int nout_als, int o
         if (nret<=0) continue;
         int nsmpl = bcf_hdr_nsamples(call->hdr);
 
-        assert( nret==nals*nsmpl );
+        assert( nret==nals_ori*nsmpl );
 
         for (j=0; j<nsmpl; j++)
         {
-            char *ptr_src = (char *)tmp_ori + j*nals*size;
-            char *ptr_dst = (char *)tmp_new + j*nout_als*size;
+            char *ptr_src = (char *)tmp_ori + j*nals_ori*size;
+            char *ptr_dst = (char *)tmp_new + j*nals_new*size;
             int k;
-            for (k=0; k<nals; k++)
+            for (k=0; k<nals_ori; k++)
             {
                 int l = call->als_map[k];
                 if ( l==-1 ) continue;   // to be dropped
                 memcpy(ptr_dst+size*l, ptr_src+size*k, size);
             }
         }
-        bcf_update_format_int32(call->hdr, rec, key, tmp_new, nout_als*nsmpl);
+        bcf_update_format_int32(call->hdr, rec, key, tmp_new, nals_new*nsmpl);
     }
 
     call->PLs    = (int32_t*) tmp_new;
@@ -1443,12 +1372,12 @@ static int mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
     }
     bcf_update_format_int32(call->hdr, rec, "PL", call->itmp, npls_new*nsmpl);
 
-    // update QS
-    int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->smpl_grp.grp[0].qsum, &call->smpl_grp.grp[0].nqsum);
-    hts_expand(float,nals,call->nqsum,call->qsum);
+    // update QS, use temporarily call->GPs to store the values
+    int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->smpl_grp[0].qsum, &call->smpl_grp[0].nqsum);
+    hts_expand(float,nals,call->nGPs,call->GPs);
     for (i=0; i<nals; i++)
-        call->qsum[i] = call->als_map[i]<nqs ? call->smpl_grp.grp[0].qsum[call->als_map[i]] : 0;
-    bcf_update_info_float(call->hdr, rec, "QS", call->qsum, nals);
+        call->GPs[i] = call->als_map[i]<nqs ? call->smpl_grp[0].qsum[call->als_map[i]] : 0;
+    bcf_update_info_float(call->hdr, rec, "QS", call->GPs, nals);
 
     // update any Number=R tags
     void *tmp_ori = call->itmp, *tmp_new = call->PLs;  // reusing PLs storage which is not used at this point
@@ -1489,7 +1418,6 @@ static int mcall_constrain_alleles(call_t *call, bcf1_t *rec, int *unseen)
     call->itmp   = (int32_t*) tmp_ori;
     call->n_itmp = ntmp_ori;
 
-
     if ( *unseen ) *unseen = nals-1;
     return 0;
 }
@@ -1508,203 +1436,229 @@ int mcall(call_t *call, bcf1_t *rec)
     // Force alleles when calling genotypes given alleles was requested
     if ( call->flag & CALL_CONSTR_ALLELES && mcall_constrain_alleles(call, rec, &unseen)!=0 ) return -2;
 
-    int nsmpl = bcf_hdr_nsamples(call->hdr);
-    int nals  = rec->n_allele;
-    hts_expand(int,nals,call->nac,call->ac);
-    hts_expand(int,nals,call->nals_map,call->als_map);
-    hts_expand(int,nals*(nals+1)/2,call->npl_map,call->pl_map);
+    int nsmpl    = bcf_hdr_nsamples(call->hdr);
+    int nals_ori = rec->n_allele;
+    hts_expand(int,nals_ori,call->nac,call->ac);
+    hts_expand(int,nals_ori,call->nals_map,call->als_map);
+    hts_expand(int,nals_ori*(nals_ori+1)/2,call->npl_map,call->pl_map);
 
     // Get the genotype likelihoods
     call->nPLs = bcf_get_format_int32(call->hdr, rec, "PL", &call->PLs, &call->mPLs);
-    if ( call->nPLs!=nsmpl*nals*(nals+1)/2 && call->nPLs!=nsmpl*nals )  // a mixture of diploid and haploid or haploid only
-        error("Wrong number of PL fields? nals=%d npl=%d\n", nals,call->nPLs);
+    if ( call->nPLs!=nsmpl*nals_ori*(nals_ori+1)/2 && call->nPLs!=nsmpl*nals_ori )  // a mixture of diploid and haploid or haploid only
+        error("Wrong number of PL fields? nals=%d npl=%d\n", nals_ori,call->nPLs);
 
     // Convert PLs to probabilities
-    int ngts = nals*(nals+1)/2;
+    int ngts_ori = nals_ori*(nals_ori+1)/2;
     hts_expand(double, call->nPLs, call->npdg, call->pdg);
-    set_pdg(call->pl2p, call->PLs, call->pdg, nsmpl, ngts, unseen);
+    set_pdg(call->pl2p, call->PLs, call->pdg, nsmpl, ngts_ori, unseen);
 
     // Get sum of qualities, serves as an AF estimate, f_x = QS/N in Eq. 1 in call-m math notes.
-    if ( call->smpl_grp.ngrp == 1  )
+    if ( call->nsmpl_grp == 1  )
     {
-        int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->smpl_grp.grp[0].qsum, &call->smpl_grp.grp[0].nqsum);
+        int nqs = bcf_get_info_float(call->hdr, rec, "QS", &call->smpl_grp[0].qsum, &call->smpl_grp[0].nqsum);
         if ( nqs<=0 ) error("The QS annotation not present at %s:%d\n", bcf_seqname(call->hdr,rec),rec->pos+1);
-        if ( nqs < nals )
+        if ( nqs < nals_ori )
         {
             // Some of the listed alleles do not have the corresponding QS field. This is
             // typically ref-only site with <*> in ALT.
-            hts_expand(float,nals,call->smpl_grp.grp[0].nqsum,call->smpl_grp.grp[0].qsum);
-            for (i=nqs; i<nals; i++) call->smpl_grp.grp[0].qsum[i] = 0;
+            hts_expand(float,nals_ori,call->smpl_grp[0].nqsum,call->smpl_grp[0].qsum);
+            for (i=nqs; i<nals_ori; i++) call->smpl_grp[0].qsum[i] = 0;
         }
     }
     else
     {
-        for (j=0; j<call->smpl_grp.ngrp; j++)
+        for (j=0; j<call->nsmpl_grp; j++)
         {
-            hts_expand(float,nals,call->smpl_grp.grp[j].nqsum,call->smpl_grp.grp[j].qsum);
-            memset(call->smpl_grp.grp[j].qsum, 0, sizeof(float)*nals);
+            hts_expand(float,nals_ori,call->smpl_grp[j].nqsum,call->smpl_grp[j].qsum);
+            memset(call->smpl_grp[j].qsum, 0, sizeof(float)*nals_ori);
         }
 
-        int nad = bcf_get_format_int32(call->hdr, rec, "AD", &call->ADs, &call->nADs);
-        if ( nad<1 ) error("Error: FORMAT/AD is required with the -G option, mpileup must be run with -a AD\n");
+        // Use FORMAT/AD or FORMAT/QS
+        int nad = bcf_get_format_int32(call->hdr, rec, call->sample_groups_tag, &call->ADs, &call->nADs);
+        if ( nad<1 ) error("Error: FORMAT/%s is required with the -G option, mpileup must be run with \"-a AD\" or \"-a QS\"\n",call->sample_groups_tag);
         nad /= bcf_hdr_nsamples(call->hdr);
-        hts_expand(float,nals,call->nqsum,call->qsum);
-        float qsum = 0;
-        for (i=0; i<bcf_hdr_nsamples(call->hdr); i++)
+        for (i=0; i<call->nsmpl_grp; i++)
         {
-            int32_t *ptr = call->ADs + i*nad;
-            for (j=0; j<nad; j++)
+            int is;
+            smpl_grp_t *grp = &call->smpl_grp[i];
+            hts_expand(float,nals_ori,grp->nqsum,grp->qsum);
+            for (j=0; j<nals_ori; j++) grp->qsum[j] = 0;
+            for (is=0; is<grp->nsmpl; is++)
             {
-                if ( ptr[j]==bcf_int32_vector_end ) break;
-                if ( ptr[j]==bcf_int32_missing ) call->qsum[j] = 0;
-                else { call->qsum[j] = ptr[j]; qsum += ptr[j]; }
+                int ismpl = grp->smpl[is];
+                int32_t *ptr = call->ADs + ismpl*nad;
+                float sum = 0;
+                for (j=0; j<nad; j++)
+                {
+                    if ( ptr[j]==bcf_int32_vector_end ) break;
+                    if ( ptr[j]!=bcf_int32_missing ) sum += ptr[j];
+                }
+                if ( sum )
+                {
+                    for (j=0; j<nad; j++)
+                    {
+                        if ( ptr[j]==bcf_int32_vector_end ) break;
+                        if ( ptr[j]!=bcf_int32_missing ) grp->qsum[j] += ptr[j]/sum;
+                    }
+                }
             }
-            for (; j<nals; j++) call->qsum[j] = 0;
-            if ( qsum ) 
-                for (j=0; j<nals; j++) call->qsum[j] /= qsum;
-
-            grp1_t *grp = &call->smpl_grp.grp[call->smpl_grp.smpl2grp[i]];
-            for (j=0; j<nals; j++)
-                grp->qsum[j] += call->qsum[j];
         }
     }
 
     // If available, take into account reference panel AFs
     if ( call->prior_AN && bcf_get_info_int32(call->hdr, rec, call->prior_AN ,&call->ac, &call->nac)==1 )
     {
-        int an = call->ac[0];
-        if ( bcf_get_info_int32(call->hdr, rec, call->prior_AC ,&call->ac, &call->nac)==nals-1 )
+        int an = call->ac[0];   // number of alleles total, proceed only if not zero; reuse call->ac
+        if ( an > 0 && bcf_get_info_int32(call->hdr, rec, call->prior_AC ,&call->ac, &call->nac)==nals_ori-1 )    // number of ALT alleles
         {
-            int ac0 = an;   // number of alleles in the reference population
-            for (i=0; i<nals-1; i++)
+            int ac0 = an;       // this will become the number of REFs
+            for (i=0; i<nals_ori-1; i++)
             {
                 if ( call->ac[i]==bcf_int32_vector_end ) break;
                 if ( call->ac[i]==bcf_int32_missing ) continue;
                 ac0 -= call->ac[i];
-                for (j=0; j<call->smpl_grp.ngrp; j++)
-                    call->smpl_grp.grp[j].qsum[i+1] += call->ac[i]*0.5;
+
+                // here an*0.5 is the number of samples in the population and ac*0.5 is the AF weighted by the number of samples
+                for (j=0; j<call->nsmpl_grp; j++)
+                    call->smpl_grp[j].qsum[i+1] = (call->smpl_grp[j].qsum[i+1] + 0.5*call->ac[i]) / (call->smpl_grp[j].nsmpl + 0.5*an);
             }
             if ( ac0<0 ) error("Incorrect %s,%s values at %s:%d\n", call->prior_AN,call->prior_AC,bcf_seqname(call->hdr,rec),rec->pos+1);
-            for (j=0; j<call->smpl_grp.ngrp; j++)
-                call->smpl_grp.grp[j].qsum[0] += ac0*0.5;
-            for (i=0; i<nals; i++)
-            {
-                for (j=0; j<call->smpl_grp.ngrp; j++)
-                    call->smpl_grp.grp[j].qsum[i] /= nsmpl + 0.5*an;
-            }
+            for (j=0; j<call->nsmpl_grp; j++)
+                call->smpl_grp[j].qsum[0] = (call->smpl_grp[j].qsum[0] + 0.5*ac0) / (call->smpl_grp[j].nsmpl + 0.5*an);
         }
     }
 
-    for (j=0; j<call->smpl_grp.ngrp; j++)
+    // normalize so that QS sums to 1 for each group
+    for (j=0; j<call->nsmpl_grp; j++)
     {
-        float qsum_tot = 0;
-        for (i=0; i<nals; i++) qsum_tot += call->smpl_grp.grp[j].qsum[i];
-        if ( qsum_tot ) for (i=0; i<nals; i++) call->smpl_grp.grp[j].qsum[i] /= qsum_tot;
+        float sum = 0;
+        for (i=0; i<nals_ori; i++) sum += call->smpl_grp[j].qsum[i];
+        if ( sum ) for (i=0; i<nals_ori; i++) call->smpl_grp[j].qsum[i] /= sum;
     }
 
     bcf_update_info_int32(call->hdr, rec, "QS", NULL, 0);      // remove QS tag
 
-    // Find the best combination of alleles
-    int out_als, nout;
-    if ( nals > 8*sizeof(out_als) )
+    if ( nals_ori > 8*sizeof(call->als_new) )
     { 
         fprintf(bcftools_stderr,"Too many alleles at %s:%"PRId64", skipping.\n", bcf_seqname(call->hdr,rec),(int64_t) rec->pos+1);
         return 0; 
     }
-    nout = mcall_find_best_alleles(call, nals, &out_als);
 
-    // Make sure the REF allele is always present
-    if ( !(out_als&1) )
+    // For each group find the best combination of alleles
+    call->als_new = 0;
+    double ref_lk = -HUGE_VAL, lk_sum = -HUGE_VAL, max_qual = -HUGE_VAL;
+    for (j=0; j<call->nsmpl_grp; j++)
     {
-        out_als |= 1;
-        nout++;
+        smpl_grp_t *grp = &call->smpl_grp[j];
+        mcall_find_best_alleles(call, nals_ori, grp);
+        call->als_new |= grp->als;
+        if ( grp->max_lk==-HUGE_VAL ) continue;
+        double qual = -4.343*(grp->ref_lk - logsumexp2(grp->lk_sum,grp->ref_lk));
+        if ( max_qual < qual )
+        {
+            max_qual = qual;
+            lk_sum = grp->lk_sum;
+            ref_lk = grp->ref_lk;
+        }
     }
-    int is_variant = out_als==1 ? 0 : 1;
+
+    // Make sure the REF allele is always present
+    if ( !(call->als_new&1) ) call->als_new |= 1;
+
+    int is_variant = call->als_new==1 ? 0 : 1;
     if ( call->flag & CALL_VARONLY && !is_variant ) return 0;
 
-    // With -A, keep all ALTs except X
-    if ( call->flag & CALL_KEEPALT )
+    call->nals_new = 0;
+    for (i=0; i<nals_ori; i++)
     {
-        nout = 0;
-        for (i=0; i<nals; i++)
-        {
-            if ( i>0 && i==unseen ) continue;
-            out_als |= 1<<i;
-            nout++;
-        }
+        if ( i>0 && i==unseen ) continue;
+        if ( call->flag & CALL_KEEPALT ) call->als_new |= 1<<i;
+        if ( call->als_new & (1<<i) ) call->nals_new++;
     }
 
+    init_allele_trimming_maps(call,nals_ori,call->als_new);
+
     int nAC = 0;
-    if ( out_als==1 )   // only REF allele on output
+    if ( call->als_new==1 )   // only REF allele on output
     {
-        init_allele_trimming_maps(call, 1, nals);
-        mcall_set_ref_genotypes(call,nals);
+        mcall_set_ref_genotypes(call,nals_ori);
         bcf_update_format_int32(call->hdr, rec, "PL", NULL, 0);    // remove PL, useless now
     }
+    else if ( !is_variant )
+    {
+        mcall_set_ref_genotypes(call,nals_ori);     // running with -A, prevent mcall_call_genotypes from putting some ALT back
+        mcall_trim_and_update_PLs(call, rec, nals_ori, call->nals_new);
+    }
     else
     {
         // The most likely set of alleles includes non-reference allele (or was enforced), call genotypes.
         // Note that it is a valid outcome if the called genotypes exclude some of the ALTs.
-        init_allele_trimming_maps(call, out_als, nals);
-        if ( !is_variant )
-            mcall_set_ref_genotypes(call,nals);     // running with -A, prevent mcall_call_genotypes from putting some ALT back
-        else if ( call->flag & CALL_CONSTR_TRIO )
+        int ngts_new = call->nals_new*(call->nals_new+1)/2;
+        hts_expand(float,ngts_new*nsmpl,call->nGPs,call->GPs);
+        for (i=0; i<call->nals_new; i++) call->ac[i] = 0;
+
+        if ( call->flag & CALL_CONSTR_TRIO && call->nals_new>4 )
+        { 
+            fprintf(bcftools_stderr,"Too many alleles at %s:%"PRId64", skipping.\n", bcf_seqname(call->hdr,rec),(int64_t) rec->pos+1);
+            return 0; 
+        }
+        if ( call->output_tags & (CALL_FMT_GQ|CALL_FMT_GP) )
         {
-            if ( nout>4 ) 
-            { 
-                fprintf(bcftools_stderr,"Too many alleles at %s:%"PRId64", skipping.\n", bcf_seqname(call->hdr,rec),(int64_t) rec->pos+1);
-                return 0; 
-            }
-            mcall_call_trio_genotypes(call, rec, nals,nout,out_als);
+            memset(call->GPs,0,nsmpl*ngts_new*sizeof(*call->GPs));
+            memset(call->GQs,0,nsmpl*sizeof(*call->GQs));
+        }
+        for (i=0; i<call->nsmpl_grp; i++)
+        {
+            if ( call->flag & CALL_CONSTR_TRIO )
+                error("todo: constrained trio calling temporarily disabled\n");   //mcall_call_trio_genotypes(call,rec,nals,&call->smpl_grp[i]);
+            else
+                mcall_call_genotypes(call,nals_ori,&call->smpl_grp[i]);
         }
-        else
-            mcall_call_genotypes(call,rec,nals,nout,out_als);
 
         // Skip the site if all samples are 0/0. This can happen occasionally.
-        nAC = 0;
-        for (i=1; i<nout; i++) nAC += call->ac[i];
+        for (i=1; i<call->nals_new; i++) nAC += call->ac[i];
         if ( !nAC && call->flag & CALL_VARONLY ) return 0;
-        mcall_trim_PLs(call, rec, nals, nout, out_als);
+
+        if ( call->output_tags & CALL_FMT_GP )
+            bcf_update_format_float(call->hdr, rec, "GP", call->GPs, nsmpl*ngts_new);
+        if ( call->output_tags & CALL_FMT_GQ )
+            bcf_update_format_int32(call->hdr, rec, "GQ", call->GQs, nsmpl);
+
+        mcall_trim_and_update_PLs(call,rec,nals_ori,call->nals_new);
     }
-    if ( nals!=nout ) mcall_trim_numberR(call, rec, nals, nout, out_als);
+    if ( nals_ori!=call->nals_new )
+        mcall_trim_and_update_numberR(call,rec,nals_ori,call->nals_new);
 
-    // Set QUAL and calculate HWE-related annotations
+    // Set QUAL
     if ( nAC )
     {
-        float icb = calc_ICB(call->ac[0],nAC, call->nhets, call->ndiploid);
-        if ( icb != HUGE_VAL ) bcf_update_info_float(call->hdr, rec, "ICB", &icb, 1);
-
-        float hob = calc_HOB(call->ac[0],nAC, call->nhets, call->ndiploid);
-        if ( hob != HUGE_VAL ) bcf_update_info_float(call->hdr, rec, "HOB", &hob, 1);
-
         // Quality of a variant site. fabs() to avoid negative zeros in VCF output when CALL_KEEPALT is set
-        rec->qual = -4.343*(call->ref_lk - logsumexp2(call->lk_sum,call->ref_lk));
+        rec->qual = max_qual;
     }
     else
     {
         // Set the quality of a REF site
-        if ( call->lk_sum==-HUGE_VAL )  // no support from (high quality) reads, so QUAL=1-prior
+        if ( lk_sum!=-HUGE_VAL )  // likelihoods are available, compute QUAL from them; the QUAL=1-prior fallback for no read support is below
+            rec->qual = -4.343*(lk_sum - logsumexp2(lk_sum,ref_lk));
+        else if ( call->ac[0] )
             rec->qual = call->theta ? -4.343*call->theta : 0;
         else
-            rec->qual = -4.343*(call->lk_sum - logsumexp2(call->lk_sum,call->ref_lk));
+            bcf_float_set_missing(rec->qual);
     }
 
-    if ( rec->qual>999 ) rec->qual = 999;
-    if ( rec->qual>50 ) rec->qual = rint(rec->qual);
-
     // AC, AN
-    if ( nout>1 ) bcf_update_info_int32(call->hdr, rec, "AC", call->ac+1, nout-1);
+    if ( call->nals_new>1 ) bcf_update_info_int32(call->hdr, rec, "AC", call->ac+1, call->nals_new-1);
     nAC += call->ac[0];
     bcf_update_info_int32(call->hdr, rec, "AN", &nAC, 1);
 
     // Remove unused alleles
-    hts_expand(char*,nout,call->nals,call->als);
-    for (i=0; i<nals; i++)
+    hts_expand(char*,call->nals_new,call->nals,call->als);
+    for (i=0; i<nals_ori; i++)
         if ( call->als_map[i]>=0 ) call->als[call->als_map[i]] = rec->d.allele[i];
-    bcf_update_alleles(call->hdr, rec, (const char**)call->als, nout);
+    bcf_update_alleles(call->hdr, rec, (const char**)call->als, call->nals_new);
     bcf_update_genotypes(call->hdr, rec, call->gts, nsmpl*2);
 
-    // DP4 tag
+    // DP4 and PV4 tags
     if ( bcf_get_info_float(call->hdr, rec, "I16", &call->anno16, &call->n16)==16 )
     {
         int32_t dp[4]; dp[0] = call->anno16[0]; dp[1] = call->anno16[1]; dp[2] = call->anno16[2]; dp[3] = call->anno16[3];
@@ -1712,10 +1666,22 @@ int mcall(call_t *call, bcf1_t *rec)
 
         int32_t mq = (call->anno16[8]+call->anno16[10])/(call->anno16[0]+call->anno16[1]+call->anno16[2]+call->anno16[3]);
         bcf_update_info_int32(call->hdr, rec, "MQ", &mq, 1);
+
+        if ( call->output_tags & CALL_FMT_PV4 )
+        {
+            anno16_t a;
+            float tmpf[4];
+            int is_tested = test16(call->anno16, &a) >= 0 && a.is_tested ? 1 : 0;
+            if ( is_tested ) 
+            {
+                for (i=0; i<4; i++) tmpf[i] = a.p[i];
+                bcf_update_info_float(call->hdr, rec, "PV4", tmpf, 4);
+            }
+        }
     }
 
     bcf_update_info_int32(call->hdr, rec, "I16", NULL, 0);     // remove I16 tag
 
-    return nout;
+    return call->nals_new;
 }
 
diff --git a/bcftools/mpileup.c b/bcftools/mpileup.c
index c621b4c..1f40eff 100644
--- a/bcftools/mpileup.c
+++ b/bcftools/mpileup.c
@@ -1,6 +1,6 @@
 /*  mpileup.c -- mpileup subcommand. Previously bam_plcmd.c from samtools
 
-    Copyright (C) 2008-2018 Genome Research Ltd.
+    Copyright (C) 2008-2021 Genome Research Ltd.
     Portions copyright (C) 2009-2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -39,6 +39,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <htslib/faidx.h>
 #include <htslib/kstring.h>
 #include <htslib/khash_str2int.h>
+#include <htslib/hts_os.h>
 #include <assert.h>
 #include "regidx.h"
 #include "bcftools.h"
@@ -59,16 +60,19 @@ DEALINGS IN THE SOFTWARE.  */
 #define MPLP_PRINT_MAPQ (1<<10)
 #define MPLP_PER_SAMPLE (1<<11)
 #define MPLP_SMART_OVERLAPS (1<<12)
+#define MPLP_REALN_PARTIAL  (1<<13)
 
 typedef struct _mplp_aux_t mplp_aux_t;
 typedef struct _mplp_pileup_t mplp_pileup_t;
 
 // Data shared by all bam files
 typedef struct {
-    int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag;
+    int min_mq, flag, min_baseQ, max_baseQ, delta_baseQ, capQ_thres, max_depth,
+        max_indel_depth, max_read_len, fmt_flag, ambig_reads;
     int rflag_require, rflag_filter, output_type;
     int openQ, extQ, tandemQ, min_support; // for indels
     double min_frac; // for indels
+    double indel_bias;
     char *reg_fname, *pl_list, *fai_fname, *output_fname;
     int reg_is_file, record_cmd_line, n_threads;
     faidx_t *fai;
@@ -231,7 +235,46 @@ static int mplp_func(void *data, bam1_t *b)
             has_ref = 0;
         }
 
-        if (has_ref && (ma->conf->flag&MPLP_REALN)) sam_prob_realn(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
+        // Allow sufficient room for bam_aux_append of ZQ tag without
+        // a realloc and consequent breakage of pileup's cached pointers.
+        if (has_ref && (ma->conf->flag &MPLP_REALN) && !bam_aux_get(b, "ZQ")) {
+            // Doing sam_prob_realn later is problematic as it adds to
+            // the tag list (ZQ or BQ), which causes a realloc of b->data.
+            // This happens after pileup has built a hash table on the
+            // read name.  It's a deficiency in pileup IMO.
+
+            // We could implement a new sam_prob_realn that returns ZQ
+            // somewhere else and cache it ourselves (pileup clientdata),
+            // but for now we simply use a workaround.
+            //
+            // We create a fake tag of the correct length, which we remove
+            // just prior calling sam_prob_realn so we can guarantee there is
+            // room. (We can't just make room now as bam_copy1 removes it
+            // again).
+            if (b->core.l_qseq > 500) {
+                uint8_t *ZQ = malloc((uint32_t)b->core.l_qseq+1);
+                memset(ZQ, '@', b->core.l_qseq);
+                ZQ[b->core.l_qseq] = 0;
+                bam_aux_append(b, "_Q", 'Z', b->core.l_qseq+1, ZQ);
+                free(ZQ);
+            } else {
+                static uint8_t ZQ[501] =
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
+                ZQ[b->core.l_qseq] = 0;
+                bam_aux_append(b, "_Q", 'Z', b->core.l_qseq+1, ZQ);
+                ZQ[b->core.l_qseq] = '@';
+            }
+        }
+
         if (has_ref && ma->conf->capQ_thres > 10) {
             int q = sam_cap_mapq(b, ref, ref_len, ma->conf->capQ_thres);
             if (q < 0) continue;    // skip
@@ -257,18 +300,46 @@ static int mplp_func(void *data, bam1_t *b)
 static int pileup_constructor(void *data, const bam1_t *b, bam_pileup_cd *cd)
 {
     mplp_aux_t *ma = (mplp_aux_t *)data;
-    cd->i = bam_smpl_get_sample_id(ma->conf->bsmpl, ma->bam_id, (bam1_t *)b) << 1;
-    if ( ma->conf->fmt_flag & (B2B_INFO_SCR|B2B_FMT_SCR) )
-    {
-        int i;
-        for (i=0; i<b->core.n_cigar; i++)
-        {
-            int cig = bam_get_cigar(b)[i] & BAM_CIGAR_MASK;
-            if ( cig!=BAM_CSOFT_CLIP ) continue;
-            cd->i |= 1;
+    int n = bam_smpl_get_sample_id(ma->conf->bsmpl, ma->bam_id, (bam1_t *)b);
+    cd->i = 0;
+    PLP_SET_SAMPLE_ID(cd->i, n);
+    // Whether read has a soft-clip is used in mplp_realn's heuristics.
+    // TODO: consider whether clip length is beneficial to use?
+    int i;
+    for (i=0; i<b->core.n_cigar; i++) {
+        int cig = bam_get_cigar(b)[i] & BAM_CIGAR_MASK;
+        if (cig == BAM_CSOFT_CLIP) {
+            PLP_SET_SOFT_CLIP(cd->i);
             break;
         }
     }
+
+    if (ma->conf->flag & MPLP_REALN) {
+        int i, tot_ins = 0;
+        uint32_t *cigar = bam_get_cigar(b);
+        int p = 0;
+        for (i=0; i<b->core.n_cigar; i++) {
+            int cig = cigar[i] & BAM_CIGAR_MASK;
+            if (bam_cigar_type(cig) & 2)
+                p += cigar[i] >> BAM_CIGAR_SHIFT;
+            if (cig == BAM_CINS || cig == BAM_CDEL || cig == BAM_CREF_SKIP) {
+                tot_ins += cigar[i] >> BAM_CIGAR_SHIFT;
+                // Possible further optimisation, check tot_ins==1 later
+                // (and remove break) so we can detect single bp indels.
+                // We may want to focus BAQ on more complex regions only.
+                PLP_SET_INDEL(cd->i);
+                break;
+            }
+
+            // TODO: proper p->cd struct and have cd->i as a size rather
+            // than a flag.
+
+            // Then aggregate together the sizes and if just 1 size for all
+            // reads or 2 sizes for approx 50/50 split in all reads, then
+            // treat this as a well-aligned variant and don't run BAQ.
+        }
+    }
+
     return 0;
 }
 
@@ -282,7 +353,7 @@ static void group_smpl(mplp_pileup_t *m, bam_smpl_t *bsmpl, int n, int *n_plp, c
         {
             const bam_pileup1_t *p = plp[i] + j;
             int id = PLP_SAMPLE_ID(p->cd.i);
-            if (m->n_plp[id] == m->m_plp[id]) 
+            if (m->n_plp[id] == m->m_plp[id])
             {
                 m->m_plp[id] = m->m_plp[id]? m->m_plp[id]<<1 : 8;
                 m->plp[id] = (bam_pileup1_t*) realloc(m->plp[id], sizeof(bam_pileup1_t) * m->m_plp[id]);
@@ -317,6 +388,150 @@ static void flush_bcf_records(mplp_conf_t *conf, htsFile *fp, bcf_hdr_t *hdr, bc
     if ( rec && bcf_write1(fp,hdr,rec)!=0 ) error("[%s] Error: failed to write the record to %s\n", __func__,conf->output_fname?conf->output_fname:"standard output");
 }
 
+/*
+ * Looks for an indel at this position.
+ *
+ * Only reads that overlap an indel locus get realigned.  This considerably
+ * reduces the cost of running BAQ while keeping the main benefits.
+ *
+ * TODO: also consider only realigning reads that don't span the indel
+ * by more than a certain amount either-side.  Ie focus BAQ only on reads
+ * ending adjacent to the indel, where the alignment is most likely to
+ * be wrong.  (2nd TODO: do this based on sequence context; STRs bad, unique
+ * data good.)
+ *
+ * NB: this may sadly realign after we've already used the data.  Hmm...
+ */
+static void mplp_realn(int n, int *n_plp, const bam_pileup1_t **plp,
+                       int flag, int max_read_len,
+                       char *ref, int ref_len, int pos) {
+    int i, j, has_indel = 0, has_clip = 0, nt = 0;
+    int min_indel = INT_MAX, max_indel = INT_MIN;
+
+    // Is an indel present.
+    // NB: don't bother even checking if very long as almost guaranteed
+    // to have indel (and likely soft-clips too).
+    for (i = 0; i < n; i++) { // iterate over bams
+        nt += n_plp[i];
+        for (j = 0; j < n_plp[i]; j++) { // iterate over reads
+            bam_pileup1_t *p = (bam_pileup1_t *)plp[i] + j;
+            has_indel += (PLP_HAS_INDEL(p->cd.i) || p->indel) ? 1 : 0;
+            // Has_clip is almost always true for very long reads
+            // (eg PacBio CCS), but these rarely matter as the clip
+            // is likely a long way from this indel.
+            has_clip  += (PLP_HAS_SOFT_CLIP(p->cd.i))         ? 1 : 0;
+            if (max_indel < p->indel)
+                max_indel = p->indel;
+            if (min_indel > p->indel)
+                min_indel = p->indel;
+        }
+    }
+
+    if (flag & MPLP_REALN_PARTIAL) {
+        if (has_indel == 0 ||
+            (has_clip < 0.2*nt && max_indel == min_indel &&
+             (has_indel < 0.1*nt /*|| has_indel > 0.9*nt*/ || has_indel == 1)))
+            return;
+    }
+
+    // Realign
+    for (i = 0; i < n; i++) { // iterate over bams
+        for (j = 0; j < n_plp[i]; j++) { // iterate over reads
+            const bam_pileup1_t *p = plp[i] + j;
+            bam1_t *b = p->b;
+
+            // Avoid doing multiple times.
+            //
+            // Note we cannot modify p->cd.i here with a PLP_SET macro
+            // because the cd item is held by mpileup in an lbnode_t
+            // struct and copied over to the pileup struct for each
+            // iteration, essentially making p->cd.i read only.
+            //
+            // We could use our own structure (p->cd.p), allocated during
+            // the constructor, but for simplicity we play dirty and
+            // abuse an unused flag bit instead.
+            if (b->core.flag & 32768)
+                continue;
+            b->core.flag |= 32768;
+
+            if (b->core.l_qseq > max_read_len)
+                continue;
+
+            // Check p->cigar_ind and see what cigar elements are before
+            // and after.  How close is this location to the end of the
+            // read?  Only realign if we don't span by more than X bases.
+            //
+            // Again, best only done on deeper data as BAQ helps
+            // disproportionately more on shallow data sets.
+            //
+            // This rescues some of the false negatives that are caused by
+            // systematic reduction in quality due to sample vs ref alignment.
+
+// At deep coverage we skip realigning more reads as we have sufficient depth.
+// This rescues some false negatives.  At shallow depth we pay for this with
+// more FP so are more stringent on spanning size.
+#define REALN_DIST (40+10*(nt<40)+10*(nt<20))
+            uint32_t *cig = bam_get_cigar(b);
+            int ncig = b->core.n_cigar;
+
+            // Don't realign reads where indel is in middle?
+            // On long read data we don't care about soft-clips at the ends.
+            // For short read data, we always calc BAQ on these as they're
+            // a common source of false positives.
+            if ((flag & MPLP_REALN_PARTIAL) && nt > 15 && ncig > 1) {
+                // Left & right cigar op match.
+                int lr = b->core.l_qseq > 500;
+                int lm = 0, rm = 0, k;
+                for (k = 0; k < ncig; k++) {
+                    int cop = bam_cigar_op(cig[k]);
+                    if (lr && (cop == BAM_CHARD_CLIP || cop == BAM_CSOFT_CLIP))
+                        continue;
+
+                    if (cop == BAM_CMATCH || cop == BAM_CDIFF ||
+                        cop == BAM_CEQUAL)
+                        lm += bam_cigar_oplen(cig[k]);
+                    else
+                        break;
+                }
+
+                for (k = ncig-1; k >= 0; k--) {
+                    int cop = bam_cigar_op(cig[k]);
+                    if (lr && (cop == BAM_CHARD_CLIP || cop == BAM_CSOFT_CLIP))
+                        continue;
+
+                    if (cop == BAM_CMATCH || cop == BAM_CDIFF ||
+                        cop == BAM_CEQUAL)
+                        rm += bam_cigar_oplen(cig[k]);
+                    else
+                        break;
+                }
+
+                if (lm >= REALN_DIST*4 && rm >= REALN_DIST*4)
+                    continue;
+
+                if (lm >= REALN_DIST && rm >= REALN_DIST &&
+                    has_clip < (0.15+0.05*(nt>20))*nt)
+                    continue;
+            }
+
+            if (b->core.l_qseq > 500) {
+                // don't do BAQ on long-read data if it's going to
+                // cause us to have a large band-width and be costly in CPU
+                int rl = bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b));
+                if (abs(rl - b->core.l_qseq) * b->core.l_qseq >= 500000)
+                    continue;
+            }
+
+            // Fudge: make room for ZQ tag.
+            uint8_t *_Q = bam_aux_get(b, "_Q");
+            if (_Q) bam_aux_del(b, _Q);
+            sam_prob_realn(b, ref, ref_len, (flag & MPLP_REDO_BAQ) ? 7 : 3);
+        }
+    }
+
+    return;
+}
+
 static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
 {
     bam_hdr_t *hdr = conf->mplp_data[0]->h; // header of first file in input list
@@ -324,7 +539,7 @@ static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
     int ret, i, tid, pos, ref_len;
     char *ref;
 
-    while ( (ret=bam_mplp_auto(conf->iter, &tid, &pos, conf->n_plp, conf->plp)) > 0) 
+    while ( (ret=bam_mplp_auto(conf->iter, &tid, &pos, conf->n_plp, conf->plp)) > 0)
     {
         if ( pos<beg || pos>end ) continue;
         if ( conf->bed && tid >= 0 )
@@ -333,7 +548,10 @@ static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
             if ( !conf->bed_logic ) overlap = overlap ? 0 : 1;
             if ( !overlap ) continue;
         }
-        mplp_get_ref(conf->mplp_data[0], tid, &ref, &ref_len);
+        int has_ref = mplp_get_ref(conf->mplp_data[0], tid, &ref, &ref_len);
+        if (has_ref && (conf->flag & MPLP_REALN))
+            mplp_realn(conf->nfiles, conf->n_plp, conf->plp, conf->flag,
+                       conf->max_read_len, ref, ref_len, pos);
 
         int total_depth, _ref0, ref16;
         for (i = total_depth = 0; i < conf->nfiles; ++i) total_depth += conf->n_plp[i];
@@ -346,18 +564,19 @@ static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
         conf->bc.tid = tid; conf->bc.pos = pos;
         bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, ref16, &conf->bc);
         bcf_clear1(conf->bcf_rec);
-        bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, 0, 0);
+        bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag,
+                     conf->bca, 0);
         flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, conf->bcf_rec);
 
         // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
         // check me: rghash in bcf_call_gap_prep() should have no effect, reads mplp_func already excludes them
-        if (!(conf->flag&MPLP_NO_INDEL) && total_depth < conf->max_indel_depth 
-            && bcf_call_gap_prep(conf->gplp->n, conf->gplp->n_plp, conf->gplp->plp, pos, conf->bca, ref) >= 0)
+        if (!(conf->flag&MPLP_NO_INDEL) && total_depth < conf->max_indel_depth
+            && (bcf_callaux_clean(conf->bca, &conf->bc),
+                bcf_call_gap_prep(conf->gplp->n, conf->gplp->n_plp, conf->gplp->plp, pos, conf->bca, ref) >= 0))
         {
-            bcf_callaux_clean(conf->bca, &conf->bc);
             for (i = 0; i < conf->gplp->n; ++i)
                 bcf_call_glfgen(conf->gplp->n_plp[i], conf->gplp->plp[i], -1, conf->bca, conf->bcr + i);
-            if (bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, -1, &conf->bc) >= 0) 
+            if (bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, -1, &conf->bc) >= 0)
             {
                 bcf_clear1(conf->bcf_rec);
                 bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, conf->bca, ref);
@@ -461,7 +680,7 @@ static int mpileup(mplp_conf_t *conf)
             conf->buf.l = 0;
             ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg+1,conf->reg_itr->end+1);
             conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->buf.s);
-            if ( !conf->mplp_data[i]->iter ) 
+            if ( !conf->mplp_data[i]->iter )
             {
                 conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
                 if ( conf->mplp_data[i]->iter ) {
@@ -487,15 +706,19 @@ static int mpileup(mplp_conf_t *conf)
             conf->mplp_data[i]->h = hdr;
         }
     }
+    if ( !hdr ) {
+        fprintf(stderr, "[%s] failed to find a file header with usable read groups\n", __func__);
+        exit(EXIT_FAILURE);
+    }
     // allocate data storage proportionate to number of samples being studied sm->n
     bam_smpl_get_samples(conf->bsmpl, &conf->gplp->n);
     conf->gplp->n_plp = (int*) calloc(conf->gplp->n, sizeof(int));
     conf->gplp->m_plp = (int*) calloc(conf->gplp->n, sizeof(int));
-    conf->gplp->plp = (bam_pileup1_t**) calloc(conf->gplp->n, sizeof(bam_pileup1_t*));  
+    conf->gplp->plp = (bam_pileup1_t**) calloc(conf->gplp->n, sizeof(bam_pileup1_t*));
 
     fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, conf->gplp->n, conf->nfiles);
     // write the VCF header
-    conf->bcf_fp = hts_open(conf->output_fname?conf->output_fname:"-", hts_bcf_wmode(conf->output_type));
+    conf->bcf_fp = hts_open(conf->output_fname?conf->output_fname:"-", hts_bcf_wmode2(conf->output_type,conf->output_fname));
     if (conf->bcf_fp == NULL) {
         fprintf(stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
         exit(EXIT_FAILURE);
@@ -542,11 +765,24 @@ static int mpileup(mplp_conf_t *conf)
     bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
     if ( conf->fmt_flag&B2B_INFO_VDB )
         bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
-    if ( conf->fmt_flag&B2B_INFO_RPB )
-        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
-    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
-    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
-    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
+
+    if (conf->fmt_flag & B2B_INFO_ZSCORE) {
+        if ( conf->fmt_flag&B2B_INFO_RPB )
+            bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Read Position Bias (closer to 0 is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Mapping Quality Bias (closer to 0 is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Base Quality Bias (closer to 0 is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Mapping Quality vs Strand Bias (closer to 0 is better)\">");
+        if ( conf->fmt_flag&B2B_INFO_SCB )
+            bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=SCBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Soft-Clip Length Bias (closer to 0 is better)\">");
+    } else {
+        if ( conf->fmt_flag&B2B_INFO_RPB )
+            bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
+    }
+
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=FS,Number=1,Type=Float,Description=\"Phred-scaled p-value using Fisher's exact test to detect strand bias\">");
 #if CDF_MWU_TESTS
     bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
     bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
@@ -576,6 +812,8 @@ static int mpileup(mplp_conf_t *conf)
         bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand (high-quality bases)\">");
     if ( conf->fmt_flag&B2B_FMT_ADR )
         bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand (high-quality bases)\">");
+    if ( conf->fmt_flag&B2B_FMT_QS )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=QS,Number=R,Type=Integer,Description=\"Phred-score allele quality sum used by `call -mG` and `+trio-dnm`\">");
     if ( conf->fmt_flag&B2B_INFO_AD )
         bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths (high-quality bases)\">");
     if ( conf->fmt_flag&B2B_INFO_ADF )
@@ -595,17 +833,23 @@ static int mpileup(mplp_conf_t *conf)
         bcf_hdr_add_sample(conf->bcf_hdr, smpl[i]);
     if ( bcf_hdr_write(conf->bcf_fp, conf->bcf_hdr)!=0 ) error("[%s] Error: failed to write the header to %s\n",__func__,conf->output_fname?conf->output_fname:"standard output");
 
-    conf->bca = bcf_call_init(-1., conf->min_baseQ);
+    conf->bca = bcf_call_init(-1., conf->min_baseQ, conf->max_baseQ,
+                              conf->delta_baseQ);
     conf->bcr = (bcf_callret1_t*) calloc(nsmpl, sizeof(bcf_callret1_t));
     conf->bca->openQ = conf->openQ, conf->bca->extQ = conf->extQ, conf->bca->tandemQ = conf->tandemQ;
+    conf->bca->indel_bias = conf->indel_bias;
     conf->bca->min_frac = conf->min_frac;
     conf->bca->min_support = conf->min_support;
     conf->bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;
     conf->bca->fmt_flag = conf->fmt_flag;
+    conf->bca->ambig_reads = conf->ambig_reads;
 
     conf->bc.bcf_hdr = conf->bcf_hdr;
     conf->bc.n  = nsmpl;
     conf->bc.PL = (int32_t*) malloc(15 * nsmpl * sizeof(*conf->bc.PL));
+    conf->bc.QS = (int32_t*) malloc(nsmpl*sizeof(*conf->bc.QS)*B2B_MAX_ALLELES);
+    for (i=0; i<nsmpl; i++)
+        conf->bcr[i].QS = conf->bc.QS + i*B2B_MAX_ALLELES;
     if (conf->fmt_flag)
     {
         assert( sizeof(float)==sizeof(int32_t) );
@@ -643,7 +887,7 @@ static int mpileup(mplp_conf_t *conf)
     if ( nregs )
     {
         int ireg = 0;
-        do 
+        do
         {
             // first region is already positioned
             if ( ireg++ > 0 )
@@ -651,11 +895,11 @@ static int mpileup(mplp_conf_t *conf)
                 conf->buf.l = 0;
                 ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg+1,conf->reg_itr->end+1);
 
-                for (i=0; i<conf->nfiles; i++) 
+                for (i=0; i<conf->nfiles; i++)
                 {
                     hts_itr_destroy(conf->mplp_data[i]->iter);
                     conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->buf.s);
-                    if ( !conf->mplp_data[i]->iter ) 
+                    if ( !conf->mplp_data[i]->iter )
                     {
                         conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
                         if ( conf->mplp_data[i]->iter ) {
@@ -690,6 +934,7 @@ static int mpileup(mplp_conf_t *conf)
         free(conf->bc.ADR);
         free(conf->bc.ADF);
         free(conf->bc.SCR);
+        free(conf->bc.QS);
         free(conf->bc.fmt_arr);
         free(conf->bcr);
     }
@@ -793,10 +1038,12 @@ int parse_format_flag(const char *str)
         else if ( !strcasecmp(tags[i],"ADF") || !strcasecmp(tags[i],"FORMAT/ADF") || !strcasecmp(tags[i],"FMT/ADF") ) flag |= B2B_FMT_ADF;
         else if ( !strcasecmp(tags[i],"ADR") || !strcasecmp(tags[i],"FORMAT/ADR") || !strcasecmp(tags[i],"FMT/ADR") ) flag |= B2B_FMT_ADR;
         else if ( !strcasecmp(tags[i],"SCR") || !strcasecmp(tags[i],"FORMAT/SCR") || !strcasecmp(tags[i],"FMT/SCR") ) flag |= B2B_FMT_SCR;
+        else if ( !strcasecmp(tags[i],"QS") || !strcasecmp(tags[i],"FORMAT/QS") || !strcasecmp(tags[i],"FMT/QS") ) flag |= B2B_FMT_QS;
         else if ( !strcasecmp(tags[i],"INFO/SCR") ) flag |= B2B_INFO_SCR;
         else if ( !strcasecmp(tags[i],"INFO/AD") ) flag |= B2B_INFO_AD;
         else if ( !strcasecmp(tags[i],"INFO/ADF") ) flag |= B2B_INFO_ADF;
         else if ( !strcasecmp(tags[i],"INFO/ADR") ) flag |= B2B_INFO_ADR;
+        else if ( !strcasecmp(tags[i],"SCB") || !strcasecmp(tags[i],"INFO/SCB")) flag |= B2B_INFO_SCB;
         else
         {
             fprintf(stderr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
@@ -821,6 +1068,7 @@ static void list_annotations(FILE *fp)
 "  FORMAT/ADF .. Allelic depths on the forward strand (Number=R,Type=Integer)\n"
 "  FORMAT/ADR .. Allelic depths on the reverse strand (Number=R,Type=Integer)\n"
 "  FORMAT/DP  .. Number of high-quality bases (Number=1,Type=Integer)\n"
+"  FORMAT/QS  .. Allele phred-score quality sum for use with `call -mG` and +trio-dnm (Number=R,Type=Integer)\n"
 "  FORMAT/SP  .. Phred-scaled strand bias P-value (Number=1,Type=Integer)\n"
 "  FORMAT/SCR .. Number of soft-clipped reads (Number=1,Type=Integer)\n"
 "\n"
@@ -843,78 +1091,98 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
     // source code in 80 columns, to the extent that's possible.)
 
     fprintf(fp,
-"\n"
-"Usage: bcftools mpileup [options] in1.bam [in2.bam [...]]\n"
-"\n"
-"Input options:\n"
-"  -6, --illumina1.3+      quality is in the Illumina-1.3+ encoding\n"
-"  -A, --count-orphans     do not discard anomalous read pairs\n"
-"  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
-"  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)\n"
-"  -C, --adjust-MQ INT     adjust mapping quality; recommended:50, disable:0 [0]\n"
-"  -d, --max-depth INT     max raw per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+        "\n"
+        "Usage: bcftools mpileup [options] in1.bam [in2.bam [...]]\n"
+        "\n"
+        "Input options:\n"
+        "  -6, --illumina1.3+      quality is in the Illumina-1.3+ encoding\n"
+        "  -A, --count-orphans     do not discard anomalous read pairs\n"
+        "  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
+        "  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)\n"
+        "  -C, --adjust-MQ INT     adjust mapping quality [0]\n"
+        "  -D, --full-BAQ          Apply BAQ everywhere, not just in problematic regions\n"
+        "  -d, --max-depth INT     max raw per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+            fprintf(fp,
+        "  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs\n"
+        "  -f, --fasta-ref FILE    faidx indexed reference sequence file\n"
+        "      --no-reference      do not require fasta reference file\n"
+        "  -G, --read-groups FILE  select or exclude read groups listed in the file\n"
+        "  -q, --min-MQ INT        skip alignments with mapQ smaller than INT [%d]\n", mplp->min_mq);
     fprintf(fp,
-"  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs\n"
-"  -f, --fasta-ref FILE    faidx indexed reference sequence file\n"
-"      --no-reference      do not require fasta reference file\n"
-"  -G, --read-groups FILE  select or exclude read groups listed in the file\n"
-"  -q, --min-MQ INT        skip alignments with mapQ smaller than INT [%d]\n", mplp->min_mq);
+        "  -Q, --min-BQ INT        skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp->min_baseQ);
     fprintf(fp,
-"  -Q, --min-BQ INT        skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp->min_baseQ);
+        "      --max-BQ INT        limit baseQ/BAQ to no more than INT [%d]\n", mplp->max_baseQ);
     fprintf(fp,
-"  -r, --regions REG[,...] comma separated list of regions in which pileup is generated\n"
-"  -R, --regions-file FILE restrict to regions listed in a file\n"
-"      --ignore-RG         ignore RG tags (one BAM = one sample)\n"
-"  --rf, --incl-flags STR|INT  required flags: skip reads with mask bits unset [%s]\n", tmp_require);
+        "      --delta-BQ INT      Use neighbour_qual + INT if less than qual [%d]\n", mplp->delta_baseQ);
     fprintf(fp,
-"  --ff, --excl-flags STR|INT  filter flags: skip reads with mask bits set\n"
-"                                            [%s]\n", tmp_filter);
+        "  -r, --regions REG[,...] comma separated list of regions in which pileup is generated\n"
+        "  -R, --regions-file FILE restrict to regions listed in a file\n"
+        "      --ignore-RG         ignore RG tags (one BAM = one sample)\n"
+        "  --rf, --incl-flags STR|INT  required flags: skip reads with mask bits unset [%s]\n", tmp_require);
     fprintf(fp,
-"  -s, --samples LIST      comma separated list of samples to include\n"
-"  -S, --samples-file FILE file of samples to include\n"
-"  -t, --targets REG[,...] similar to -r but streams rather than index-jumps\n"
-"  -T, --targets-file FILE similar to -R but streams rather than index-jumps\n"
-"  -x, --ignore-overlaps   disable read-pair overlap detection\n"
-"\n"
-"Output options:\n"
-"  -a, --annotate LIST     optional tags to output; '?' to list []\n"
-"  -g, --gvcf INT[,...]    group non-variant sites into gVCF blocks according\n"
-"                          to minimum per-sample DP\n"
-"      --no-version        do not append version and command line to the header\n"
-"  -o, --output FILE       write output to FILE [standard output]\n"
-"  -O, --output-type TYPE  'b' compressed BCF; 'u' uncompressed BCF;\n"
-"                          'z' compressed VCF; 'v' uncompressed VCF [v]\n"
-"      --threads INT       use multithreading with INT worker threads [0]\n"
-"\n"
-"SNP/INDEL genotype likelihoods options:\n"
-"  -e, --ext-prob INT      Phred-scaled gap extension seq error probability [%d]\n", mplp->extQ);
+        "  --ff, --excl-flags STR|INT  filter flags: skip reads with mask bits set\n"
+        "                                            [%s]\n", tmp_filter);
     fprintf(fp,
-"  -F, --gap-frac FLOAT    minimum fraction of gapped reads [%g]\n", mplp->min_frac);
+        "  -s, --samples LIST      comma separated list of samples to include\n"
+        "  -S, --samples-file FILE file of samples to include\n"
+        "  -t, --targets REG[,...] similar to -r but streams rather than index-jumps\n"
+        "  -T, --targets-file FILE similar to -R but streams rather than index-jumps\n"
+        "  -x, --ignore-overlaps   disable read-pair overlap detection\n"
+        "      --seed INT          random number seed used for sampling deep regions [0]\n"
+        "\n"
+        "Output options:\n"
+        "  -a, --annotate LIST     optional tags to output; '?' to list available tags []\n"
+        "  -g, --gvcf INT[,...]    group non-variant sites into gVCF blocks according\n"
+        "                          to minimum per-sample DP\n"
+        "      --no-version        do not append version and command line to the header\n"
+        "  -o, --output FILE       write output to FILE [standard output]\n"
+        "  -O, --output-type TYPE  'b' compressed BCF; 'u' uncompressed BCF;\n"
+        "                          'z' compressed VCF; 'v' uncompressed VCF [v]\n"
+        "  -U, --mwu-u             use older probability scale for Mann-Whitney U test\n"
+        "      --threads INT       use multithreading with INT worker threads [0]\n"
+        "\n"
+        "SNP/INDEL genotype likelihoods options:\n"
+        "  -X, --config STR        Specify platform specific profiles (see below)\n"
+        "  -e, --ext-prob INT      Phred-scaled gap extension seq error probability [%d]\n", mplp->extQ);
     fprintf(fp,
-"  -h, --tandem-qual INT   coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
+        "  -F, --gap-frac FLOAT    minimum fraction of gapped reads [%g]\n", mplp->min_frac);
     fprintf(fp,
-"  -I, --skip-indels       do not perform indel calling\n"
-"  -L, --max-idepth INT    maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
+        "  -h, --tandem-qual INT   coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
     fprintf(fp,
-"  -m, --min-ireads INT    minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
+        "  -I, --skip-indels       do not perform indel calling\n"
+        "  -L, --max-idepth INT    maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
     fprintf(fp,
-"  -o, --open-prob INT     Phred-scaled gap open seq error probability [%d]\n", mplp->openQ);
+        "  -m, --min-ireads INT    minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
     fprintf(fp,
-"  -p, --per-sample-mF     apply -m and -F per-sample for increased sensitivity\n"
-"  -P, --platforms STR     comma separated list of platforms for indels [all]\n"
-"\n"
-"Notes: Assuming diploid individuals.\n"
-"\n"
-"Example:\n"
-"   # See also http://samtools.github.io/bcftools/howtos/variant-calling.html\n"
-"   bcftools mpileup -f reference.fa alignments.bam | bcftools call -mv -Ob -o calls.bcf\n"
-"\n");
+        "  -M, --max-read-len INT  maximum length of read to pass to BAQ algorithm [%d]\n", mplp->max_read_len);
+    fprintf(fp,
+        "  -o, --open-prob INT     Phred-scaled gap open seq error probability [%d]\n", mplp->openQ);
+    fprintf(fp,
+        "  -p, --per-sample-mF     apply -m and -F per-sample for increased sensitivity\n"
+        "  -P, --platforms STR     comma separated list of platforms for indels [all]\n"
+        "  --ar, --ambig-reads STR   What to do with ambiguous indel reads: drop,incAD,incAD0 [drop]\n");
+    fprintf(fp,
+        "      --indel-bias FLOAT  Raise to favour recall over precision [%.2f]\n", mplp->indel_bias);
+    fprintf(fp,"\n");
+    fprintf(fp,
+        "Configuration profiles activated with -X, --config:\n"
+        "    1.12:        -Q13 -h100 -m1 -F0.002\n"
+        "    illumina:    [ default values ]\n"
+        "    ont:         -B -Q5 --max-BQ 30 -I [also try eg |bcftools call -P0.01]\n"
+        "    pacbio-ccs:  -D -Q5 --max-BQ 50 -F0.1 -o25 -e1 --delta-BQ 10 -M99999\n"
+        "\n"
+        "Notes: Assuming diploid individuals.\n"
+        "\n"
+        "Example:\n"
+        "   # See also http://samtools.github.io/bcftools/howtos/variant-calling.html\n"
+        "   bcftools mpileup -Ou -f reference.fa alignments.bam | bcftools call -mv -Ob -o calls.bcf\n"
+        "\n");
 
     free(tmp_require);
     free(tmp_filter);
 }
 
-int bam_mpileup(int argc, char *argv[])
+int main_mpileup(int argc, char *argv[])
 {
     int c;
     const char *file_list = NULL;
@@ -922,12 +1190,15 @@ int bam_mpileup(int argc, char *argv[])
     int nfiles = 0, use_orphan = 0, noref = 0;
     mplp_conf_t mplp;
     memset(&mplp, 0, sizeof(mplp_conf_t));
-    mplp.min_baseQ = 13;
+    mplp.min_baseQ = 1;
+    mplp.max_baseQ = 60;
+    mplp.delta_baseQ = 30;
     mplp.capQ_thres = 0;
     mplp.max_depth = 250; mplp.max_indel_depth = 250;
-    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
-    mplp.min_frac = 0.002; mplp.min_support = 1;
-    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
+    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 500;
+    mplp.min_frac = 0.05; mplp.indel_bias = 1.0; mplp.min_support = 2;
+    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_REALN_PARTIAL
+              | MPLP_SMART_OVERLAPS;
     mplp.argc = argc; mplp.argv = argv;
     mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
     mplp.output_fname = NULL;
@@ -935,7 +1206,11 @@ int bam_mpileup(int argc, char *argv[])
     mplp.record_cmd_line = 1;
     mplp.n_threads = 0;
     mplp.bsmpl = bam_smpl_init();
-    mplp.fmt_flag = B2B_INFO_VDB|B2B_INFO_RPB;    // the default to be changed in future, see also parse_format_flag()
+    // the default to be changed in future, see also parse_format_flag()
+    mplp.fmt_flag = B2B_INFO_VDB|B2B_INFO_RPB|B2B_INFO_SCB|B2B_INFO_ZSCORE;
+    mplp.max_read_len = 500;
+    mplp.ambig_reads = B2B_DROP;
+    hts_srand48(0);
 
     static const struct option lopts[] =
     {
@@ -956,6 +1231,8 @@ int bam_mpileup(int argc, char *argv[])
         {"bam-list", required_argument, NULL, 'b'},
         {"no-BAQ", no_argument, NULL, 'B'},
         {"no-baq", no_argument, NULL, 'B'},
+        {"full-BAQ", no_argument, NULL, 'D'},
+        {"full-baq", no_argument, NULL, 'D'},
         {"adjust-MQ", required_argument, NULL, 'C'},
         {"adjust-mq", required_argument, NULL, 'C'},
         {"max-depth", required_argument, NULL, 'd'},
@@ -972,6 +1249,9 @@ int bam_mpileup(int argc, char *argv[])
         {"min-mq", required_argument, NULL, 'q'},
         {"min-BQ", required_argument, NULL, 'Q'},
         {"min-bq", required_argument, NULL, 'Q'},
+        {"max-bq", required_argument, NULL, 11},
+        {"max-BQ", required_argument, NULL, 11},
+        {"delta-BQ", required_argument, NULL, 12},
         {"ignore-overlaps", no_argument, NULL, 'x'},
         {"output-type", required_argument, NULL, 'O'},
         {"samples", required_argument, NULL, 's'},
@@ -979,16 +1259,23 @@ int bam_mpileup(int argc, char *argv[])
         {"annotate", required_argument, NULL, 'a'},
         {"ext-prob", required_argument, NULL, 'e'},
         {"gap-frac", required_argument, NULL, 'F'},
+        {"indel-bias", required_argument, NULL, 10},
         {"tandem-qual", required_argument, NULL, 'h'},
         {"skip-indels", no_argument, NULL, 'I'},
         {"max-idepth", required_argument, NULL, 'L'},
-        {"min-ireads ", required_argument, NULL, 'm'},
+        {"min-ireads", required_argument, NULL, 'm'},
         {"per-sample-mF", no_argument, NULL, 'p'},
         {"per-sample-mf", no_argument, NULL, 'p'},
         {"platforms", required_argument, NULL, 'P'},
+        {"max-read-len", required_argument, NULL, 'M'},
+        {"config", required_argument, NULL, 'X'},
+        {"mwu-u", no_argument, NULL, 'U'},
+        {"seed", required_argument, NULL, 13},
+        {"ambig-reads", required_argument, NULL, 14},
+        {"ar", required_argument, NULL, 14},
         {NULL, 0, NULL, 0}
     };
-    while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:BDd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:M:X:U",lopts,NULL)) >= 0) {
         switch (c) {
         case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
         case  1 :
@@ -1040,23 +1327,26 @@ int bam_mpileup(int argc, char *argv[])
         case 'P': mplp.pl_list = strdup(optarg); break;
         case 'p': mplp.flag |= MPLP_PER_SAMPLE; break;
         case 'B': mplp.flag &= ~MPLP_REALN; break;
+        case 'D': mplp.flag &= ~MPLP_REALN_PARTIAL; break;
         case 'I': mplp.flag |= MPLP_NO_INDEL; break;
         case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
         case '6': mplp.flag |= MPLP_ILLUMINA13; break;
         case 's': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,0)<0 ) error("Could not read samples: %s\n",optarg); break;
         case 'S': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,1)<0 ) error("Could not read samples: %s\n",optarg); break;
-        case 'O': 
+        case 'O':
             switch (optarg[0]) {
                 case 'b': mplp.output_type = FT_BCF_GZ; break;
                 case 'u': mplp.output_type = FT_BCF; break;
                 case 'z': mplp.output_type = FT_VCF_GZ; break;
                 case 'v': mplp.output_type = FT_VCF; break;
-                default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n"); 
+                default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n");
             }
             break;
         case 'C': mplp.capQ_thres = atoi(optarg); break;
         case 'q': mplp.min_mq = atoi(optarg); break;
         case 'Q': mplp.min_baseQ = atoi(optarg); break;
+        case  11: mplp.max_baseQ = atoi(optarg); break;
+        case  12: mplp.delta_baseQ = atoi(optarg); break;
         case 'b': file_list = optarg; break;
         case 'o': {
                 char *end;
@@ -1068,6 +1358,12 @@ int bam_mpileup(int argc, char *argv[])
             break;
         case 'e': mplp.extQ = atoi(optarg); break;
         case 'h': mplp.tandemQ = atoi(optarg); break;
+        case 10: // --indel-bias (inverted so higher => more indels called)
+            if (atof(optarg) < 1e-2)
+                mplp.indel_bias = 1/1e2;
+            else
+                mplp.indel_bias = 1/atof(optarg);
+            break;
         case 'A': use_orphan = 1; break;
         case 'F': mplp.min_frac = atof(optarg); break;
         case 'm': mplp.min_support = atoi(optarg); break;
@@ -1080,6 +1376,49 @@ int bam_mpileup(int argc, char *argv[])
             }
             mplp.fmt_flag |= parse_format_flag(optarg);
         break;
+        case 'M': mplp.max_read_len = atoi(optarg); break;
+        case 'U': mplp.fmt_flag &= ~B2B_INFO_ZSCORE; break;
+        case 'X':
+            if (strcasecmp(optarg, "pacbio-ccs") == 0) {
+                mplp.min_frac = 0.1;
+                mplp.min_baseQ = 5;
+                mplp.max_baseQ = 50;
+                mplp.delta_baseQ = 10;
+                mplp.openQ = 25;
+                mplp.extQ = 1;
+                mplp.flag |= MPLP_REALN_PARTIAL;
+                mplp.max_read_len = 99999;
+            } else if (strcasecmp(optarg, "ont") == 0) {
+                fprintf(stderr, "For ONT it may be beneficial to also run bcftools call with "
+                        "a higher -P, eg -P0.01 or -P 0.1\n");
+                mplp.min_baseQ = 5;
+                mplp.max_baseQ = 30;
+                mplp.flag &= ~MPLP_REALN;
+                mplp.flag |= MPLP_NO_INDEL;
+            } else if (strcasecmp(optarg, "1.12") == 0) {
+                // 1.12 and earlier
+                mplp.min_frac = 0.002;
+                mplp.min_support = 1;
+                mplp.min_baseQ = 13;
+                mplp.tandemQ = 100;
+                mplp.flag &= ~MPLP_REALN_PARTIAL;
+                mplp.flag |= MPLP_REALN;
+            } else if (strcasecmp(optarg, "illumina") == 0) {
+                mplp.flag |= MPLP_REALN_PARTIAL;
+            } else {
+                fprintf(stderr, "Unknown configuration name '%s'\n"
+                        "Please choose from 1.12, illumina, pacbio-ccs or ont\n",
+                        optarg);
+                return 1;
+            }
+            break;
+        case 13: hts_srand48(atoi(optarg)); break;
+        case 14:
+            if ( !strcasecmp(optarg,"drop") ) mplp.ambig_reads = B2B_DROP;
+            else if ( !strcasecmp(optarg,"incAD") ) mplp.ambig_reads = B2B_INC_AD;
+            else if ( !strcasecmp(optarg,"incAD0") ) mplp.ambig_reads = B2B_INC_AD0;
+            else error("The option to --ambig-reads not recognised: %s\n",optarg);
+            break;
         default:
             fprintf(stderr,"Invalid option: '%c'\n", c);
             return 1;
@@ -1120,7 +1459,7 @@ int bam_mpileup(int argc, char *argv[])
         return 1;
     }
     int ret,i;
-    if (file_list) 
+    if (file_list)
     {
         if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
         mplp.files  = fn;
@@ -1142,5 +1481,6 @@ int bam_mpileup(int argc, char *argv[])
     if (mplp.bed_itr) regitr_destroy(mplp.bed_itr);
     if (mplp.reg) regidx_destroy(mplp.reg);
     bam_smpl_destroy(mplp.bsmpl);
+
     return ret;
 }
diff --git a/bcftools/mpileup.c.pysam.c b/bcftools/mpileup.c.pysam.c
index 51fcf8b..c66c752 100644
--- a/bcftools/mpileup.c.pysam.c
+++ b/bcftools/mpileup.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  mpileup.c -- mpileup subcommand. Previously bam_plcmd.c from samtools
 
-    Copyright (C) 2008-2018 Genome Research Ltd.
+    Copyright (C) 2008-2021 Genome Research Ltd.
     Portions copyright (C) 2009-2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -41,6 +41,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <htslib/faidx.h>
 #include <htslib/kstring.h>
 #include <htslib/khash_str2int.h>
+#include <htslib/hts_os.h>
 #include <assert.h>
 #include "regidx.h"
 #include "bcftools.h"
@@ -61,16 +62,19 @@ DEALINGS IN THE SOFTWARE.  */
 #define MPLP_PRINT_MAPQ (1<<10)
 #define MPLP_PER_SAMPLE (1<<11)
 #define MPLP_SMART_OVERLAPS (1<<12)
+#define MPLP_REALN_PARTIAL  (1<<13)
 
 typedef struct _mplp_aux_t mplp_aux_t;
 typedef struct _mplp_pileup_t mplp_pileup_t;
 
 // Data shared by all bam files
 typedef struct {
-    int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag;
+    int min_mq, flag, min_baseQ, max_baseQ, delta_baseQ, capQ_thres, max_depth,
+        max_indel_depth, max_read_len, fmt_flag, ambig_reads;
     int rflag_require, rflag_filter, output_type;
     int openQ, extQ, tandemQ, min_support; // for indels
     double min_frac; // for indels
+    double indel_bias;
     char *reg_fname, *pl_list, *fai_fname, *output_fname;
     int reg_is_file, record_cmd_line, n_threads;
     faidx_t *fai;
@@ -233,7 +237,46 @@ static int mplp_func(void *data, bam1_t *b)
             has_ref = 0;
         }
 
-        if (has_ref && (ma->conf->flag&MPLP_REALN)) sam_prob_realn(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
+        // Allow sufficient room for bam_aux_append of ZQ tag without
+        // a realloc and consequent breakage of pileup's cached pointers.
+        if (has_ref && (ma->conf->flag &MPLP_REALN) && !bam_aux_get(b, "ZQ")) {
+            // Doing sam_prob_realn later is problematic as it adds to
+            // the tag list (ZQ or BQ), which causes a realloc of b->data.
+            // This happens after pileup has built a hash table on the
+            // read name.  It's a deficiency in pileup IMO.
+
+            // We could implement a new sam_prob_realn that returns ZQ
+            // somewhere else and cache it ourselves (pileup clientdata),
+            // but for now we simply use a workaround.
+            //
+            // We create a fake tag of the correct length, which we remove
+            // just prior to calling sam_prob_realn so we can guarantee there is
+            // room. (We can't just make room now as bam_copy1 removes it
+            // again).
+            if (b->core.l_qseq > 500) {
+                uint8_t *ZQ = malloc((uint32_t)b->core.l_qseq+1);
+                memset(ZQ, '@', b->core.l_qseq);
+                ZQ[b->core.l_qseq] = 0;
+                bam_aux_append(b, "_Q", 'Z', b->core.l_qseq+1, ZQ);
+                free(ZQ);
+            } else {
+                static uint8_t ZQ[501] =
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+                    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
+                ZQ[b->core.l_qseq] = 0;
+                bam_aux_append(b, "_Q", 'Z', b->core.l_qseq+1, ZQ);
+                ZQ[b->core.l_qseq] = '@';
+            }
+        }
+
         if (has_ref && ma->conf->capQ_thres > 10) {
             int q = sam_cap_mapq(b, ref, ref_len, ma->conf->capQ_thres);
             if (q < 0) continue;    // skip
@@ -259,18 +302,46 @@ static int mplp_func(void *data, bam1_t *b)
 static int pileup_constructor(void *data, const bam1_t *b, bam_pileup_cd *cd)
 {
     mplp_aux_t *ma = (mplp_aux_t *)data;
-    cd->i = bam_smpl_get_sample_id(ma->conf->bsmpl, ma->bam_id, (bam1_t *)b) << 1;
-    if ( ma->conf->fmt_flag & (B2B_INFO_SCR|B2B_FMT_SCR) )
-    {
-        int i;
-        for (i=0; i<b->core.n_cigar; i++)
-        {
-            int cig = bam_get_cigar(b)[i] & BAM_CIGAR_MASK;
-            if ( cig!=BAM_CSOFT_CLIP ) continue;
-            cd->i |= 1;
+    int n = bam_smpl_get_sample_id(ma->conf->bsmpl, ma->bam_id, (bam1_t *)b);
+    cd->i = 0;
+    PLP_SET_SAMPLE_ID(cd->i, n);
+    // Whether read has a soft-clip is used in mplp_realn's heuristics.
+    // TODO: consider whether clip length is beneficial to use?
+    int i;
+    for (i=0; i<b->core.n_cigar; i++) {
+        int cig = bam_get_cigar(b)[i] & BAM_CIGAR_MASK;
+        if (cig == BAM_CSOFT_CLIP) {
+            PLP_SET_SOFT_CLIP(cd->i);
             break;
         }
     }
+
+    if (ma->conf->flag & MPLP_REALN) {
+        int i, tot_ins = 0;
+        uint32_t *cigar = bam_get_cigar(b);
+        int p = 0;
+        for (i=0; i<b->core.n_cigar; i++) {
+            int cig = cigar[i] & BAM_CIGAR_MASK;
+            if (bam_cigar_type(cig) & 2)
+                p += cigar[i] >> BAM_CIGAR_SHIFT;
+            if (cig == BAM_CINS || cig == BAM_CDEL || cig == BAM_CREF_SKIP) {
+                tot_ins += cigar[i] >> BAM_CIGAR_SHIFT;
+                // Possible further optimisation, check tot_ins==1 later
+                // (and remove break) so we can detect single bp indels.
+                // We may want to focus BAQ on more complex regions only.
+                PLP_SET_INDEL(cd->i);
+                break;
+            }
+
+            // TODO: proper p->cd struct and have cd->i as a size rather
+            // than a flag.
+
+            // Then aggregate together the sizes and if just 1 size for all
+            // reads or 2 sizes for approx 50/50 split in all reads, then
+            // treat this as a well-aligned variant and don't run BAQ.
+        }
+    }
+
     return 0;
 }
 
@@ -284,7 +355,7 @@ static void group_smpl(mplp_pileup_t *m, bam_smpl_t *bsmpl, int n, int *n_plp, c
         {
             const bam_pileup1_t *p = plp[i] + j;
             int id = PLP_SAMPLE_ID(p->cd.i);
-            if (m->n_plp[id] == m->m_plp[id]) 
+            if (m->n_plp[id] == m->m_plp[id])
             {
                 m->m_plp[id] = m->m_plp[id]? m->m_plp[id]<<1 : 8;
                 m->plp[id] = (bam_pileup1_t*) realloc(m->plp[id], sizeof(bam_pileup1_t) * m->m_plp[id]);
@@ -319,6 +390,150 @@ static void flush_bcf_records(mplp_conf_t *conf, htsFile *fp, bcf_hdr_t *hdr, bc
     if ( rec && bcf_write1(fp,hdr,rec)!=0 ) error("[%s] Error: failed to write the record to %s\n", __func__,conf->output_fname?conf->output_fname:"standard output");
 }
 
+/*
+ * Looks for an indel at this position.
+ *
+ * Only reads that overlap an indel locus get realigned.  This considerably
+ * reduces the cost of running BAQ while keeping the main benefits.
+ *
+ * TODO: also consider only realigning reads that don't span the indel
+ * by more than a certain amount either-side.  Ie focus BAQ only on reads
+ * ending adjacent to the indel, where the alignment is most likely to
+ * be wrong.  (2nd TODO: do this based on sequence context; STRs bad, unique
+ * data good.)
+ *
+ * NB: this may sadly realign after we've already used the data.  Hmm...
+ */
+static void mplp_realn(int n, int *n_plp, const bam_pileup1_t **plp,
+                       int flag, int max_read_len,
+                       char *ref, int ref_len, int pos) {
+    int i, j, has_indel = 0, has_clip = 0, nt = 0;
+    int min_indel = INT_MAX, max_indel = INT_MIN;
+
+    // Is an indel present.
+    // NB: don't bother even checking if very long as almost guaranteed
+    // to have indel (and likely soft-clips too).
+    for (i = 0; i < n; i++) { // iterate over bams
+        nt += n_plp[i];
+        for (j = 0; j < n_plp[i]; j++) { // iterate over reads
+            bam_pileup1_t *p = (bam_pileup1_t *)plp[i] + j;
+            has_indel += (PLP_HAS_INDEL(p->cd.i) || p->indel) ? 1 : 0;
+            // Has_clip is almost always true for very long reads
+            // (eg PacBio CCS), but these rarely matter as the clip
+            // is likely a long way from this indel.
+            has_clip  += (PLP_HAS_SOFT_CLIP(p->cd.i))         ? 1 : 0;
+            if (max_indel < p->indel)
+                max_indel = p->indel;
+            if (min_indel > p->indel)
+                min_indel = p->indel;
+        }
+    }
+
+    if (flag & MPLP_REALN_PARTIAL) {
+        if (has_indel == 0 ||
+            (has_clip < 0.2*nt && max_indel == min_indel &&
+             (has_indel < 0.1*nt /*|| has_indel > 0.9*nt*/ || has_indel == 1)))
+            return;
+    }
+
+    // Realign
+    for (i = 0; i < n; i++) { // iterate over bams
+        for (j = 0; j < n_plp[i]; j++) { // iterate over reads
+            const bam_pileup1_t *p = plp[i] + j;
+            bam1_t *b = p->b;
+
+            // Avoid doing multiple times.
+            //
+            // Note we cannot modify p->cd.i here with a PLP_SET macro
+            // because the cd item is held by mpileup in an lbnode_t
+            // struct and copied over to the pileup struct for each
+            // iteration, essentially making p->cd.i read only.
+            //
+            // We could use our own structure (p->cd.p), allocated during
+            // the constructor, but for simplicity we play dirty and
+            // abuse an unused flag bit instead.
+            if (b->core.flag & 32768)
+                continue;
+            b->core.flag |= 32768;
+
+            if (b->core.l_qseq > max_read_len)
+                continue;
+
+            // Check p->cigar_ind and see what cigar elements are before
+            // and after.  How close is this location to the end of the
+            // read?  Only realign if we don't span by more than X bases.
+            //
+            // Again, best only done on deeper data as BAQ helps
+            // disproportionately more on shallow data sets.
+            //
+            // This rescues some of the false negatives that are caused by
+            // systematic reduction in quality due to sample vs ref alignment.
+
+// At deep coverage we skip realigning more reads as we have sufficient depth.
+// This rescues some false negatives.  At shallow depth we pay for this with
+// more FP so are more stringent on spanning size.
+#define REALN_DIST (40+10*(nt<40)+10*(nt<20))
+            uint32_t *cig = bam_get_cigar(b);
+            int ncig = b->core.n_cigar;
+
+            // Don't realign reads where indel is in middle?
+            // On long read data we don't care about soft-clips at the ends.
+            // For short read data, we always calc BAQ on these as they're
+            // a common source of false positives.
+            if ((flag & MPLP_REALN_PARTIAL) && nt > 15 && ncig > 1) {
+                // Left & right cigar op match.
+                int lr = b->core.l_qseq > 500;
+                int lm = 0, rm = 0, k;
+                for (k = 0; k < ncig; k++) {
+                    int cop = bam_cigar_op(cig[k]);
+                    if (lr && (cop == BAM_CHARD_CLIP || cop == BAM_CSOFT_CLIP))
+                        continue;
+
+                    if (cop == BAM_CMATCH || cop == BAM_CDIFF ||
+                        cop == BAM_CEQUAL)
+                        lm += bam_cigar_oplen(cig[k]);
+                    else
+                        break;
+                }
+
+                for (k = ncig-1; k >= 0; k--) {
+                    int cop = bam_cigar_op(cig[k]);
+                    if (lr && (cop == BAM_CHARD_CLIP || cop == BAM_CSOFT_CLIP))
+                        continue;
+
+                    if (cop == BAM_CMATCH || cop == BAM_CDIFF ||
+                        cop == BAM_CEQUAL)
+                        rm += bam_cigar_oplen(cig[k]);
+                    else
+                        break;
+                }
+
+                if (lm >= REALN_DIST*4 && rm >= REALN_DIST*4)
+                    continue;
+
+                if (lm >= REALN_DIST && rm >= REALN_DIST &&
+                    has_clip < (0.15+0.05*(nt>20))*nt)
+                    continue;
+            }
+
+            if (b->core.l_qseq > 500) {
+                // don't do BAQ on long-read data if it's going to
+                // cause us to have a large band-width and be costly in CPU
+                int rl = bam_cigar2rlen(b->core.n_cigar, bam_get_cigar(b));
+                if (abs(rl - b->core.l_qseq) * b->core.l_qseq >= 500000)
+                    continue;
+            }
+
+            // Fudge: make room for ZQ tag.
+            uint8_t *_Q = bam_aux_get(b, "_Q");
+            if (_Q) bam_aux_del(b, _Q);
+            sam_prob_realn(b, ref, ref_len, (flag & MPLP_REDO_BAQ) ? 7 : 3);
+        }
+    }
+
+    return;
+}
+
 static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
 {
     bam_hdr_t *hdr = conf->mplp_data[0]->h; // header of first file in input list
@@ -326,7 +541,7 @@ static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
     int ret, i, tid, pos, ref_len;
     char *ref;
 
-    while ( (ret=bam_mplp_auto(conf->iter, &tid, &pos, conf->n_plp, conf->plp)) > 0) 
+    while ( (ret=bam_mplp_auto(conf->iter, &tid, &pos, conf->n_plp, conf->plp)) > 0)
     {
         if ( pos<beg || pos>end ) continue;
         if ( conf->bed && tid >= 0 )
@@ -335,7 +550,10 @@ static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
             if ( !conf->bed_logic ) overlap = overlap ? 0 : 1;
             if ( !overlap ) continue;
         }
-        mplp_get_ref(conf->mplp_data[0], tid, &ref, &ref_len);
+        int has_ref = mplp_get_ref(conf->mplp_data[0], tid, &ref, &ref_len);
+        if (has_ref && (conf->flag & MPLP_REALN))
+            mplp_realn(conf->nfiles, conf->n_plp, conf->plp, conf->flag,
+                       conf->max_read_len, ref, ref_len, pos);
 
         int total_depth, _ref0, ref16;
         for (i = total_depth = 0; i < conf->nfiles; ++i) total_depth += conf->n_plp[i];
@@ -348,18 +566,19 @@ static int mpileup_reg(mplp_conf_t *conf, uint32_t beg, uint32_t end)
         conf->bc.tid = tid; conf->bc.pos = pos;
         bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, ref16, &conf->bc);
         bcf_clear1(conf->bcf_rec);
-        bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, 0, 0);
+        bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag,
+                     conf->bca, 0);
         flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, conf->bcf_rec);
 
         // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
         // check me: rghash in bcf_call_gap_prep() should have no effect, reads mplp_func already excludes them
-        if (!(conf->flag&MPLP_NO_INDEL) && total_depth < conf->max_indel_depth 
-            && bcf_call_gap_prep(conf->gplp->n, conf->gplp->n_plp, conf->gplp->plp, pos, conf->bca, ref) >= 0)
+        if (!(conf->flag&MPLP_NO_INDEL) && total_depth < conf->max_indel_depth
+            && (bcf_callaux_clean(conf->bca, &conf->bc),
+                bcf_call_gap_prep(conf->gplp->n, conf->gplp->n_plp, conf->gplp->plp, pos, conf->bca, ref) >= 0))
         {
-            bcf_callaux_clean(conf->bca, &conf->bc);
             for (i = 0; i < conf->gplp->n; ++i)
                 bcf_call_glfgen(conf->gplp->n_plp[i], conf->gplp->plp[i], -1, conf->bca, conf->bcr + i);
-            if (bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, -1, &conf->bc) >= 0) 
+            if (bcf_call_combine(conf->gplp->n, conf->bcr, conf->bca, -1, &conf->bc) >= 0)
             {
                 bcf_clear1(conf->bcf_rec);
                 bcf_call2bcf(&conf->bc, conf->bcf_rec, conf->bcr, conf->fmt_flag, conf->bca, ref);
@@ -374,7 +593,7 @@ static int mpileup(mplp_conf_t *conf)
 {
     if (conf->nfiles == 0) {
         fprintf(bcftools_stderr,"[%s] no input file/data given\n", __func__);
-        exit(EXIT_FAILURE);
+        bcftools_exit(EXIT_FAILURE);
     }
 
     mplp_ref_t mp_ref = MPLP_REF_INIT;
@@ -395,7 +614,7 @@ static int mpileup(mplp_conf_t *conf)
             conf->reg = regidx_init(conf->reg_fname,NULL,NULL,0,NULL);
             if ( !conf->reg ) {
                 fprintf(bcftools_stderr,"Could not parse the regions: %s\n", conf->reg_fname);
-                exit(EXIT_FAILURE);
+                bcftools_exit(EXIT_FAILURE);
             }
         }
         else
@@ -403,7 +622,7 @@ static int mpileup(mplp_conf_t *conf)
             conf->reg = regidx_init(NULL,regidx_parse_reg,NULL,sizeof(char*),NULL);
             if ( regidx_insert_list(conf->reg,conf->reg_fname,',') !=0 ) {
                 fprintf(bcftools_stderr,"Could not parse the regions: %s\n", conf->reg_fname);
-                exit(EXIT_FAILURE);
+                bcftools_exit(EXIT_FAILURE);
             }
         }
         nregs = regidx_nregs(conf->reg);
@@ -422,23 +641,23 @@ static int mpileup(mplp_conf_t *conf)
         if ( !conf->mplp_data[i]->fp )
         {
             fprintf(bcftools_stderr, "[%s] failed to open %s: %s\n", __func__, conf->files[i], strerror(errno));
-            exit(EXIT_FAILURE);
+            bcftools_exit(EXIT_FAILURE);
         }
         if (hts_set_opt(conf->mplp_data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
             fprintf(bcftools_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
-            exit(EXIT_FAILURE);
+            bcftools_exit(EXIT_FAILURE);
         }
         if (conf->fai_fname && hts_set_fai_filename(conf->mplp_data[i]->fp, conf->fai_fname) != 0) {
             fprintf(bcftools_stderr, "[%s] failed to process %s: %s\n",
                     __func__, conf->fai_fname, strerror(errno));
-            exit(EXIT_FAILURE);
+            bcftools_exit(EXIT_FAILURE);
         }
         conf->mplp_data[i]->conf = conf;
         conf->mplp_data[i]->ref = &mp_ref;
         h_tmp = sam_hdr_read(conf->mplp_data[i]->fp);
         if ( !h_tmp ) {
             fprintf(bcftools_stderr,"[%s] fail to read the header of %s\n", __func__, conf->files[i]);
-            exit(EXIT_FAILURE);
+            bcftools_exit(EXIT_FAILURE);
         }
         conf->mplp_data[i]->h = i ? hdr : h_tmp; // for j==0, "h" has not been set yet
         conf->mplp_data[i]->bam_id = bam_smpl_add_bam(conf->bsmpl,h_tmp->text,conf->files[i]);
@@ -458,20 +677,20 @@ static int mpileup(mplp_conf_t *conf)
             hts_idx_t *idx = sam_index_load(conf->mplp_data[i]->fp, conf->files[i]);
             if (idx == NULL) {
                 fprintf(bcftools_stderr, "[%s] fail to load index for %s\n", __func__, conf->files[i]);
-                exit(EXIT_FAILURE);
+                bcftools_exit(EXIT_FAILURE);
             }
             conf->buf.l = 0;
             ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg+1,conf->reg_itr->end+1);
             conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->buf.s);
-            if ( !conf->mplp_data[i]->iter ) 
+            if ( !conf->mplp_data[i]->iter )
             {
                 conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
                 if ( conf->mplp_data[i]->iter ) {
                     fprintf(bcftools_stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
-                    exit(EXIT_FAILURE);
+                    bcftools_exit(EXIT_FAILURE);
                 }
                 fprintf(bcftools_stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
-                exit(EXIT_FAILURE);
+                bcftools_exit(EXIT_FAILURE);
             }
             if ( nregs==1 ) // no need to keep the index in memory
                hts_idx_destroy(idx);
@@ -489,18 +708,22 @@ static int mpileup(mplp_conf_t *conf)
             conf->mplp_data[i]->h = hdr;
         }
     }
+    if ( !hdr ) {
+        fprintf(bcftools_stderr, "[%s] failed to find a file header with usable read groups\n", __func__);
+        bcftools_exit(EXIT_FAILURE);
+    }
     // allocate data storage proportionate to number of samples being studied sm->n
     bam_smpl_get_samples(conf->bsmpl, &conf->gplp->n);
     conf->gplp->n_plp = (int*) calloc(conf->gplp->n, sizeof(int));
     conf->gplp->m_plp = (int*) calloc(conf->gplp->n, sizeof(int));
-    conf->gplp->plp = (bam_pileup1_t**) calloc(conf->gplp->n, sizeof(bam_pileup1_t*));  
+    conf->gplp->plp = (bam_pileup1_t**) calloc(conf->gplp->n, sizeof(bam_pileup1_t*));
 
     fprintf(bcftools_stderr, "[%s] %d samples in %d input files\n", __func__, conf->gplp->n, conf->nfiles);
     // write the VCF header
-    conf->bcf_fp = hts_open(conf->output_fname?conf->output_fname:"-", hts_bcf_wmode(conf->output_type));
+    conf->bcf_fp = hts_open(conf->output_fname?conf->output_fname:"-", hts_bcf_wmode2(conf->output_type,conf->output_fname));
     if (conf->bcf_fp == NULL) {
         fprintf(bcftools_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
-        exit(EXIT_FAILURE);
+        bcftools_exit(EXIT_FAILURE);
     }
     if ( conf->n_threads ) hts_set_threads(conf->bcf_fp, conf->n_threads);
 
@@ -544,11 +767,24 @@ static int mpileup(mplp_conf_t *conf)
     bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
     if ( conf->fmt_flag&B2B_INFO_VDB )
         bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
-    if ( conf->fmt_flag&B2B_INFO_RPB )
-        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
-    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
-    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
-    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
+
+    if (conf->fmt_flag & B2B_INFO_ZSCORE) {
+        if ( conf->fmt_flag&B2B_INFO_RPB )
+            bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Read Position Bias (closer to 0 is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Mapping Quality Bias (closer to 0 is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Base Quality Bias (closer to 0 is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Mapping Quality vs Strand Bias (closer to 0 is better)\">");
+        if ( conf->fmt_flag&B2B_INFO_SCB )
+            bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=SCBZ,Number=1,Type=Float,Description=\"Mann-Whitney U-z test of Soft-Clip Length Bias (closer to 0 is better)\">");
+    } else {
+        if ( conf->fmt_flag&B2B_INFO_RPB )
+            bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
+        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
+    }
+
+    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=FS,Number=1,Type=Float,Description=\"Phred-scaled p-value using Fisher's exact test to detect strand bias\">");
 #if CDF_MWU_TESTS
     bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
     bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
@@ -578,6 +814,8 @@ static int mpileup(mplp_conf_t *conf)
         bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand (high-quality bases)\">");
     if ( conf->fmt_flag&B2B_FMT_ADR )
         bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand (high-quality bases)\">");
+    if ( conf->fmt_flag&B2B_FMT_QS )
+        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=QS,Number=R,Type=Integer,Description=\"Phred-score allele quality sum used by `call -mG` and `+trio-dnm`\">");
     if ( conf->fmt_flag&B2B_INFO_AD )
         bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths (high-quality bases)\">");
     if ( conf->fmt_flag&B2B_INFO_ADF )
@@ -597,17 +835,23 @@ static int mpileup(mplp_conf_t *conf)
         bcf_hdr_add_sample(conf->bcf_hdr, smpl[i]);
     if ( bcf_hdr_write(conf->bcf_fp, conf->bcf_hdr)!=0 ) error("[%s] Error: failed to write the header to %s\n",__func__,conf->output_fname?conf->output_fname:"standard output");
 
-    conf->bca = bcf_call_init(-1., conf->min_baseQ);
+    conf->bca = bcf_call_init(-1., conf->min_baseQ, conf->max_baseQ,
+                              conf->delta_baseQ);
     conf->bcr = (bcf_callret1_t*) calloc(nsmpl, sizeof(bcf_callret1_t));
     conf->bca->openQ = conf->openQ, conf->bca->extQ = conf->extQ, conf->bca->tandemQ = conf->tandemQ;
+    conf->bca->indel_bias = conf->indel_bias;
     conf->bca->min_frac = conf->min_frac;
     conf->bca->min_support = conf->min_support;
     conf->bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;
     conf->bca->fmt_flag = conf->fmt_flag;
+    conf->bca->ambig_reads = conf->ambig_reads;
 
     conf->bc.bcf_hdr = conf->bcf_hdr;
     conf->bc.n  = nsmpl;
     conf->bc.PL = (int32_t*) malloc(15 * nsmpl * sizeof(*conf->bc.PL));
+    conf->bc.QS = (int32_t*) malloc(nsmpl*sizeof(*conf->bc.QS)*B2B_MAX_ALLELES);
+    for (i=0; i<nsmpl; i++)
+        conf->bcr[i].QS = conf->bc.QS + i*B2B_MAX_ALLELES;
     if (conf->fmt_flag)
     {
         assert( sizeof(float)==sizeof(int32_t) );
@@ -645,7 +889,7 @@ static int mpileup(mplp_conf_t *conf)
     if ( nregs )
     {
         int ireg = 0;
-        do 
+        do
         {
             // first region is already positioned
             if ( ireg++ > 0 )
@@ -653,19 +897,19 @@ static int mpileup(mplp_conf_t *conf)
                 conf->buf.l = 0;
                 ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg+1,conf->reg_itr->end+1);
 
-                for (i=0; i<conf->nfiles; i++) 
+                for (i=0; i<conf->nfiles; i++)
                 {
                     hts_itr_destroy(conf->mplp_data[i]->iter);
                     conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->buf.s);
-                    if ( !conf->mplp_data[i]->iter ) 
+                    if ( !conf->mplp_data[i]->iter )
                     {
                         conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
                         if ( conf->mplp_data[i]->iter ) {
                             fprintf(bcftools_stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
-                            exit(EXIT_FAILURE);
+                            bcftools_exit(EXIT_FAILURE);
                         }
                         fprintf(bcftools_stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
-                        exit(EXIT_FAILURE);
+                        bcftools_exit(EXIT_FAILURE);
                     }
                     bam_mplp_reset(conf->iter);
                 }
@@ -692,6 +936,7 @@ static int mpileup(mplp_conf_t *conf)
         free(conf->bc.ADR);
         free(conf->bc.ADF);
         free(conf->bc.SCR);
+        free(conf->bc.QS);
         free(conf->bc.fmt_arr);
         free(conf->bcr);
     }
@@ -795,14 +1040,16 @@ int parse_format_flag(const char *str)
         else if ( !strcasecmp(tags[i],"ADF") || !strcasecmp(tags[i],"FORMAT/ADF") || !strcasecmp(tags[i],"FMT/ADF") ) flag |= B2B_FMT_ADF;
         else if ( !strcasecmp(tags[i],"ADR") || !strcasecmp(tags[i],"FORMAT/ADR") || !strcasecmp(tags[i],"FMT/ADR") ) flag |= B2B_FMT_ADR;
         else if ( !strcasecmp(tags[i],"SCR") || !strcasecmp(tags[i],"FORMAT/SCR") || !strcasecmp(tags[i],"FMT/SCR") ) flag |= B2B_FMT_SCR;
+        else if ( !strcasecmp(tags[i],"QS") || !strcasecmp(tags[i],"FORMAT/QS") || !strcasecmp(tags[i],"FMT/QS") ) flag |= B2B_FMT_QS;
         else if ( !strcasecmp(tags[i],"INFO/SCR") ) flag |= B2B_INFO_SCR;
         else if ( !strcasecmp(tags[i],"INFO/AD") ) flag |= B2B_INFO_AD;
         else if ( !strcasecmp(tags[i],"INFO/ADF") ) flag |= B2B_INFO_ADF;
         else if ( !strcasecmp(tags[i],"INFO/ADR") ) flag |= B2B_INFO_ADR;
+        else if ( !strcasecmp(tags[i],"SCB") || !strcasecmp(tags[i],"INFO/SCB")) flag |= B2B_INFO_SCB;
         else
         {
             fprintf(bcftools_stderr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
-            exit(EXIT_FAILURE);
+            bcftools_exit(EXIT_FAILURE);
         }
         free(tags[i]);
     }
@@ -823,6 +1070,7 @@ static void list_annotations(FILE *fp)
 "  FORMAT/ADF .. Allelic depths on the forward strand (Number=R,Type=Integer)\n"
 "  FORMAT/ADR .. Allelic depths on the reverse strand (Number=R,Type=Integer)\n"
 "  FORMAT/DP  .. Number of high-quality bases (Number=1,Type=Integer)\n"
+"  FORMAT/QS  .. Allele phred-score quality sum for use with `call -mG` and +trio-dnm (Number=R,Type=Integer)\n"
 "  FORMAT/SP  .. Phred-scaled strand bias P-value (Number=1,Type=Integer)\n"
 "  FORMAT/SCR .. Number of soft-clipped reads (Number=1,Type=Integer)\n"
 "\n"
@@ -845,78 +1093,98 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
     // source code in 80 columns, to the extent that's possible.)
 
     fprintf(fp,
-"\n"
-"Usage: bcftools mpileup [options] in1.bam [in2.bam [...]]\n"
-"\n"
-"Input options:\n"
-"  -6, --illumina1.3+      quality is in the Illumina-1.3+ encoding\n"
-"  -A, --count-orphans     do not discard anomalous read pairs\n"
-"  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
-"  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)\n"
-"  -C, --adjust-MQ INT     adjust mapping quality; recommended:50, disable:0 [0]\n"
-"  -d, --max-depth INT     max raw per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+        "\n"
+        "Usage: bcftools mpileup [options] in1.bam [in2.bam [...]]\n"
+        "\n"
+        "Input options:\n"
+        "  -6, --illumina1.3+      quality is in the Illumina-1.3+ encoding\n"
+        "  -A, --count-orphans     do not discard anomalous read pairs\n"
+        "  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
+        "  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)\n"
+        "  -C, --adjust-MQ INT     adjust mapping quality [0]\n"
+        "  -D, --full-BAQ          Apply BAQ everywhere, not just in problematic regions\n"
+        "  -d, --max-depth INT     max raw per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+            fprintf(fp,
+        "  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs\n"
+        "  -f, --fasta-ref FILE    faidx indexed reference sequence file\n"
+        "      --no-reference      do not require fasta reference file\n"
+        "  -G, --read-groups FILE  select or exclude read groups listed in the file\n"
+        "  -q, --min-MQ INT        skip alignments with mapQ smaller than INT [%d]\n", mplp->min_mq);
     fprintf(fp,
-"  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs\n"
-"  -f, --fasta-ref FILE    faidx indexed reference sequence file\n"
-"      --no-reference      do not require fasta reference file\n"
-"  -G, --read-groups FILE  select or exclude read groups listed in the file\n"
-"  -q, --min-MQ INT        skip alignments with mapQ smaller than INT [%d]\n", mplp->min_mq);
+        "  -Q, --min-BQ INT        skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp->min_baseQ);
     fprintf(fp,
-"  -Q, --min-BQ INT        skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp->min_baseQ);
+        "      --max-BQ INT        limit baseQ/BAQ to no more than INT [%d]\n", mplp->max_baseQ);
     fprintf(fp,
-"  -r, --regions REG[,...] comma separated list of regions in which pileup is generated\n"
-"  -R, --regions-file FILE restrict to regions listed in a file\n"
-"      --ignore-RG         ignore RG tags (one BAM = one sample)\n"
-"  --rf, --incl-flags STR|INT  required flags: skip reads with mask bits unset [%s]\n", tmp_require);
+        "      --delta-BQ INT      Use neighbour_qual + INT if less than qual [%d]\n", mplp->delta_baseQ);
     fprintf(fp,
-"  --ff, --excl-flags STR|INT  filter flags: skip reads with mask bits set\n"
-"                                            [%s]\n", tmp_filter);
+        "  -r, --regions REG[,...] comma separated list of regions in which pileup is generated\n"
+        "  -R, --regions-file FILE restrict to regions listed in a file\n"
+        "      --ignore-RG         ignore RG tags (one BAM = one sample)\n"
+        "  --rf, --incl-flags STR|INT  required flags: skip reads with mask bits unset [%s]\n", tmp_require);
     fprintf(fp,
-"  -s, --samples LIST      comma separated list of samples to include\n"
-"  -S, --samples-file FILE file of samples to include\n"
-"  -t, --targets REG[,...] similar to -r but streams rather than index-jumps\n"
-"  -T, --targets-file FILE similar to -R but streams rather than index-jumps\n"
-"  -x, --ignore-overlaps   disable read-pair overlap detection\n"
-"\n"
-"Output options:\n"
-"  -a, --annotate LIST     optional tags to output; '?' to list []\n"
-"  -g, --gvcf INT[,...]    group non-variant sites into gVCF blocks according\n"
-"                          to minimum per-sample DP\n"
-"      --no-version        do not append version and command line to the header\n"
-"  -o, --output FILE       write output to FILE [standard output]\n"
-"  -O, --output-type TYPE  'b' compressed BCF; 'u' uncompressed BCF;\n"
-"                          'z' compressed VCF; 'v' uncompressed VCF [v]\n"
-"      --threads INT       use multithreading with INT worker threads [0]\n"
-"\n"
-"SNP/INDEL genotype likelihoods options:\n"
-"  -e, --ext-prob INT      Phred-scaled gap extension seq error probability [%d]\n", mplp->extQ);
+        "  --ff, --excl-flags STR|INT  filter flags: skip reads with mask bits set\n"
+        "                                            [%s]\n", tmp_filter);
     fprintf(fp,
-"  -F, --gap-frac FLOAT    minimum fraction of gapped reads [%g]\n", mplp->min_frac);
+        "  -s, --samples LIST      comma separated list of samples to include\n"
+        "  -S, --samples-file FILE file of samples to include\n"
+        "  -t, --targets REG[,...] similar to -r but streams rather than index-jumps\n"
+        "  -T, --targets-file FILE similar to -R but streams rather than index-jumps\n"
+        "  -x, --ignore-overlaps   disable read-pair overlap detection\n"
+        "      --seed INT          random number seed used for sampling deep regions [0]\n"
+        "\n"
+        "Output options:\n"
+        "  -a, --annotate LIST     optional tags to output; '?' to list available tags []\n"
+        "  -g, --gvcf INT[,...]    group non-variant sites into gVCF blocks according\n"
+        "                          to minimum per-sample DP\n"
+        "      --no-version        do not append version and command line to the header\n"
+        "  -o, --output FILE       write output to FILE [standard output]\n"
+        "  -O, --output-type TYPE  'b' compressed BCF; 'u' uncompressed BCF;\n"
+        "                          'z' compressed VCF; 'v' uncompressed VCF [v]\n"
+        "  -U, --mwu-u             use older probability scale for Mann-Whitney U test\n"
+        "      --threads INT       use multithreading with INT worker threads [0]\n"
+        "\n"
+        "SNP/INDEL genotype likelihoods options:\n"
+        "  -X, --config STR        Specify platform specific profiles (see below)\n"
+        "  -e, --ext-prob INT      Phred-scaled gap extension seq error probability [%d]\n", mplp->extQ);
     fprintf(fp,
-"  -h, --tandem-qual INT   coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
+        "  -F, --gap-frac FLOAT    minimum fraction of gapped reads [%g]\n", mplp->min_frac);
     fprintf(fp,
-"  -I, --skip-indels       do not perform indel calling\n"
-"  -L, --max-idepth INT    maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
+        "  -h, --tandem-qual INT   coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
     fprintf(fp,
-"  -m, --min-ireads INT    minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
+        "  -I, --skip-indels       do not perform indel calling\n"
+        "  -L, --max-idepth INT    maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
     fprintf(fp,
-"  -o, --open-prob INT     Phred-scaled gap open seq error probability [%d]\n", mplp->openQ);
+        "  -m, --min-ireads INT    minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
     fprintf(fp,
-"  -p, --per-sample-mF     apply -m and -F per-sample for increased sensitivity\n"
-"  -P, --platforms STR     comma separated list of platforms for indels [all]\n"
-"\n"
-"Notes: Assuming diploid individuals.\n"
-"\n"
-"Example:\n"
-"   # See also http://samtools.github.io/bcftools/howtos/variant-calling.html\n"
-"   bcftools mpileup -f reference.fa alignments.bam | bcftools call -mv -Ob -o calls.bcf\n"
-"\n");
+        "  -M, --max-read-len INT  maximum length of read to pass to BAQ algorithm [%d]\n", mplp->max_read_len);
+    fprintf(fp,
+        "  -o, --open-prob INT     Phred-scaled gap open seq error probability [%d]\n", mplp->openQ);
+    fprintf(fp,
+        "  -p, --per-sample-mF     apply -m and -F per-sample for increased sensitivity\n"
+        "  -P, --platforms STR     comma separated list of platforms for indels [all]\n"
+        "  --ar, --ambig-reads STR   What to do with ambiguous indel reads: drop,incAD,incAD0 [drop]\n");
+    fprintf(fp,
+        "      --indel-bias FLOAT  Raise to favour recall over precision [%.2f]\n", mplp->indel_bias);
+    fprintf(fp,"\n");
+    fprintf(fp,
+        "Configuration profiles activated with -X, --config:\n"
+        "    1.12:        -Q13 -h100 -m1 -F0.002\n"
+        "    illumina:    [ default values ]\n"
+        "    ont:         -B -Q5 --max-BQ 30 -I [also try eg |bcftools call -P0.01]\n"
+        "    pacbio-ccs:  -D -Q5 --max-BQ 50 -F0.1 -o25 -e1 --delta-BQ 10 -M99999\n"
+        "\n"
+        "Notes: Assuming diploid individuals.\n"
+        "\n"
+        "Example:\n"
+        "   # See also http://samtools.github.io/bcftools/howtos/variant-calling.html\n"
+        "   bcftools mpileup -Ou -f reference.fa alignments.bam | bcftools call -mv -Ob -o calls.bcf\n"
+        "\n");
 
     free(tmp_require);
     free(tmp_filter);
 }
 
-int bam_mpileup(int argc, char *argv[])
+int main_mpileup(int argc, char *argv[])
 {
     int c;
     const char *file_list = NULL;
@@ -924,12 +1192,15 @@ int bam_mpileup(int argc, char *argv[])
     int nfiles = 0, use_orphan = 0, noref = 0;
     mplp_conf_t mplp;
     memset(&mplp, 0, sizeof(mplp_conf_t));
-    mplp.min_baseQ = 13;
+    mplp.min_baseQ = 1;
+    mplp.max_baseQ = 60;
+    mplp.delta_baseQ = 30;
     mplp.capQ_thres = 0;
     mplp.max_depth = 250; mplp.max_indel_depth = 250;
-    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
-    mplp.min_frac = 0.002; mplp.min_support = 1;
-    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
+    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 500;
+    mplp.min_frac = 0.05; mplp.indel_bias = 1.0; mplp.min_support = 2;
+    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_REALN_PARTIAL
+              | MPLP_SMART_OVERLAPS;
     mplp.argc = argc; mplp.argv = argv;
     mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
     mplp.output_fname = NULL;
@@ -937,7 +1208,11 @@ int bam_mpileup(int argc, char *argv[])
     mplp.record_cmd_line = 1;
     mplp.n_threads = 0;
     mplp.bsmpl = bam_smpl_init();
-    mplp.fmt_flag = B2B_INFO_VDB|B2B_INFO_RPB;    // the default to be changed in future, see also parse_format_flag()
+    // the default to be changed in future, see also parse_format_flag()
+    mplp.fmt_flag = B2B_INFO_VDB|B2B_INFO_RPB|B2B_INFO_SCB|B2B_INFO_ZSCORE;
+    mplp.max_read_len = 500;
+    mplp.ambig_reads = B2B_DROP;
+    hts_srand48(0);
 
     static const struct option lopts[] =
     {
@@ -958,6 +1233,8 @@ int bam_mpileup(int argc, char *argv[])
         {"bam-list", required_argument, NULL, 'b'},
         {"no-BAQ", no_argument, NULL, 'B'},
         {"no-baq", no_argument, NULL, 'B'},
+        {"full-BAQ", no_argument, NULL, 'D'},
+        {"full-baq", no_argument, NULL, 'D'},
         {"adjust-MQ", required_argument, NULL, 'C'},
         {"adjust-mq", required_argument, NULL, 'C'},
         {"max-depth", required_argument, NULL, 'd'},
@@ -974,6 +1251,9 @@ int bam_mpileup(int argc, char *argv[])
         {"min-mq", required_argument, NULL, 'q'},
         {"min-BQ", required_argument, NULL, 'Q'},
         {"min-bq", required_argument, NULL, 'Q'},
+        {"max-bq", required_argument, NULL, 11},
+        {"max-BQ", required_argument, NULL, 11},
+        {"delta-BQ", required_argument, NULL, 12},
         {"ignore-overlaps", no_argument, NULL, 'x'},
         {"output-type", required_argument, NULL, 'O'},
         {"samples", required_argument, NULL, 's'},
@@ -981,16 +1261,23 @@ int bam_mpileup(int argc, char *argv[])
         {"annotate", required_argument, NULL, 'a'},
         {"ext-prob", required_argument, NULL, 'e'},
         {"gap-frac", required_argument, NULL, 'F'},
+        {"indel-bias", required_argument, NULL, 10},
         {"tandem-qual", required_argument, NULL, 'h'},
         {"skip-indels", no_argument, NULL, 'I'},
         {"max-idepth", required_argument, NULL, 'L'},
-        {"min-ireads ", required_argument, NULL, 'm'},
+        {"min-ireads", required_argument, NULL, 'm'},
         {"per-sample-mF", no_argument, NULL, 'p'},
         {"per-sample-mf", no_argument, NULL, 'p'},
         {"platforms", required_argument, NULL, 'P'},
+        {"max-read-len", required_argument, NULL, 'M'},
+        {"config", required_argument, NULL, 'X'},
+        {"mwu-u", no_argument, NULL, 'U'},
+        {"seed", required_argument, NULL, 13},
+        {"ambig-reads", required_argument, NULL, 14},
+        {"ar", required_argument, NULL, 14},
         {NULL, 0, NULL, 0}
     };
-    while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:BDd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:M:X:U",lopts,NULL)) >= 0) {
         switch (c) {
         case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
         case  1 :
@@ -1030,7 +1317,7 @@ int bam_mpileup(int argc, char *argv[])
                   if ( regidx_insert_list(mplp.bed,optarg,',') !=0 )
                   {
                       fprintf(bcftools_stderr,"Could not parse the targets: %s\n", optarg);
-                      exit(EXIT_FAILURE);
+                      bcftools_exit(EXIT_FAILURE);
                   }
                   break;
         case 'T':
@@ -1042,23 +1329,26 @@ int bam_mpileup(int argc, char *argv[])
         case 'P': mplp.pl_list = strdup(optarg); break;
         case 'p': mplp.flag |= MPLP_PER_SAMPLE; break;
         case 'B': mplp.flag &= ~MPLP_REALN; break;
+        case 'D': mplp.flag &= ~MPLP_REALN_PARTIAL; break;
         case 'I': mplp.flag |= MPLP_NO_INDEL; break;
         case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
         case '6': mplp.flag |= MPLP_ILLUMINA13; break;
         case 's': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,0)<0 ) error("Could not read samples: %s\n",optarg); break;
         case 'S': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,1)<0 ) error("Could not read samples: %s\n",optarg); break;
-        case 'O': 
+        case 'O':
             switch (optarg[0]) {
                 case 'b': mplp.output_type = FT_BCF_GZ; break;
                 case 'u': mplp.output_type = FT_BCF; break;
                 case 'z': mplp.output_type = FT_VCF_GZ; break;
                 case 'v': mplp.output_type = FT_VCF; break;
-                default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n"); 
+                default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n");
             }
             break;
         case 'C': mplp.capQ_thres = atoi(optarg); break;
         case 'q': mplp.min_mq = atoi(optarg); break;
         case 'Q': mplp.min_baseQ = atoi(optarg); break;
+        case  11: mplp.max_baseQ = atoi(optarg); break;
+        case  12: mplp.delta_baseQ = atoi(optarg); break;
         case 'b': file_list = optarg; break;
         case 'o': {
                 char *end;
@@ -1070,6 +1360,12 @@ int bam_mpileup(int argc, char *argv[])
             break;
         case 'e': mplp.extQ = atoi(optarg); break;
         case 'h': mplp.tandemQ = atoi(optarg); break;
+        case 10: // --indel-bias (inverted so higher => more indels called)
+            if (atof(optarg) < 1e-2)
+                mplp.indel_bias = 1/1e2;
+            else
+                mplp.indel_bias = 1/atof(optarg);
+            break;
         case 'A': use_orphan = 1; break;
         case 'F': mplp.min_frac = atof(optarg); break;
         case 'm': mplp.min_support = atoi(optarg); break;
@@ -1082,6 +1378,49 @@ int bam_mpileup(int argc, char *argv[])
             }
             mplp.fmt_flag |= parse_format_flag(optarg);
         break;
+        case 'M': mplp.max_read_len = atoi(optarg); break;
+        case 'U': mplp.fmt_flag &= ~B2B_INFO_ZSCORE; break;
+        case 'X':
+            if (strcasecmp(optarg, "pacbio-ccs") == 0) {
+                mplp.min_frac = 0.1;
+                mplp.min_baseQ = 5;
+                mplp.max_baseQ = 50;
+                mplp.delta_baseQ = 10;
+                mplp.openQ = 25;
+                mplp.extQ = 1;
+                mplp.flag |= MPLP_REALN_PARTIAL;
+                mplp.max_read_len = 99999;
+            } else if (strcasecmp(optarg, "ont") == 0) {
+                fprintf(bcftools_stderr, "For ONT it may be beneficial to also run bcftools call with "
+                        "a higher -P, eg -P0.01 or -P 0.1\n");
+                mplp.min_baseQ = 5;
+                mplp.max_baseQ = 30;
+                mplp.flag &= ~MPLP_REALN;
+                mplp.flag |= MPLP_NO_INDEL;
+            } else if (strcasecmp(optarg, "1.12") == 0) {
+                // 1.12 and earlier
+                mplp.min_frac = 0.002;
+                mplp.min_support = 1;
+                mplp.min_baseQ = 13;
+                mplp.tandemQ = 100;
+                mplp.flag &= ~MPLP_REALN_PARTIAL;
+                mplp.flag |= MPLP_REALN;
+            } else if (strcasecmp(optarg, "illumina") == 0) {
+                mplp.flag |= MPLP_REALN_PARTIAL;
+            } else {
+                fprintf(bcftools_stderr, "Unknown configuration name '%s'\n"
+                        "Please choose from 1.12, illumina, pacbio-ccs or ont\n",
+                        optarg);
+                return 1;
+            }
+            break;
+        case 13: hts_srand48(atoi(optarg)); break;
+        case 14:
+            if ( !strcasecmp(optarg,"drop") ) mplp.ambig_reads = B2B_DROP;
+            else if ( !strcasecmp(optarg,"incAD") ) mplp.ambig_reads = B2B_INC_AD;
+            else if ( !strcasecmp(optarg,"incAD0") ) mplp.ambig_reads = B2B_INC_AD0;
+            else error("The option to --ambig-reads not recognised: %s\n",optarg);
+            break;
         default:
             fprintf(bcftools_stderr,"Invalid option: '%c'\n", c);
             return 1;
@@ -1122,7 +1461,7 @@ int bam_mpileup(int argc, char *argv[])
         return 1;
     }
     int ret,i;
-    if (file_list) 
+    if (file_list)
     {
         if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
         mplp.files  = fn;
@@ -1144,5 +1483,6 @@ int bam_mpileup(int argc, char *argv[])
     if (mplp.bed_itr) regitr_destroy(mplp.bed_itr);
     if (mplp.reg) regidx_destroy(mplp.reg);
     bam_smpl_destroy(mplp.bsmpl);
+
     return ret;
 }
diff --git a/bcftools/ploidy.h b/bcftools/ploidy.h
index 1e7d2f7..7697c65 100644
--- a/bcftools/ploidy.h
+++ b/bcftools/ploidy.h
@@ -1,5 +1,5 @@
 /* 
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014-2015 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/prob1.c b/bcftools/prob1.c
index 954d43c..3ab7bcb 100644
--- a/bcftools/prob1.c
+++ b/bcftools/prob1.c
@@ -1,7 +1,7 @@
 /*  prob1.c -- mathematical utility functions.
 
     Copyright (C) 2010, 2011 Broad Institute.
-    Copyright (C) 2012, 2013 Genome Research Ltd.
+    Copyright (C) 2012, 2013-2014, 2017 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
diff --git a/bcftools/prob1.c.pysam.c b/bcftools/prob1.c.pysam.c
index bd73e1d..6d2bbd1 100644
--- a/bcftools/prob1.c.pysam.c
+++ b/bcftools/prob1.c.pysam.c
@@ -3,7 +3,7 @@
 /*  prob1.c -- mathematical utility functions.
 
     Copyright (C) 2010, 2011 Broad Institute.
-    Copyright (C) 2012, 2013 Genome Research Ltd.
+    Copyright (C) 2012, 2013-2014, 2017 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
diff --git a/bcftools/prob1.h b/bcftools/prob1.h
index a3d4b0d..a562265 100644
--- a/bcftools/prob1.h
+++ b/bcftools/prob1.h
@@ -1,7 +1,7 @@
 /*  prob1.h -- mathematical utility functions.
 
     Copyright (C) 2010, 2011 Broad Institute.
-    Copyright (C) 2012, 2013 Genome Research Ltd.
+    Copyright (C) 2012, 2013-2014 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
diff --git a/bcftools/rbuf.h b/bcftools/rbuf.h
index 2c0e5b1..ef2e206 100644
--- a/bcftools/rbuf.h
+++ b/bcftools/rbuf.h
@@ -1,6 +1,6 @@
 /*  rbuf.h -- round buffers.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2014, 2017 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/regidx.c b/bcftools/regidx.c
index 5c6c8ce..cdaf7ea 100644
--- a/bcftools/regidx.c
+++ b/bcftools/regidx.c
@@ -1,5 +1,5 @@
 /* 
-    Copyright (C) 2014-2017 Genome Research Ltd.
+    Copyright (C) 2014-2018 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/regidx.c.pysam.c b/bcftools/regidx.c.pysam.c
index 684993c..4eb96e8 100644
--- a/bcftools/regidx.c.pysam.c
+++ b/bcftools/regidx.c.pysam.c
@@ -1,7 +1,7 @@
 #include "bcftools.pysam.h"
 
 /* 
-    Copyright (C) 2014-2017 Genome Research Ltd.
+    Copyright (C) 2014-2018 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/regidx.h b/bcftools/regidx.h
index a654dbd..f13b52a 100644
--- a/bcftools/regidx.h
+++ b/bcftools/regidx.h
@@ -1,5 +1,5 @@
 /* 
-    Copyright (C) 2014-2016 Genome Research Ltd.
+    Copyright (C) 2014-2016, 2018 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/reheader.c b/bcftools/reheader.c
index 60a60e1..ae7c622 100644
--- a/bcftools/reheader.c
+++ b/bcftools/reheader.c
@@ -1,6 +1,6 @@
 /*  reheader.c -- reheader subcommand.
 
-    Copyright (C) 2014-2018 Genome Research Ltd.
+    Copyright (C) 2014-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -49,7 +49,7 @@ THE SOFTWARE.  */
 typedef struct _args_t
 {
     char **argv, *fname, *samples_fname, *header_fname, *output_fname;
-    char *fai_fname, *rm_tmpfile;
+    char *fai_fname, *rm_tmpfile, *tmp_prefix;
     htsFile *fp;
     htsFormat type;
     htsThreadPool *threads;
@@ -140,6 +140,33 @@ static char *copy_and_update_contig_line(faidx_t *fai, char *line, void *chr_see
     free(key.s); free(val.s); free(tmp.s);
     return q;
 }
+char *init_tmp_prefix(const char *tmp_prefix)
+{
+    char *prefix = NULL;
+    if ( tmp_prefix )
+    {
+        int len = strlen(tmp_prefix);
+        prefix = (char*) calloc(len+7,1);
+        memcpy(prefix,tmp_prefix,len);
+        memcpy(prefix+len,"XXXXXX",6);
+    }
+    else
+    {
+        #ifdef _WIN32
+            char tmp_path[MAX_PATH];
+            int ret = GetTempPath(MAX_PATH, tmp_path);
+            if (!ret || ret > MAX_PATH)
+                error("Could not get the path to the temporary folder\n");
+            if (strlen(tmp_path) + strlen("/bcftools.XXXXXX") >= MAX_PATH)
+                error("Full path to the temporary folder is too long\n");
+            strcat(tmp_path, "/bcftools.XXXXXX");
+            prefix = strdup(tmp_path);
+        #else
+            prefix = strdup("/tmp/bcftools.XXXXXX");
+        #endif
+    }
+    return prefix;
+}
 static void update_from_fai(args_t *args)
 {
     if ( !strcmp("-",args->fname) )
@@ -147,18 +174,7 @@ static void update_from_fai(args_t *args)
 
     faidx_t *fai = fai_load3(args->fai_fname,args->fai_fname,NULL,FAI_FASTA);
     if ( !fai ) error("Could not parse %s\n", args->fai_fname);
-#ifdef _WIN32
-    char tmp_path[MAX_PATH];
-    int ret = GetTempPath(MAX_PATH, tmp_path);
-    if (!ret || ret > MAX_PATH)
-        error("Could not get the path to the temporary folder\n");
-    if (strlen(tmp_path) + strlen("/bcftools-fai-header-XXXXXX") >= MAX_PATH)
-        error("Full path to the temporary folder is too long\n");
-    strcat(tmp_path, "/bcftools-fai-header-XXXXXX");
-    args->rm_tmpfile = strdup(tmp_path);
-#else
-    args->rm_tmpfile = strdup("/tmp/bcftools-fai-header-XXXXXX");
-#endif
+    args->rm_tmpfile = init_tmp_prefix(args->tmp_prefix);
     int fd = mkstemp(args->rm_tmpfile);
     if ( fd<0 ) error("Could not open a temporary file for writing: %s\n", args->rm_tmpfile);
 
@@ -273,8 +289,8 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id
     hdr->s[hdr->l] = 0;
 
     kstring_t tmp = {0,0,0};
-    i = j = n = 0;
-    while ( hdr->s[idx+i] && hdr->s[idx+i])
+    i = j = n = 0;  // i:traverse the #CHROM line 1 by 1; j:points to the last column
+    while ( hdr->s[idx+i] )
     {
         if ( hdr->s[idx+i]=='\t' )
         {
@@ -282,8 +298,8 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id
 
             if ( ++n>9 )
             {
-                char *ori = khash_str2str_get(hash,hdr->s+idx+j);
-                kputs(ori ? ori : hdr->s+idx+j, &tmp);
+                char *new_name = khash_str2str_get(hash,hdr->s+idx+j);
+                kputs(new_name ? new_name : hdr->s+idx+j, &tmp);
             }
             else
                 kputs(hdr->s+idx+j, &tmp);
@@ -295,8 +311,8 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id
         }
         i++;
     }
-    char *ori = khash_str2str_get(hash,hdr->s+idx+j);
-    kputs(ori ? ori : hdr->s+idx+j, &tmp);
+    char *new_name = khash_str2str_get(hash,hdr->s+idx+j);
+    kputs(new_name ? new_name : hdr->s+idx+j, &tmp);
 
     khash_str2str_destroy_free_all(hash);
 
@@ -317,7 +333,13 @@ static void set_samples(char **samples, int nsamples, kstring_t *hdr)
         if ( hdr->s[i]=='\t' ) ncols++;
         i--;
     }
-    if ( i<0 || strncmp(hdr->s+i+1,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT",45) ) error("Could not parse the header: %s\n", hdr->s);
+    if ( i<0 || strncmp(hdr->s+i+1,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT",45) )
+    {
+        if ( i>0 && !strncmp(hdr->s+i+1,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",38) )
+            error("Error: missing FORMAT fields, cowardly refusing to add samples\n");
+
+        error("Could not parse the header: %s\n", hdr->s);
+    }
 
     // Are the samples "old-sample new-sample" pairs?
     if ( set_sample_pairs(samples,nsamples,hdr, i+1) ) return;
@@ -388,7 +410,10 @@ static void reheader_vcf_gz(args_t *args)
     int nsamples = 0;
     char **samples = NULL;
     if ( args->samples_fname )
+    {
         samples = hts_readlines(args->samples_fname, &nsamples);
+        if ( !samples || !nsamples ) error("Error reading the --samples file \"%s\"\n", args->samples_fname);
+    }
     if ( args->header_fname )
     {
         free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
@@ -444,7 +469,10 @@ static void reheader_vcf(args_t *args)
     int nsamples = 0;
     char **samples = NULL;
     if ( args->samples_fname )
+    {
         samples = hts_readlines(args->samples_fname, &nsamples);
+        if ( !samples || !nsamples ) error("Error reading the --samples file \"%s\"\n", args->samples_fname);
+    }
     if ( args->header_fname )
     {
         free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
@@ -548,7 +576,10 @@ static void reheader_bcf(args_t *args, int is_compressed)
     int i, nsamples = 0;
     char **samples = NULL;
     if ( args->samples_fname )
+    {
         samples = hts_readlines(args->samples_fname, &nsamples);
+        if ( !samples || !nsamples ) error("Error reading the --samples file \"%s\"\n", args->samples_fname);
+    }
     if ( args->header_fname )
     {
         free(htxt.s); htxt.s = NULL; htxt.l = htxt.m = 0;
@@ -639,11 +670,16 @@ static void usage(args_t *args)
     fprintf(stderr, "Usage:   bcftools reheader [OPTIONS] <in.vcf.gz>\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
-    fprintf(stderr, "    -f, --fai <file>        update sequences and their lengths from the .fai file\n");
-    fprintf(stderr, "    -h, --header <file>     new header\n");
-    fprintf(stderr, "    -o, --output <file>     write output to a file [standard output]\n");
-    fprintf(stderr, "    -s, --samples <file>    new sample names\n");
-    fprintf(stderr, "        --threads <int>     use multithreading with <int> worker threads (BCF only) [0]\n");
+    fprintf(stderr, "    -f, --fai FILE             update sequences and their lengths from the .fai file\n");
+    fprintf(stderr, "    -h, --header FILE          new header\n");
+    fprintf(stderr, "    -o, --output FILE          write output to a file [standard output]\n");
+    fprintf(stderr, "    -s, --samples FILE         new sample names\n");
+#ifdef _WIN32
+    fprintf(stderr, "    -T, --temp-prefix PATH     template for temporary file name [/bcftools.XXXXXX]\n");
+#else
+    fprintf(stderr, "    -T, --temp-prefix PATH     template for temporary file name [/tmp/bcftools.XXXXXX]\n");
+#endif
+    fprintf(stderr, "        --threads INT          use multithreading with <int> worker threads (BCF only) [0]\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Example:\n");
     fprintf(stderr, "   # Write out the header to be modified\n");
@@ -666,6 +702,7 @@ int main_reheader(int argc, char *argv[])
     
     static struct option loptions[] =
     {
+        {"temp-prefix",1,0,'T'},
         {"fai",1,0,'f'},
         {"output",1,0,'o'},
         {"header",1,0,'h'},
@@ -673,11 +710,12 @@ int main_reheader(int argc, char *argv[])
         {"threads",1,NULL,1},
         {0,0,0,0}
     };
-    while ((c = getopt_long(argc, argv, "s:h:o:f:",loptions,NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "s:h:o:f:T:",loptions,NULL)) >= 0)
     {
         switch (c)
         {
             case  1 : args->n_threads = strtol(optarg, 0, 0); break;
+            case 'T': args->tmp_prefix = optarg; break;
             case 'f': args->fai_fname = optarg; break;
             case 'o': args->output_fname = optarg; break;
             case 's': args->samples_fname = optarg; break;
@@ -704,10 +742,14 @@ int main_reheader(int argc, char *argv[])
 
     if ( args->type.format==vcf )
     {
-        if ( args->type.compression==bgzf || args->type.compression==gzip )
+        if ( args->type.compression==bgzf )
             reheader_vcf_gz(args);
-        else
+        else if ( args->type.compression==no_compression )
             reheader_vcf(args);
+        else if ( args->type.compression==gzip )
+            error("Error: cannot reheader gzip-compressed files, first convert with `bcftools view --output-type` to a supported format\n");
+        else
+            error("Error: the compression type of \"%s\" is not recognised/supported\n", args->fname);
     }
     else
         reheader_bcf(args, args->type.compression==bgzf || args->type.compression==gzip);
diff --git a/bcftools/reheader.c.pysam.c b/bcftools/reheader.c.pysam.c
index 9f84e4c..380843b 100644
--- a/bcftools/reheader.c.pysam.c
+++ b/bcftools/reheader.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  reheader.c -- reheader subcommand.
 
-    Copyright (C) 2014-2018 Genome Research Ltd.
+    Copyright (C) 2014-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -51,7 +51,7 @@ THE SOFTWARE.  */
 typedef struct _args_t
 {
     char **argv, *fname, *samples_fname, *header_fname, *output_fname;
-    char *fai_fname, *rm_tmpfile;
+    char *fai_fname, *rm_tmpfile, *tmp_prefix;
     htsFile *fp;
     htsFormat type;
     htsThreadPool *threads;
@@ -142,6 +142,33 @@ static char *copy_and_update_contig_line(faidx_t *fai, char *line, void *chr_see
     free(key.s); free(val.s); free(tmp.s);
     return q;
 }
+char *init_tmp_prefix(const char *tmp_prefix)
+{
+    char *prefix = NULL;
+    if ( tmp_prefix )
+    {
+        int len = strlen(tmp_prefix);
+        prefix = (char*) calloc(len+7,1);
+        memcpy(prefix,tmp_prefix,len);
+        memcpy(prefix+len,"XXXXXX",6);
+    }
+    else
+    {
+        #ifdef _WIN32
+            char tmp_path[MAX_PATH];
+            int ret = GetTempPath(MAX_PATH, tmp_path);
+            if (!ret || ret > MAX_PATH)
+                error("Could not get the path to the temporary folder\n");
+            if (strlen(tmp_path) + strlen("/bcftools.XXXXXX") >= MAX_PATH)
+                error("Full path to the temporary folder is too long\n");
+            strcat(tmp_path, "/bcftools.XXXXXX");
+            prefix = strdup(tmp_path);
+        #else
+            prefix = strdup("/tmp/bcftools.XXXXXX");
+        #endif
+    }
+    return prefix;
+}
 static void update_from_fai(args_t *args)
 {
     if ( !strcmp("-",args->fname) )
@@ -149,18 +176,7 @@ static void update_from_fai(args_t *args)
 
     faidx_t *fai = fai_load3(args->fai_fname,args->fai_fname,NULL,FAI_FASTA);
     if ( !fai ) error("Could not parse %s\n", args->fai_fname);
-#ifdef _WIN32
-    char tmp_path[MAX_PATH];
-    int ret = GetTempPath(MAX_PATH, tmp_path);
-    if (!ret || ret > MAX_PATH)
-        error("Could not get the path to the temporary folder\n");
-    if (strlen(tmp_path) + strlen("/bcftools-fai-header-XXXXXX") >= MAX_PATH)
-        error("Full path to the temporary folder is too long\n");
-    strcat(tmp_path, "/bcftools-fai-header-XXXXXX");
-    args->rm_tmpfile = strdup(tmp_path);
-#else
-    args->rm_tmpfile = strdup("/tmp/bcftools-fai-header-XXXXXX");
-#endif
+    args->rm_tmpfile = init_tmp_prefix(args->tmp_prefix);
     int fd = mkstemp(args->rm_tmpfile);
     if ( fd<0 ) error("Could not open a temporary file for writing: %s\n", args->rm_tmpfile);
 
@@ -275,8 +291,8 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id
     hdr->s[hdr->l] = 0;
 
     kstring_t tmp = {0,0,0};
-    i = j = n = 0;
-    while ( hdr->s[idx+i] && hdr->s[idx+i])
+    i = j = n = 0;  // i:traverse the #CHROM line 1 by 1; j:points to the last column
+    while ( hdr->s[idx+i] )
     {
         if ( hdr->s[idx+i]=='\t' )
         {
@@ -284,8 +300,8 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id
 
             if ( ++n>9 )
             {
-                char *ori = khash_str2str_get(hash,hdr->s+idx+j);
-                kputs(ori ? ori : hdr->s+idx+j, &tmp);
+                char *new_name = khash_str2str_get(hash,hdr->s+idx+j);
+                kputs(new_name ? new_name : hdr->s+idx+j, &tmp);
             }
             else
                 kputs(hdr->s+idx+j, &tmp);
@@ -297,8 +313,8 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id
         }
         i++;
     }
-    char *ori = khash_str2str_get(hash,hdr->s+idx+j);
-    kputs(ori ? ori : hdr->s+idx+j, &tmp);
+    char *new_name = khash_str2str_get(hash,hdr->s+idx+j);
+    kputs(new_name ? new_name : hdr->s+idx+j, &tmp);
 
     khash_str2str_destroy_free_all(hash);
 
@@ -319,7 +335,13 @@ static void set_samples(char **samples, int nsamples, kstring_t *hdr)
         if ( hdr->s[i]=='\t' ) ncols++;
         i--;
     }
-    if ( i<0 || strncmp(hdr->s+i+1,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT",45) ) error("Could not parse the header: %s\n", hdr->s);
+    if ( i<0 || strncmp(hdr->s+i+1,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT",45) )
+    {
+        if ( i>0 && !strncmp(hdr->s+i+1,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",38) )
+            error("Error: missing FORMAT fields, cowardly refusing to add samples\n");
+
+        error("Could not parse the header: %s\n", hdr->s);
+    }
 
     // Are the samples "old-sample new-sample" pairs?
     if ( set_sample_pairs(samples,nsamples,hdr, i+1) ) return;
@@ -390,7 +412,10 @@ static void reheader_vcf_gz(args_t *args)
     int nsamples = 0;
     char **samples = NULL;
     if ( args->samples_fname )
+    {
         samples = hts_readlines(args->samples_fname, &nsamples);
+        if ( !samples || !nsamples ) error("Error reading the --samples file \"%s\"\n", args->samples_fname);
+    }
     if ( args->header_fname )
     {
         free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
@@ -446,7 +471,10 @@ static void reheader_vcf(args_t *args)
     int nsamples = 0;
     char **samples = NULL;
     if ( args->samples_fname )
+    {
         samples = hts_readlines(args->samples_fname, &nsamples);
+        if ( !samples || !nsamples ) error("Error reading the --samples file \"%s\"\n", args->samples_fname);
+    }
     if ( args->header_fname )
     {
         free(hdr.s); hdr.s = NULL; hdr.l = hdr.m = 0;
@@ -550,7 +578,10 @@ static void reheader_bcf(args_t *args, int is_compressed)
     int i, nsamples = 0;
     char **samples = NULL;
     if ( args->samples_fname )
+    {
         samples = hts_readlines(args->samples_fname, &nsamples);
+        if ( !samples || !nsamples ) error("Error reading the --samples file \"%s\"\n", args->samples_fname);
+    }
     if ( args->header_fname )
     {
         free(htxt.s); htxt.s = NULL; htxt.l = htxt.m = 0;
@@ -641,11 +672,16 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "Usage:   bcftools reheader [OPTIONS] <in.vcf.gz>\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Options:\n");
-    fprintf(bcftools_stderr, "    -f, --fai <file>        update sequences and their lengths from the .fai file\n");
-    fprintf(bcftools_stderr, "    -h, --header <file>     new header\n");
-    fprintf(bcftools_stderr, "    -o, --output <file>     write output to a file [standard output]\n");
-    fprintf(bcftools_stderr, "    -s, --samples <file>    new sample names\n");
-    fprintf(bcftools_stderr, "        --threads <int>     use multithreading with <int> worker threads (BCF only) [0]\n");
+    fprintf(bcftools_stderr, "    -f, --fai FILE             update sequences and their lengths from the .fai file\n");
+    fprintf(bcftools_stderr, "    -h, --header FILE          new header\n");
+    fprintf(bcftools_stderr, "    -o, --output FILE          write output to a file [standard output]\n");
+    fprintf(bcftools_stderr, "    -s, --samples FILE         new sample names\n");
+#ifdef _WIN32
+    fprintf(bcftools_stderr, "    -T, --temp-prefix PATH     template for temporary file name [/bcftools.XXXXXX]\n");
+#else
+    fprintf(bcftools_stderr, "    -T, --temp-prefix PATH     template for temporary file name [/tmp/bcftools.XXXXXX]\n");
+#endif
+    fprintf(bcftools_stderr, "        --threads INT          use multithreading with <int> worker threads (BCF only) [0]\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Example:\n");
     fprintf(bcftools_stderr, "   # Write out the header to be modified\n");
@@ -657,7 +693,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "   # Reheader the file\n");
     fprintf(bcftools_stderr, "   bcftools reheader -h header.txt -o new.bcf old.bcf\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_reheader(int argc, char *argv[])
@@ -668,6 +704,7 @@ int main_reheader(int argc, char *argv[])
     
     static struct option loptions[] =
     {
+        {"temp-prefix",1,0,'T'},
         {"fai",1,0,'f'},
         {"output",1,0,'o'},
         {"header",1,0,'h'},
@@ -675,11 +712,12 @@ int main_reheader(int argc, char *argv[])
         {"threads",1,NULL,1},
         {0,0,0,0}
     };
-    while ((c = getopt_long(argc, argv, "s:h:o:f:",loptions,NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "s:h:o:f:T:",loptions,NULL)) >= 0)
     {
         switch (c)
         {
             case  1 : args->n_threads = strtol(optarg, 0, 0); break;
+            case 'T': args->tmp_prefix = optarg; break;
             case 'f': args->fai_fname = optarg; break;
             case 'o': args->output_fname = optarg; break;
             case 's': args->samples_fname = optarg; break;
@@ -706,10 +744,14 @@ int main_reheader(int argc, char *argv[])
 
     if ( args->type.format==vcf )
     {
-        if ( args->type.compression==bgzf || args->type.compression==gzip )
+        if ( args->type.compression==bgzf )
             reheader_vcf_gz(args);
-        else
+        else if ( args->type.compression==no_compression )
             reheader_vcf(args);
+        else if ( args->type.compression==gzip )
+            error("Error: cannot reheader gzip-compressed files, first convert with `bcftools view --output-type` to a supported format\n");
+        else
+            error("Error: the compression type of \"%s\" is not recognised/supported\n", args->fname);
     }
     else
         reheader_bcf(args, args->type.compression==bgzf || args->type.compression==gzip);
diff --git a/bcftools/smpl_ilist.c b/bcftools/smpl_ilist.c
index 9a77e62..d170db5 100644
--- a/bcftools/smpl_ilist.c
+++ b/bcftools/smpl_ilist.c
@@ -1,5 +1,5 @@
 /* 
-    Copyright (C) 2016 Genome Research Ltd.
+    Copyright (C) 2016, 2018 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/smpl_ilist.c.pysam.c b/bcftools/smpl_ilist.c.pysam.c
index 45fe5af..85b5e2f 100644
--- a/bcftools/smpl_ilist.c.pysam.c
+++ b/bcftools/smpl_ilist.c.pysam.c
@@ -1,7 +1,7 @@
 #include "bcftools.pysam.h"
 
 /* 
-    Copyright (C) 2016 Genome Research Ltd.
+    Copyright (C) 2016, 2018 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/str_finder.c b/bcftools/str_finder.c
new file mode 100644
index 0000000..800cbfe
--- /dev/null
+++ b/bcftools/str_finder.c
@@ -0,0 +1,270 @@
+/*  str_finder.c -- Short Tandem Repeat finder.
+    Originally from Crumble (https://github.com/jkbonfield/crumble)
+
+    Copyright (C) 2015-2016, 2021 Genome Research Ltd.
+
+    Author: James Bonfield <jkb@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <ctype.h>
+
+#include "str_finder.h"
+#include "utlist.h"
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+typedef unsigned char uc;
+
+static void add_rep(rep_ele **list, char *cons, int clen, int pos, int rlen,
+		    int lower_only, unsigned int w) {
+    rep_ele *el, *tmp, *prev;
+    char *cp1, *cp2, *cp_end;
+    int i;
+
+    // Already handled this in previous overlap?
+    if (*list) {
+	tmp = DL_TAIL(*list);
+	if (tmp->start <= pos-rlen*2+1 && tmp->end >= pos)
+	    return;
+    }
+
+    // Find current and last occurrence of repeated word.
+
+    cp2 = &cons[pos+1];
+    // If unpadded, this is quicker: cp1 = &cons[pos+1-rlen];
+
+    for (cp1 = &cons[pos], i = 1; i < rlen; cp1--) // compensate for pads
+	if (*cp1 == '*')
+	    continue;
+	else
+	    i++;
+    while (*cp1 == '*')
+	cp1--;
+
+
+    // Scan ahead to see how much further it goes.
+    cp_end = &cons[clen];
+    while (cp2 < cp_end) {
+	if (*cp1 != *cp2)
+	    break;
+
+	w<<=2;
+	w|=*cp2;
+	cp1++;
+	cp2++;
+    }
+
+    if (!(el = malloc(sizeof(*el))))
+	return;
+
+    el->end   = pos + cp2-&cons[pos+1];
+    el->rep_len = rlen;
+    pos++;
+    while (rlen--) {
+	while (cons[--pos] == '*');
+	while (cons[--pos] == '*');
+    }
+    //pos++;
+    while (pos > 1 && cons[pos-1] == '*') pos--;
+    el->start = pos;
+
+    // Check it meets the lower-case only criteria
+    if (lower_only) {
+	int lc = 0;
+	for (i = el->start; i <= el->end; i++) {
+	    if (islower(cons[i])) {
+		lc = 1;
+		break;
+	    }
+	}
+
+	if (!lc) {
+	    free(el);
+	    return;
+	}
+    }
+
+    // Remove any older items on the list that are entirely contained within el
+    if (*list) {
+	tmp = DL_TAIL(*list);
+	do {
+	    prev = tmp->prev;
+	    if (tmp->end < el->start)
+		break;
+
+	    if (tmp->start >= el->start) {
+		DL_DELETE(*list, tmp);
+		free(tmp);
+	    }
+
+	    if (tmp == DL_HEAD(*list))
+		break;
+	    tmp = prev;
+	} while (*list);
+    }
+
+    DL_APPEND(*list, el);
+
+    return;
+}
+
+/*
+ * Finds repeated homopolymers up to 8-mers.
+ * Note this assumes cons is 0-3, so N of 4 may rarely give false hits.
+ *
+ * Returns a list of rep_ele structs holding the start,end tuples of repeats;
+ *         NULL on failure.
+ */
+rep_ele *find_STR(char *cons, int len, int lower_only) {
+    int i, j;
+    uint32_t w = 0;
+    rep_ele *reps = NULL;
+
+    for (i = j = 0; i < len && j < 15; i++) {
+	if (cons[i] == '*') continue;
+
+	w <<= 2;
+	w |= cons[i];
+	//printf("%3d %c w=%08x\n", i, cons[i], w);
+	if (j>= 1 && (w&0x0003) == ((w>> 2)&0x0003))
+	    add_rep(&reps, cons, len, i, 1, lower_only, w);
+	if (j>= 3 && (w&0x000f) == ((w>> 4)&0x000f))
+	    add_rep(&reps, cons, len, i, 2, lower_only, w);
+	if (j>= 5 && (w&0x003f) == ((w>> 6)&0x003f))
+	    add_rep(&reps, cons, len, i, 3, lower_only, w);
+	if (j>= 7 && (w&0x00ff) == ((w>> 8)&0x00ff))
+	    add_rep(&reps, cons, len, i, 4, lower_only, w);
+	if (j>= 9 && (w&0x03ff) == ((w>>10)&0x03ff))
+	    add_rep(&reps, cons, len, i, 5, lower_only, w);
+	if (j>=11 && (w&0x0fff) == ((w>>12)&0x0fff))
+	    add_rep(&reps, cons, len, i, 6, lower_only, w);
+	if (j>=13 && (w&0x3fff) == ((w>>14)&0x3fff))
+	    add_rep(&reps, cons, len, i, 7, lower_only, w);
+
+	j++;
+    }
+
+    for (; i < len; i++) {	
+	if (cons[i] == '*') continue;
+
+	w <<= 2;
+	w |= cons[i];
+	//printf("%3d %c w=%08x\n", i, cons[i], w);
+	if ((w&0xffff) == ((w>>16)&0xffff)) 
+	    add_rep(&reps, cons, len, i, 8, lower_only, w);
+	else if ((w&0x3fff) == ((w>>14)&0x3fff)) 
+	    add_rep(&reps, cons, len, i, 7, lower_only, w);
+	else if ((w&0x0fff) == ((w>>12)&0x0fff)) 
+	    add_rep(&reps, cons, len, i, 6, lower_only, w);
+	else if ((w&0x03ff) == ((w>>10)&0x03ff)) 
+	    add_rep(&reps, cons, len, i, 5, lower_only, w);
+	else if ((w&0x00ff) == ((w>> 8)&0x00ff)) 
+	    add_rep(&reps, cons, len, i, 4, lower_only, w);
+	else if ((w&0x003f) == ((w>> 6)&0x003f)) 
+	    add_rep(&reps, cons, len, i, 3, lower_only, w);
+	else if ((w&0x000f) == ((w>> 4)&0x000f)) 
+	    add_rep(&reps, cons, len, i, 2, lower_only, w);
+	else if ((w&0x0003) == ((w>> 2)&0x0003)) 
+	    add_rep(&reps, cons, len, i, 1, lower_only, w);
+    }
+
+    return reps;
+}
+
+/* -----------------------------------------------------------------------------
+ * Computes repeat regions in the consensus and then provides a bit mask
+ * indicating the extent of the STRs.
+ *
+ * The purpose of this is to identify where a read needs to span the entire
+ * region in order to validate how many copies of a repeat word are present.
+ * This only really has a major impact when indels are involved.
+ *
+ * For example, given this multiple alignment:
+ *
+ * S1 GATCGGACGAGAG
+ * S2 GATCGGACGAGAGAGAGAGAGT
+ * S3 GATCGGACGAGAGAGAGAG**TCGGAC
+ * S4     GGACGAGAGAGAGAGAGTCGGAC
+ * S5        CGAGAGAGAGAG**TCGGAC
+ * S6              AGAGAGAGTCGGAC
+ *
+ * We have subseq of GAGAGAGAGAG** vs GAGAGAGAGAGAG. The first and last
+ * (S1 and S6) sequences do not span and so we do not know which allele they
+ * match. Specifically as the pad is at the right hand end, the alignment of
+ * S6 gives incorrect weight to the consensus as it is stating AG when it
+ * may actually be ** at that point.
+ *
+ * By identifying the repeats we can soft clip as follows:
+ *
+ * S1 GATCGGACgagag
+ * S2 GATCGGACGAGAGAGAGAGAGT
+ * S3 GATCGGACGAGAGAGAGAG**TCGGAC
+ * S4     GGACGAGAGAGAGAGAGTCGGAC
+ * S5        CGAGAGAGAGAG**TCGGAC
+ * S6              agagagagTCGGAC
+ *
+ * Returns an array of STR vs no-STR values.
+ *         0  => non repetitive.
+ *         1+ => repeat with consecutive bit-number for repeat size.
+ *
+ * Eg:  AGGGGAGGAGAAGAC
+ *       1111  1111
+ *         2222222
+ *              444444
+ * =>   011331137754440
+ */
+char *cons_mark_STR(char *cons, int len, int lower_only) {
+    rep_ele *reps, *elt, *tmp;
+    char *str;
+
+    str = calloc(1, len);
+    reps = find_STR(cons, len, lower_only);
+
+    DL_FOREACH_SAFE(reps, elt, tmp) {
+	int i, v = 0;
+	
+	//printf("%2d .. %2d %.*s\n", elt->start, elt->end,
+	//       elt->end - elt->start+1, &cons[elt->start]);
+
+	// What is there?
+	for (i = MAX(elt->start-1,0); i <= MIN(elt->end+1,len-1); i++)
+	    v |= str[i];
+
+	for (i = 0; i < 8; i++) {
+	    if (!(v&(1<<i)))
+		break;
+	}
+	v = (i == 8) ? 1 : (1<<i);
+
+	// Add new if available, or just overload 1 if not
+	for (i = elt->start; i <= elt->end; i++)
+	    str[i] |= v;
+
+	DL_DELETE(reps, elt);
+	free(elt);
+    }
+
+    return str;
+}
diff --git a/bcftools/str_finder.c.pysam.c b/bcftools/str_finder.c.pysam.c
new file mode 100644
index 0000000..296c867
--- /dev/null
+++ b/bcftools/str_finder.c.pysam.c
@@ -0,0 +1,272 @@
+#include "bcftools.pysam.h"
+
+/*  str_finder.c -- Short Tandem Repeat finder.
+    Originally from Crumble (https://github.com/jkbonfield/crumble)
+
+    Copyright (C) 2015-2016, 2021 Genome Research Ltd.
+
+    Author: James Bonfield <jkb@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <ctype.h>
+
+#include "str_finder.h"
+#include "utlist.h"
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+typedef unsigned char uc;
+
+static void add_rep(rep_ele **list, char *cons, int clen, int pos, int rlen,
+		    int lower_only, unsigned int w) {
+    rep_ele *el, *tmp, *prev;
+    char *cp1, *cp2, *cp_end;
+    int i;
+
+    // Already handled this in previous overlap?
+    if (*list) {
+	tmp = DL_TAIL(*list);
+	if (tmp->start <= pos-rlen*2+1 && tmp->end >= pos)
+	    return;
+    }
+
+    // Find current and last occurrence of repeated word.
+
+    cp2 = &cons[pos+1];
+    // If unpadded, this is quicker: cp1 = &cons[pos+1-rlen];
+
+    for (cp1 = &cons[pos], i = 1; i < rlen; cp1--) // compensate for pads
+	if (*cp1 == '*')
+	    continue;
+	else
+	    i++;
+    while (*cp1 == '*')
+	cp1--;
+
+
+    // Scan ahead to see how much further it goes.
+    cp_end = &cons[clen];
+    while (cp2 < cp_end) {
+	if (*cp1 != *cp2)
+	    break;
+
+	w<<=2;
+	w|=*cp2;
+	cp1++;
+	cp2++;
+    }
+
+    if (!(el = malloc(sizeof(*el))))
+	return;
+
+    el->end   = pos + cp2-&cons[pos+1];
+    el->rep_len = rlen;
+    pos++;
+    while (rlen--) {
+	while (cons[--pos] == '*');
+	while (cons[--pos] == '*');
+    }
+    //pos++;
+    while (pos > 1 && cons[pos-1] == '*') pos--;
+    el->start = pos;
+
+    // Check it meets the lower-case only criteria
+    if (lower_only) {
+	int lc = 0;
+	for (i = el->start; i <= el->end; i++) {
+	    if (islower(cons[i])) {
+		lc = 1;
+		break;
+	    }
+	}
+
+	if (!lc) {
+	    free(el);
+	    return;
+	}
+    }
+
+    // Remove any older items on the list that are entirely contained within el
+    if (*list) {
+	tmp = DL_TAIL(*list);
+	do {
+	    prev = tmp->prev;
+	    if (tmp->end < el->start)
+		break;
+
+	    if (tmp->start >= el->start) {
+		DL_DELETE(*list, tmp);
+		free(tmp);
+	    }
+
+	    if (tmp == DL_HEAD(*list))
+		break;
+	    tmp = prev;
+	} while (*list);
+    }
+
+    DL_APPEND(*list, el);
+
+    return;
+}
+
+/*
+ * Finds repeated homopolymers up to 8-mers.
+ * Note this assumes cons is 0-3, so N of 4 may rarely give false hits.
+ *
+ * Returns a list of rep_ele structs holding the start,end tuples of repeats;
+ *         NULL on failure.
+ */
+rep_ele *find_STR(char *cons, int len, int lower_only) {
+    int i, j;
+    uint32_t w = 0;
+    rep_ele *reps = NULL;
+
+    for (i = j = 0; i < len && j < 15; i++) {
+	if (cons[i] == '*') continue;
+
+	w <<= 2;
+	w |= cons[i];
+	//printf("%3d %c w=%08x\n", i, cons[i], w);
+	if (j>= 1 && (w&0x0003) == ((w>> 2)&0x0003))
+	    add_rep(&reps, cons, len, i, 1, lower_only, w);
+	if (j>= 3 && (w&0x000f) == ((w>> 4)&0x000f))
+	    add_rep(&reps, cons, len, i, 2, lower_only, w);
+	if (j>= 5 && (w&0x003f) == ((w>> 6)&0x003f))
+	    add_rep(&reps, cons, len, i, 3, lower_only, w);
+	if (j>= 7 && (w&0x00ff) == ((w>> 8)&0x00ff))
+	    add_rep(&reps, cons, len, i, 4, lower_only, w);
+	if (j>= 9 && (w&0x03ff) == ((w>>10)&0x03ff))
+	    add_rep(&reps, cons, len, i, 5, lower_only, w);
+	if (j>=11 && (w&0x0fff) == ((w>>12)&0x0fff))
+	    add_rep(&reps, cons, len, i, 6, lower_only, w);
+	if (j>=13 && (w&0x3fff) == ((w>>14)&0x3fff))
+	    add_rep(&reps, cons, len, i, 7, lower_only, w);
+
+	j++;
+    }
+
+    for (; i < len; i++) {	
+	if (cons[i] == '*') continue;
+
+	w <<= 2;
+	w |= cons[i];
+	//printf("%3d %c w=%08x\n", i, cons[i], w);
+	if ((w&0xffff) == ((w>>16)&0xffff)) 
+	    add_rep(&reps, cons, len, i, 8, lower_only, w);
+	else if ((w&0x3fff) == ((w>>14)&0x3fff)) 
+	    add_rep(&reps, cons, len, i, 7, lower_only, w);
+	else if ((w&0x0fff) == ((w>>12)&0x0fff)) 
+	    add_rep(&reps, cons, len, i, 6, lower_only, w);
+	else if ((w&0x03ff) == ((w>>10)&0x03ff)) 
+	    add_rep(&reps, cons, len, i, 5, lower_only, w);
+	else if ((w&0x00ff) == ((w>> 8)&0x00ff)) 
+	    add_rep(&reps, cons, len, i, 4, lower_only, w);
+	else if ((w&0x003f) == ((w>> 6)&0x003f)) 
+	    add_rep(&reps, cons, len, i, 3, lower_only, w);
+	else if ((w&0x000f) == ((w>> 4)&0x000f)) 
+	    add_rep(&reps, cons, len, i, 2, lower_only, w);
+	else if ((w&0x0003) == ((w>> 2)&0x0003)) 
+	    add_rep(&reps, cons, len, i, 1, lower_only, w);
+    }
+
+    return reps;
+}
+
+/* -----------------------------------------------------------------------------
+ * Computes repeat regions in the consensus and then provides a bit mask
+ * indicating the extent of the STRs.
+ *
+ * The purpose of this is to identify where a read needs to span the entire
+ * region in order to validate how many copies of a repeat word are present.
+ * This only really has a major impact when indels are involved.
+ *
+ * For example, given this multiple alignment:
+ *
+ * S1 GATCGGACGAGAG
+ * S2 GATCGGACGAGAGAGAGAGAGT
+ * S3 GATCGGACGAGAGAGAGAG**TCGGAC
+ * S4     GGACGAGAGAGAGAGAGTCGGAC
+ * S5        CGAGAGAGAGAG**TCGGAC
+ * S6              AGAGAGAGTCGGAC
+ *
+ * We have subseq of GAGAGAGAGAG** vs GAGAGAGAGAGAG. The first and last
+ * (S1 and S6) sequences do not span and so we do not know which allele they
+ * match. Specifically as the pad is at the right hand end, the alignment of
+ * S6 gives incorrect weight to the consensus as it is stating AG when it
+ * may actually be ** at that point.
+ *
+ * By identifying the repeats we can soft clip as follows:
+ *
+ * S1 GATCGGACgagag
+ * S2 GATCGGACGAGAGAGAGAGAGT
+ * S3 GATCGGACGAGAGAGAGAG**TCGGAC
+ * S4     GGACGAGAGAGAGAGAGTCGGAC
+ * S5        CGAGAGAGAGAG**TCGGAC
+ * S6              agagagagTCGGAC
+ *
+ * Returns an array of STR vs no-STR values.
+ *         0  => non repetitive.
+ *         1+ => repeat with consecutive bit-number for repeat size.
+ *
+ * Eg:  AGGGGAGGAGAAGAC
+ *       1111  1111
+ *         2222222
+ *              444444
+ * =>   011331137754440
+ */
+char *cons_mark_STR(char *cons, int len, int lower_only) {
+    rep_ele *reps, *elt, *tmp;
+    char *str;
+
+    str = calloc(1, len);
+    reps = find_STR(cons, len, lower_only);
+
+    DL_FOREACH_SAFE(reps, elt, tmp) {
+	int i, v = 0;
+	
+	//printf("%2d .. %2d %.*s\n", elt->start, elt->end,
+	//       elt->end - elt->start+1, &cons[elt->start]);
+
+	// What is there?
+	for (i = MAX(elt->start-1,0); i <= MIN(elt->end+1,len-1); i++)
+	    v |= str[i];
+
+	for (i = 0; i < 8; i++) {
+	    if (!(v&(1<<i)))
+		break;
+	}
+	v = (i == 8) ? 1 : (1<<i);
+
+	// Add new if available, or just overload 1 if not
+	for (i = elt->start; i <= elt->end; i++)
+	    str[i] |= v;
+
+	DL_DELETE(reps, elt);
+	free(elt);
+    }
+
+    return str;
+}
diff --git a/bcftools/str_finder.h b/bcftools/str_finder.h
new file mode 100644
index 0000000..242f59e
--- /dev/null
+++ b/bcftools/str_finder.h
@@ -0,0 +1,64 @@
+/*  str_finder.h -- Short Tandem Repeat finder.
+    Originally from Crumble (https://github.com/jkbonfield/crumble)
+
+    Copyright (C) 2015-2016, 2021 Genome Research Ltd.
+
+    Author: James Bonfield <jkb@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#ifndef _STR_FINDER_H_
+#define _STR_FINDER_H_
+
+#include "utlist.h"
+
+typedef struct rep_ele {
+    int start, end, rep_len;  /* start..end: inclusive offsets of the repeat; rep_len: presumably the repeat unit length - confirm in find_STR */
+    struct rep_ele *prev;     /* prev/next: links required by the utlist DL_ macros */
+    struct rep_ele *next;
+} rep_ele;
+
+/*
+ * Finds repeated homopolymers up to 8-mers.
+ *
+ * If lower_only is true then it only adds STRs for regions that
+ * contain at least one lower-case base. This can be used as a marker
+ * for looking for specific types of repeats.
+ * (One use for this is to only mark STRs that overlap a heterozygous
+ * indel region.)
+ *
+ * Returns a list of rep_ele structs holding the start,end tuples of repeats;
+ *         NULL on failure.
+ */
+rep_ele *find_STR(char *cons, int len, int lower_only);
+
+/*
+ * Returns an array of STR vs no-STR values.
+ *         0  => non repetitive.
+ *         1+ => repeat with consecutive bit-number for repeat size.
+ *
+ * Eg:  AGGGGAGGAGAAGAC
+ *       1111  1111
+ *         2222222
+ *              444444
+ * =>   011331137754440
+ */
+char *cons_mark_STR(char *cons, int len, int lower_only);
+
+#endif /* _STR_FINDER_H_ */
diff --git a/bcftools/utlist.h b/bcftools/utlist.h
new file mode 100644
index 0000000..28cf8a3
--- /dev/null
+++ b/bcftools/utlist.h
@@ -0,0 +1,761 @@
+/*
+Copyright (c) 2007-2014, Troy D. Hanson   http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTLIST_H
+#define UTLIST_H
+
+#define UTLIST_VERSION 1.9.9
+
+#include <assert.h>
+
+/* 
+ * This file contains macros to manipulate singly and doubly-linked lists.
+ *
+ * 1. LL_ macros:  singly-linked lists.
+ * 2. DL_ macros:  doubly-linked lists.
+ * 3. CDL_ macros: circular doubly-linked lists.
+ *
+ * To use singly-linked lists, your structure must have a "next" pointer.
+ * To use doubly-linked lists, your structure must have "prev" and "next" pointers.
+ * Either way, the pointer to the head of the list must be initialized to NULL.
+ * 
+ * ---------------- EXAMPLE -------------------------
+ * struct item {
+ *      int id;
+ *      struct item *prev, *next;
+ * };
+ *
+ * struct item *list = NULL;
+ *
+ * int main() {
+ *      struct item *item;
+ *      ... allocate and populate item ...
+ *      DL_APPEND(list, item);
+ * }
+ * --------------------------------------------------
+ *
+ * For doubly-linked lists, the append and delete macros are O(1)
+ * For singly-linked lists, append and delete are O(n) but prepend is O(1)
+ * The sort macro is O(n log(n)) for all types of single/double/circular lists.
+ */
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+   when compiling c++ code), this code uses whatever method is needed
+   or, for VS2008 where neither is available, uses casting workarounds. */
+#ifdef _MSC_VER            /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
+#define LDECLTYPE(x) decltype(x)
+#else                     /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE       /* selects the char*-aliasing workarounds further down */
+#define LDECLTYPE(x) char*  /* every temporary becomes an untyped char* */
+#endif
+#elif defined(__ICCARM__)  /* handled the same way as the VS2008 case */
+#define NO_DECLTYPE
+#define LDECLTYPE(x) char*
+#else                      /* GNU, Sun and other compilers */
+#define LDECLTYPE(x) __typeof(x)  /* temporaries get the same type as the list head */
+#endif
+
+/* for VS2008 we use some workarounds to get around the lack of decltype,
+ * namely, we always reassign our tmp variable to the list head if we need
+ * to dereference its prev/next pointers, and save/restore the real head.*/
+#ifdef NO_DECLTYPE  /* temporaries are char*, so fields are reached through the (typed) list head */
+#define _SV(elt,list) _tmp = (char*)(list); {char **_alias = (char**)&(list); *_alias = (elt); } /* save head in the caller's _tmp, then point head at elt */
+#define _NEXT(elt,list,next) ((char*)((list)->next)) /* read ->next via the head (which _SV aimed at elt) */
+#define _NEXTASGN(elt,list,to,next) { char **_alias = (char**)&((list)->next); *_alias=(char*)(to); } /* head->next = to */
+/* #define _PREV(elt,list,prev) ((char*)((list)->prev)) */
+#define _PREVASGN(elt,list,to,prev) { char **_alias = (char**)&((list)->prev); *_alias=(char*)(to); } /* head->prev = to */
+#define _RS(list) { char **_alias = (char**)&(list); *_alias=_tmp; } /* restore the head saved by _SV */
+#define _CASTASGN(a,b) { char **_alias = (char**)&(a); *_alias=(char*)(b); } /* a = b through a char* alias */
+#else /* typed temporaries: operate on elt directly, no save/restore needed */
+#define _SV(elt,list) /* expands to nothing */
+#define _NEXT(elt,list,next) ((elt)->next)
+#define _NEXTASGN(elt,list,to,next) ((elt)->next)=(to)
+/* #define _PREV(elt,list,prev) ((elt)->prev) */
+#define _PREVASGN(elt,list,to,prev) ((elt)->prev)=(to)
+#define _RS(list) /* expands to nothing */
+#define _CASTASGN(a,b) (a)=(b)
+#endif
+
+/******************************************************************************
+ * The sort macro is an adaptation of Simon Tatham's O(n log(n)) mergesort    *
+ * Unwieldy variable names used here to avoid shadowing passed-in variables.  *
+ *****************************************************************************/
+#define LL_SORT(list, cmp)                                                                     \
+    LL_SORT2(list, cmp, next)
+
+#define LL_SORT2(list, cmp, next)                                                              \
+do {                                                                                           \
+  LDECLTYPE(list) _ls_p;                                                                       \
+  LDECLTYPE(list) _ls_q;                                                                       \
+  LDECLTYPE(list) _ls_e;                                                                       \
+  LDECLTYPE(list) _ls_tail;                                                                    \
+  LDECLTYPE(list) _tmp = NULL;   /* head save slot that _SV/_RS need under NO_DECLTYPE */      \
+  int _ls_insize, _ls_nmerges, _ls_psize, _ls_qsize, _ls_i, _ls_looping;                       \
+  (void)_tmp;                    /* unused when decltype/__typeof is available */              \
+  if (list) {                                                                                  \
+    _ls_insize = 1;                                                                            \
+    _ls_looping = 1;                                                                           \
+    while (_ls_looping) {  /* one bottom-up merge pass per iteration */                        \
+      _CASTASGN(_ls_p,list);                                                                   \
+      list = NULL;                                                                             \
+      _ls_tail = NULL;                                                                         \
+      _ls_nmerges = 0;                                                                         \
+      while (_ls_p) {                                                                          \
+        _ls_nmerges++;                                                                         \
+        _ls_q = _ls_p;                                                                         \
+        _ls_psize = 0;                                                                         \
+        for (_ls_i = 0; _ls_i < _ls_insize; _ls_i++) {  /* step q one run ahead of p */        \
+          _ls_psize++;                                                                         \
+          _SV(_ls_q,list); _ls_q = _NEXT(_ls_q,list,next); _RS(list);                          \
+          if (!_ls_q) break;                                                                   \
+        }                                                                                      \
+        _ls_qsize = _ls_insize;                                                                \
+        while (_ls_psize > 0 || (_ls_qsize > 0 && _ls_q)) {  /* merge runs p and q */          \
+          if (_ls_psize == 0) {                                                                \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+          } else if (_ls_qsize == 0 || !_ls_q) {                                               \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+          } else if (cmp(_ls_p,_ls_q) <= 0) {                                                  \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+          } else {                                                                             \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+          }                                                                                    \
+          if (_ls_tail) {                                                                      \
+            _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,_ls_e,next); _RS(list);                \
+          } else {                                                                             \
+            _CASTASGN(list,_ls_e);                                                             \
+          }                                                                                    \
+          _ls_tail = _ls_e;                                                                    \
+        }                                                                                      \
+        _ls_p = _ls_q;                                                                         \
+      }                                                                                        \
+      if (_ls_tail) {                                                                          \
+        _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,NULL,next); _RS(list);                     \
+      }                                                                                        \
+      if (_ls_nmerges <= 1) { _ls_looping=0; }  /* stop after a pass with at most one merge */ \
+      _ls_insize *= 2;  /* runs double in length each pass */                                  \
+    }                                                                                          \
+  }                                                                                            \
+} while (0)
+
+
+#define DL_SORT(list, cmp)                                                                     \
+    DL_SORT2(list, cmp, prev, next)
+
+#define DL_SORT2(list, cmp, prev, next)                                                        \
+do {                                                                                           \
+  LDECLTYPE(list) _ls_p;                                                                       \
+  LDECLTYPE(list) _ls_q;                                                                       \
+  LDECLTYPE(list) _ls_e;                                                                       \
+  LDECLTYPE(list) _ls_tail;                                                                    \
+  LDECLTYPE(list) _tmp = NULL;   /* head save slot that _SV/_RS need under NO_DECLTYPE */      \
+  int _ls_insize, _ls_nmerges, _ls_psize, _ls_qsize, _ls_i, _ls_looping;                       \
+  (void)_tmp;                    /* unused when decltype/__typeof is available */              \
+  if (list) {                                                                                  \
+    _ls_insize = 1;                                                                            \
+    _ls_looping = 1;                                                                           \
+    while (_ls_looping) {  /* one bottom-up merge pass per iteration */                        \
+      _CASTASGN(_ls_p,list);                                                                   \
+      list = NULL;                                                                             \
+      _ls_tail = NULL;                                                                         \
+      _ls_nmerges = 0;                                                                         \
+      while (_ls_p) {                                                                          \
+        _ls_nmerges++;                                                                         \
+        _ls_q = _ls_p;                                                                         \
+        _ls_psize = 0;                                                                         \
+        for (_ls_i = 0; _ls_i < _ls_insize; _ls_i++) {  /* step q one run ahead of p */        \
+          _ls_psize++;                                                                         \
+          _SV(_ls_q,list); _ls_q = _NEXT(_ls_q,list,next); _RS(list);                          \
+          if (!_ls_q) break;                                                                   \
+        }                                                                                      \
+        _ls_qsize = _ls_insize;                                                                \
+        while (_ls_psize > 0 || (_ls_qsize > 0 && _ls_q)) {  /* merge runs p and q */          \
+          if (_ls_psize == 0) {                                                                \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+          } else if (_ls_qsize == 0 || !_ls_q) {                                               \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+          } else if (cmp(_ls_p,_ls_q) <= 0) {                                                  \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+          } else {                                                                             \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+          }                                                                                    \
+          if (_ls_tail) {                                                                      \
+            _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,_ls_e,next); _RS(list);                \
+          } else {                                                                             \
+            _CASTASGN(list,_ls_e);                                                             \
+          }                                                                                    \
+          _SV(_ls_e,list); _PREVASGN(_ls_e,list,_ls_tail,prev); _RS(list);                     \
+          _ls_tail = _ls_e;                                                                    \
+        }                                                                                      \
+        _ls_p = _ls_q;                                                                         \
+      }                                                                                        \
+      _CASTASGN(list->prev, _ls_tail);  /* head->prev is kept pointing at the tail */          \
+      _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,NULL,next); _RS(list);                       \
+      if (_ls_nmerges <= 1) { _ls_looping=0; }  /* stop after a pass with at most one merge */ \
+      _ls_insize *= 2;  /* runs double in length each pass */                                  \
+    }                                                                                          \
+  }                                                                                            \
+} while (0)
+
+
+#define DL_HEAD(list) (list) /* the head pointer itself; NULL for an empty list */
+#define DL_TAIL(list) ((list) ? (list)->prev : NULL) /* relies on head->prev pointing at the last element, as DL_SORT2 leaves it */
+
+#define CDL_SORT(list, cmp)                                                                    \
+    CDL_SORT2(list, cmp, prev, next)
+
+#define CDL_SORT2(list, cmp, prev, next)                                                       \
+do {                                                                                           \
+  LDECLTYPE(list) _ls_p;                                                                       \
+  LDECLTYPE(list) _ls_q;                                                                       \
+  LDECLTYPE(list) _ls_e;                                                                       \
+  LDECLTYPE(list) _ls_tail;                                                                    \
+  LDECLTYPE(list) _ls_oldhead;  /* head on entry to a pass; marks the wrap point */            \
+  LDECLTYPE(list) _tmp;  /* scratch: _SV/_RS save slot, and new head below */                  \
+  int _ls_insize, _ls_nmerges, _ls_psize, _ls_qsize, _ls_i, _ls_looping;                       \
+  if (list) {                                                                                  \
+    _ls_insize = 1;                                                                            \
+    _ls_looping = 1;                                                                           \
+    while (_ls_looping) {  /* one bottom-up merge pass per iteration */                        \
+      _CASTASGN(_ls_p,list);                                                                   \
+      _CASTASGN(_ls_oldhead,list);                                                             \
+      list = NULL;                                                                             \
+      _ls_tail = NULL;                                                                         \
+      _ls_nmerges = 0;                                                                         \
+      while (_ls_p) {                                                                          \
+        _ls_nmerges++;                                                                         \
+        _ls_q = _ls_p;                                                                         \
+        _ls_psize = 0;                                                                         \
+        for (_ls_i = 0; _ls_i < _ls_insize; _ls_i++) {  /* step q one run ahead of p */        \
+          _ls_psize++;                                                                         \
+          _SV(_ls_q,list);                                                                     \
+          if (_NEXT(_ls_q,list,next) == _ls_oldhead) {  /* wrapped round to the start */       \
+            _ls_q = NULL;                                                                      \
+          } else {                                                                             \
+            _ls_q = _NEXT(_ls_q,list,next);                                                    \
+          }                                                                                    \
+          _RS(list);                                                                           \
+          if (!_ls_q) break;                                                                   \
+        }                                                                                      \
+        _ls_qsize = _ls_insize;                                                                \
+        while (_ls_psize > 0 || (_ls_qsize > 0 && _ls_q)) {  /* merge runs p and q */          \
+          if (_ls_psize == 0) {                                                                \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+            if (_ls_q == _ls_oldhead) { _ls_q = NULL; }                                        \
+          } else if (_ls_qsize == 0 || !_ls_q) {                                               \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+            if (_ls_p == _ls_oldhead) { _ls_p = NULL; }                                        \
+          } else if (cmp(_ls_p,_ls_q) <= 0) {                                                  \
+            _ls_e = _ls_p; _SV(_ls_p,list); _ls_p =                                            \
+              _NEXT(_ls_p,list,next); _RS(list); _ls_psize--;                                  \
+            if (_ls_p == _ls_oldhead) { _ls_p = NULL; }                                        \
+          } else {                                                                             \
+            _ls_e = _ls_q; _SV(_ls_q,list); _ls_q =                                            \
+              _NEXT(_ls_q,list,next); _RS(list); _ls_qsize--;                                  \
+            if (_ls_q == _ls_oldhead) { _ls_q = NULL; }                                        \
+          }                                                                                    \
+          if (_ls_tail) {                                                                      \
+            _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,_ls_e,next); _RS(list);                \
+          } else {                                                                             \
+            _CASTASGN(list,_ls_e);                                                             \
+          }                                                                                    \
+          _SV(_ls_e,list); _PREVASGN(_ls_e,list,_ls_tail,prev); _RS(list);                     \
+          _ls_tail = _ls_e;                                                                    \
+        }                                                                                      \
+        _ls_p = _ls_q;                                                                         \
+      }                                                                                        \
+      _CASTASGN(list->prev,_ls_tail);  /* re-close the ring: head->prev = tail ... */          \
+      _CASTASGN(_tmp,list);  /* ... and tail->next = head (next line) */                       \
+      _SV(_ls_tail,list); _NEXTASGN(_ls_tail,list,_tmp,next); _RS(list);                       \
+      if (_ls_nmerges <= 1) {                                                                  \
+        _ls_looping=0;                                                                         \
+      }                                                                                        \
+      _ls_insize *= 2;                                                                         \
+    }                                                                                          \
+  }                                                                                            \
+} while (0)
+
+/******************************************************************************
+ * singly linked list macros (non-circular)                                   *
+ *****************************************************************************/
+#define LL_PREPEND(head,add)                                                                   \
+    LL_PREPEND2(head,add,next)
+
+#define LL_PREPEND2(head,add,next)  /* push add on the front; "next" names the link field */   \
+do {                                                                                           \
+  (add)->next = (head);             /* old head (possibly NULL) follows the new node */        \
+  (head) = (add);                                                                              \
+} while (0)
+
+#define LL_CONCAT(head1,head2)                                                                 \
+    LL_CONCAT2(head1,head2,next)
+
+#define LL_CONCAT2(head1,head2,next)                                                           \
+do {                                                                                           \
+  /* Splice list head2 onto the end of list head1 (O(n) walk to the tail of head1). */         \
+  LDECLTYPE(head1) _tmp;                                                                       \
+  if (!(head1)) {                                                                              \
+    (head1) = (head2);  /* empty head1: head2 is the whole list */                             \
+  } else {                                                                                     \
+    for (_tmp = (head1); _tmp->next; _tmp = _tmp->next) { /* find the tail */ }                \
+    _tmp->next = (head2);                                                                      \
+  }                                                                                            \
+} while (0)
+
+#define LL_APPEND(head,add)                                                                    \
+    LL_APPEND2(head,add,next)
+
+#define LL_APPEND2(head,add,next)                                                              \
+do {                                                                                           \
+  /* Append add as the new last node (O(n): walks from head to the tail). */                   \
+  LDECLTYPE(head) _tmp;                                                                        \
+  (add)->next = NULL;                                                                          \
+  if (!(head)) {                                                                               \
+    (head) = (add);  /* empty list: add is also the head */                                    \
+  } else {                                                                                     \
+    for (_tmp = (head); _tmp->next; _tmp = _tmp->next) { /* find the tail */ }                 \
+    _tmp->next = (add);                                                                        \
+  }                                                                                            \
+} while (0)
+
+#define LL_DELETE(head,del)                                                                    \
+    LL_DELETE2(head,del,next)
+
+#define LL_DELETE2(head,del,next)                                                              \
+do {                                                                                           \
+  /* Unlink del from a non-empty list. The node itself is not freed, and the list */           \
+  /* is left unchanged when del cannot be reached from head. */                                \
+  LDECLTYPE(head) _tmp;                                                                        \
+  if ((head) != (del)) {                                                                       \
+    /* stop on the node just before del, or on the tail when del is absent */                  \
+    for (_tmp = (head); _tmp->next && (_tmp->next != (del)); _tmp = _tmp->next) { }            \
+    if (_tmp->next) {                                                                          \
+      _tmp->next = (del)->next;                                                                \
+    }                                                                                          \
+  } else {                                                                                     \
+    (head) = (head)->next;  /* del is the first node */                                        \
+  }                                                                                            \
+} while (0)
+
+/* Here are VS2008 replacements for LL_APPEND and LL_DELETE */
+#define LL_APPEND_VS2008(head,add)                                                             \
+    LL_APPEND2_VS2008(head,add,next)
+
+#define LL_APPEND2_VS2008(head,add,next)  /* append without declaring a typed temporary */     \
+do {                                                                                           \
+  if (head) {                                                                                  \
+    (add)->next = head;     /* use add->next as a temp variable */                             \
+    while ((add)->next->next) { (add)->next = (add)->next->next; } /* walk to the tail */      \
+    (add)->next->next=(add);  /* old tail now links to add */                                  \
+  } else {                                                                                     \
+    (head)=(add);                                                                              \
+  }                                                                                            \
+  (add)->next=NULL;  /* add becomes the new tail */                                            \
+} while (0)
+
+#define LL_DELETE_VS2008(head,del)                                                             \
+    LL_DELETE2_VS2008(head,del,next)
+
+#define LL_DELETE2_VS2008(head,del,next)  /* delete using head itself as the cursor */         \
+do {                                                                                           \
+  if ((head) == (del)) {                                                                       \
+    (head)=(head)->next;                                                                       \
+  } else {                                                                                     \
+    char *_tmp = (char*)(head);  /* remember the real head */                                  \
+    while ((head)->next && ((head)->next != (del))) {                                          \
+      head = (head)->next;  /* head itself is the cursor */                                    \
+    }                                                                                          \
+    if ((head)->next) {                                                                        \
+      (head)->next = ((del)->next);  /* unlink del */                                          \
+    }                                                                                          \
+    {                                                                                          \
+      char **_head_alias = (char**)&(head);                                                    \
+      *_head_alias = _tmp;  /* restore the real head */                                        \
+    }                                                                                          \
+  }                                                                                            \
+} while (0)
+#ifdef NO_DECLTYPE
+#undef LL_APPEND
+#define LL_APPEND LL_APPEND_VS2008
+#undef LL_DELETE
+#define LL_DELETE LL_DELETE_VS2008
+#undef LL_DELETE2
+#define LL_DELETE2 LL_DELETE2_VS2008
+#undef LL_APPEND2
+#define LL_APPEND2 LL_APPEND2_VS2008
+#undef LL_CONCAT /* no LL_CONCAT_VS2008 */
+#undef DL_CONCAT /* no DL_CONCAT_VS2008 */
+#endif
+/* end VS2008 replacements */
+
+#define LL_COUNT(head,el,counter)   /* count the elements, linking through field "next" */     \
+    LL_COUNT2(head,el,counter,next)
+
+#define LL_COUNT2(head,el,counter,next)                                                        \
+{                                                                                              \
+    counter = 0;                                                                               \
+    LL_FOREACH2(head,el,next){ ++counter; }                                                    \
+}
+
+#define LL_FOREACH(head,el)                                                                    \
+    LL_FOREACH2(head,el,next)
+
+#define LL_FOREACH2(head,el,next)                                                              \
+    for(el=head;el;el=(el)->next)
+
+#define LL_FOREACH_SAFE(head,el,tmp)                                                           \
+    LL_FOREACH_SAFE2(head,el,tmp,next)
+
+#define LL_FOREACH_SAFE2(head,el,tmp,next)                                                     \
+  for((el)=(head);(el) && (tmp = (el)->next, 1); (el) = tmp)
+
+#define LL_SEARCH_SCALAR(head,out,field,val)                                                   \
+    LL_SEARCH_SCALAR2(head,out,field,val,next)
+
+#define LL_SEARCH_SCALAR2(head,out,field,val,next)                                             \
+do {                                                                                           \
+    LL_FOREACH2(head,out,next) {                                                               \
+      if ((out)->field == (val)) break;                                                        \
+    }                                                                                          \
+} while(0) 
+
+#define LL_SEARCH(head,out,elt,cmp)                                                            \
+    LL_SEARCH2(head,out,elt,cmp,next)
+
+#define LL_SEARCH2(head,out,elt,cmp,next)                                                      \
+do {                                                                                           \
+    LL_FOREACH2(head,out,next) {                                                               \
+      if ((cmp(out,elt))==0) break;                                                            \
+    }                                                                                          \
+} while(0) 
+
+#define LL_REPLACE_ELEM(head, el, add)                                                         \
+do {   /* substitute add for el in list head; el's own next pointer is left unchanged */       \
+ LDECLTYPE(head) _tmp;                                                                         \
+ assert((head) != NULL);     /* arguments are parenthesized so that expression */              \
+ assert((el) != NULL);       /* arguments such as `c ? a : b` are tested as a whole */         \
+ assert((add) != NULL);                                                                        \
+ (add)->next = (el)->next;                                                                     \
+ if ((head) == (el)) {                                                                         \
+  (head) = (add);                                                                              \
+ } else {                                                                                      \
+  _tmp = (head);                                                                               \
+  while (_tmp->next && (_tmp->next != (el))) {                                                 \
+   _tmp = _tmp->next;                                                                          \
+  }                                                                                            \
+  if (_tmp->next) {           /* el was found: relink its predecessor to add */                \
+    _tmp->next = (add);                                                                        \
+  }                                                                                            \
+ }                                                                                             \
+} while (0)
+
+#define LL_PREPEND_ELEM(head, el, add)                                                         \
+do {   /* insert add immediately before el in list head */                                     \
+ LDECLTYPE(head) _tmp;                                                                         \
+ assert((head) != NULL);     /* arguments are parenthesized so that expression */              \
+ assert((el) != NULL);       /* arguments such as `c ? a : b` are tested as a whole */         \
+ assert((add) != NULL);                                                                        \
+ (add)->next = (el);                                                                           \
+ if ((head) == (el)) {                                                                         \
+  (head) = (add);                                                                              \
+ } else {                                                                                      \
+  _tmp = (head);                                                                               \
+  while (_tmp->next && (_tmp->next != (el))) {                                                 \
+   _tmp = _tmp->next;                                                                          \
+  }                                                                                            \
+  if (_tmp->next) {           /* el was found: its predecessor now points at add */            \
+    _tmp->next = (add);                                                                        \
+  }                                                                                            \
+ }                                                                                             \
+} while (0)
+
+
+/******************************************************************************
+ * doubly linked list macros (non-circular)                                   *
+ *****************************************************************************/
+#define DL_PREPEND(head,add)                                                                   \
+    DL_PREPEND2(head,add,prev,next)
+
+#define DL_PREPEND2(head,add,prev,next)                                                        \
+do {                                                                                           \
+ (add)->next = head;                                                                           \
+ if (head) {                                                                                   \
+   (add)->prev = (head)->prev;                                                                 \
+   (head)->prev = (add);                                                                       \
+ } else {                                                                                      \
+   (add)->prev = (add);                                                                        \
+ }                                                                                             \
+ (head) = (add);                                                                               \
+} while (0)
+
+#define DL_APPEND(head,add)                                                                    \
+    DL_APPEND2(head,add,prev,next)
+
+#define DL_APPEND2(head,add,prev,next)                                                         \
+do {                                                                                           \
+  if (head) {                             /* non-empty: (head)->prev is the current tail */    \
+      (add)->prev = (head)->prev;                                                              \
+      (head)->prev->next = (add);                                                              \
+      (head)->prev = (add);               /* add becomes the new tail */                       \
+      (add)->next = NULL;                                                                      \
+  } else {                                /* empty list: add is both head and tail */          \
+      (head)=(add);                                                                            \
+      (head)->prev = (head);                                                                   \
+      (head)->next = NULL;                                                                     \
+  }                                                                                            \
+} while (0) 
+
+#define DL_CONCAT(head1,head2)                                                                 \
+    DL_CONCAT2(head1,head2,prev,next)
+
+#define DL_CONCAT2(head1,head2,prev,next)  /* append list head2 to the end of list head1 */    \
+do {                                                                                           \
+  LDECLTYPE(head1) _tmp;                                                                       \
+  if (head2) {                                                                                 \
+    if (head1) {                                                                               \
+        _tmp = (head2)->prev;             /* save head2's prev before it is overwritten */     \
+        (head2)->prev = (head1)->prev;                                                         \
+        (head1)->prev->next = (head2);    /* element at (head1)->prev now links to head2 */    \
+        (head1)->prev = _tmp;             /* head1 takes over head2's saved prev */            \
+    } else {                                                                                   \
+        (head1)=(head2);                  /* head1 was empty: the result is simply head2 */    \
+    }                                                                                          \
+  }                                                                                            \
+} while (0) 
+
+#define DL_DELETE(head,del)                                                                    \
+    DL_DELETE2(head,del,prev,next)
+
+#define DL_DELETE2(head,del,prev,next)    /* unlink del from the non-circular list head */     \
+do {                                                                                           \
+  assert((del)->prev != NULL);                                                                 \
+  if ((del)->prev == (del)) {             /* del points back at itself: list becomes empty */  \
+      (head)=NULL;                                                                             \
+  } else if ((del)==(head)) {             /* removing the head: successor inherits its prev */ \
+      (del)->next->prev = (del)->prev;                                                         \
+      (head) = (del)->next;                                                                    \
+  } else {                                                                                     \
+      (del)->prev->next = (del)->next;                                                         \
+      if ((del)->next) {                                                                       \
+          (del)->next->prev = (del)->prev;                                                     \
+      } else {                            /* del was last: (head)->prev tracks the new tail */ \
+          (head)->prev = (del)->prev;                                                          \
+      }                                                                                        \
+  }                                                                                            \
+} while (0) 
+
+#define DL_COUNT(head,el,counter)   /* count the elements, linking through field "next" */     \
+    DL_COUNT2(head,el,counter,next)
+
+#define DL_COUNT2(head,el,counter,next)                                                        \
+{                                                                                              \
+    counter = 0;                                                                               \
+    DL_FOREACH2(head,el,next){ ++counter; }                                                    \
+}
+
+#define DL_FOREACH(head,el)                                                                    \
+    DL_FOREACH2(head,el,next)
+
+#define DL_FOREACH2(head,el,next)                                                              \
+    for(el=head;el;el=(el)->next)
+
+/* this version is safe for deleting the elements during iteration */
+#define DL_FOREACH_SAFE(head,el,tmp)                                                           \
+    DL_FOREACH_SAFE2(head,el,tmp,next)
+
+#define DL_FOREACH_SAFE2(head,el,tmp,next)                                                     \
+  for((el)=(head);(el) && (tmp = (el)->next, 1); (el) = tmp)
+
+/* these are identical to their singly-linked list counterparts */
+#define DL_SEARCH_SCALAR LL_SEARCH_SCALAR
+#define DL_SEARCH LL_SEARCH
+#define DL_SEARCH_SCALAR2 LL_SEARCH_SCALAR2
+#define DL_SEARCH2 LL_SEARCH2
+
+#define DL_REPLACE_ELEM(head, el, add)                                                         \
+do {   /* substitute add for el in the non-circular doubly linked list head */                 \
+ assert((head) != NULL);     /* arguments are parenthesized so that expression */              \
+ assert((el) != NULL);       /* arguments such as `c ? a : b` are tested as a whole */         \
+ assert((add) != NULL);                                                                        \
+ if ((head) == (el)) {                                                                         \
+  (head) = (add);                                                                              \
+  (add)->next = (el)->next;                                                                    \
+  if ((el)->next == NULL) {   /* el was the only element: add points back at itself */         \
+   (add)->prev = (add);                                                                        \
+  } else {                                                                                     \
+   (add)->prev = (el)->prev;                                                                   \
+   (add)->next->prev = (add);                                                                  \
+  }                                                                                            \
+ } else {                                                                                      \
+  (add)->next = (el)->next;                                                                    \
+  (add)->prev = (el)->prev;                                                                    \
+  (add)->prev->next = (add);                                                                   \
+  if ((el)->next == NULL) {   /* el was last: (head)->prev must now refer to add */            \
+   (head)->prev = (add);                                                                       \
+  } else {                                                                                     \
+   (add)->next->prev = (add);                                                                  \
+  }                                                                                            \
+ }                                                                                             \
+} while (0)
+
+#define DL_PREPEND_ELEM(head, el, add)                                                         \
+do {   /* insert add immediately before el in the non-circular doubly linked list head */      \
+ assert((head) != NULL);     /* arguments are parenthesized so that expression */              \
+ assert((el) != NULL);       /* arguments such as `c ? a : b` are tested as a whole */         \
+ assert((add) != NULL);                                                                        \
+ (add)->next = (el);                                                                           \
+ (add)->prev = (el)->prev;                                                                     \
+ (el)->prev = (add);                                                                           \
+ if ((head) == (el)) {                                                                         \
+  (head) = (add);             /* add is the new head; its prev was copied from el above */     \
+ } else {                                                                                      \
+  (add)->prev->next = (add);                                                                   \
+ }                                                                                             \
+} while (0)
+
+
+/******************************************************************************
+ * circular doubly linked list macros                                         *
+ *****************************************************************************/
+#define CDL_PREPEND(head,add)                                                                  \
+    CDL_PREPEND2(head,add,prev,next)
+
+#define CDL_PREPEND2(head,add,prev,next)                                                       \
+do {                                                                                           \
+ if (head) {                                                                                   \
+   (add)->prev = (head)->prev;                                                                 \
+   (add)->next = (head);                                                                       \
+   (head)->prev = (add);                                                                       \
+   (add)->prev->next = (add);                                                                  \
+ } else {                                                                                      \
+   (add)->prev = (add);                                                                        \
+   (add)->next = (add);                                                                        \
+ }                                                                                             \
+(head)=(add);                                                                                  \
+} while (0)
+
+#define CDL_DELETE(head,del)                                                                   \
+    CDL_DELETE2(head,del,prev,next)
+
+#define CDL_DELETE2(head,del,prev,next)   /* unlink del from the circular list head */         \
+do {                                                                                           \
+  if ( ((head)==(del)) && ((head)->next == (head))) {  /* del is the sole element */           \
+      (head) = 0L;                                                                             \
+  } else {                                                                                     \
+     (del)->next->prev = (del)->prev;     /* bridge del's two neighbours */                    \
+     (del)->prev->next = (del)->next;                                                          \
+     if ((del) == (head)) (head)=(del)->next;  /* head was removed: move it forward */         \
+  }                                                                                            \
+} while (0) 
+
+#define CDL_COUNT(head,el,counter)  /* count the elements, linking through field "next" */     \
+    CDL_COUNT2(head,el,counter,next)
+
+#define CDL_COUNT2(head, el, counter,next)                                                     \
+{                                                                                              \
+    counter = 0;                                                                               \
+    CDL_FOREACH2(head,el,next){ ++counter; }                                                   \
+}
+
+#define CDL_FOREACH(head,el)                                                                   \
+    CDL_FOREACH2(head,el,next)
+
+#define CDL_FOREACH2(head,el,next)  /* visit each element once; stop on wrap-around to head */ \
+    for((el)=(head);el;(el)=((el)->next==(head) ? 0L : (el)->next))
+
+#define CDL_FOREACH_SAFE(head,el,tmp1,tmp2)                                                    \
+    CDL_FOREACH_SAFE2(head,el,tmp1,tmp2,prev,next)
+
+#define CDL_FOREACH_SAFE2(head,el,tmp1,tmp2,prev,next)                                         \
+  for((el)=(head), ((tmp1)=(head)?((head)->prev):NULL);                                        \
+      (el) && ((tmp2)=(el)->next, 1);                                                          \
+      ((el) = (((el)==(tmp1)) ? 0L : (tmp2))))
+
+#define CDL_SEARCH_SCALAR(head,out,field,val)                                                  \
+    CDL_SEARCH_SCALAR2(head,out,field,val,next)
+
+#define CDL_SEARCH_SCALAR2(head,out,field,val,next)                                            \
+do {                                                                                           \
+    CDL_FOREACH2(head,out,next) {                                                              \
+      if ((out)->field == (val)) break;                                                        \
+    }                                                                                          \
+} while(0) 
+
+#define CDL_SEARCH(head,out,elt,cmp)                                                           \
+    CDL_SEARCH2(head,out,elt,cmp,next)
+
+#define CDL_SEARCH2(head,out,elt,cmp,next)                                                     \
+do {                                                                                           \
+    CDL_FOREACH2(head,out,next) {                                                              \
+      if ((cmp(out,elt))==0) break;                                                            \
+    }                                                                                          \
+} while(0) 
+
+#define CDL_REPLACE_ELEM(head, el, add)                                                        \
+do {   /* substitute add for el in the circular doubly linked list head */                     \
+ assert((head) != NULL);     /* arguments are parenthesized so that expression */              \
+ assert((el) != NULL);       /* arguments such as `c ? a : b` are tested as a whole */         \
+ assert((add) != NULL);                                                                        \
+ if ((el)->next == (el)) {   /* el is the sole element: add forms a one-element ring */        \
+  (add)->next = (add);                                                                         \
+  (add)->prev = (add);                                                                         \
+  (head) = (add);                                                                              \
+ } else {                                                                                      \
+  (add)->next = (el)->next;                                                                    \
+  (add)->prev = (el)->prev;                                                                    \
+  (add)->next->prev = (add);                                                                   \
+  (add)->prev->next = (add);                                                                   \
+  if ((head) == (el)) {                                                                        \
+   (head) = (add);                                                                             \
+  }                                                                                            \
+ }                                                                                             \
+} while (0)
+
+#define CDL_PREPEND_ELEM(head, el, add)                                                        \
+do {   /* insert add immediately before el in the circular doubly linked list head */          \
+ assert((head) != NULL);     /* arguments are parenthesized so that expression */              \
+ assert((el) != NULL);       /* arguments such as `c ? a : b` are tested as a whole */         \
+ assert((add) != NULL);                                                                        \
+ (add)->next = (el);                                                                           \
+ (add)->prev = (el)->prev;                                                                     \
+ (el)->prev = (add);                                                                           \
+ (add)->prev->next = (add);                                                                    \
+ if ((head) == (el)) {       /* inserting before the head makes add the new head */            \
+  (head) = (add);                                                                              \
+ }                                                                                             \
+} while (0)
+
+#endif /* UTLIST_H */
+
diff --git a/bcftools/vcfannotate.c b/bcftools/vcfannotate.c
index 3697847..0976fe3 100644
--- a/bcftools/vcfannotate.c
+++ b/bcftools/vcfannotate.c
@@ -1,6 +1,6 @@
 /*  vcfannotate.c -- Annotate and edit VCF/BCF files.
 
-    Copyright (C) 2013-2019 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -26,6 +26,7 @@ THE SOFTWARE.  */
 #include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
 #include <errno.h>
@@ -70,6 +71,7 @@ annot_line_t;
 #define REPLACE_ALL      1      // replace both missing and existing values
 #define REPLACE_NON_MISSING 2   // replace only if tgt is not missing
 #define SET_OR_APPEND    3      // set new value if missing or non-existent, append otherwise
+#define MATCH_VALUE      4      // do not set, just match the value -c ~ID
 #define MM_FIRST   0    // if multiple annotation lines overlap a VCF record, use the first, discarding the rest
 #define MM_APPEND  1    // append, possibly multiple times
 #define MM_UNIQUE  2    // append, only unique values
@@ -77,19 +79,26 @@ annot_line_t;
 #define MM_AVG     4
 #define MM_MIN     5
 #define MM_MAX     6
+#define MM_APPEND_MISSING 7     // missing values will be transferred as well
 typedef struct _annot_col_t
 {
     int icol, replace, number;  // number: one of BCF_VL_* types
     char *hdr_key_src, *hdr_key_dst;
-    int (*setter)(struct _args_t *, bcf1_t *, struct _annot_col_t *, void*);
+    // The setters return 0 on successful update of the bcf record, negative value (bcf_update_* return status) on errors,
+    // or 1 on (repeated partial updates) concluded with a src=NULL call
+    int (*setter)(struct _args_t *, bcf1_t *dst, struct _annot_col_t *, void *src); // the last is the annotation line, either src bcf1_t or annot_line_t
+    int (*getter)(struct _args_t *, bcf1_t *src, struct _annot_col_t *, void **ptr, int *mptr);
     int merge_method;               // one of the MM_* defines
     khash_t(str2int) *mm_str_hash;  // lookup table to ensure uniqueness of added string values
     kstring_t mm_kstr;
-    double
+    size_t
         mm_dbl_nalloc,  // the allocated size --merge-logic values array
         mm_dbl_nused,   // the number of used elements in the mm_dbl array
-        mm_dbl_ndat,    // the number of merged rows (for calculating the average)
+        mm_dbl_ndat;    // the number of merged rows (for calculating the average)
+    double
         *mm_dbl;
+    void *ptr;
+    int mptr, done;
 }
 annot_col_t;
 
@@ -103,12 +112,12 @@ annot_col_t;
 typedef struct _args_t
 {
     bcf_srs_t *files;
-    bcf_hdr_t *hdr, *hdr_out;
+    bcf_hdr_t *hdr, *hdr_out, *tgts_hdr;
     htsFile *out_fh;
     int output_type, n_threads;
     bcf_sr_regions_t *tgts;
 
-    regidx_t *tgt_idx;
+    regidx_t *tgt_idx;  // keep everything in memory only with .tab annotation file and -c BEG,END columns
     regitr_t *tgt_itr;
     int tgt_is_bed;
 
@@ -123,10 +132,13 @@ typedef struct _args_t
 
     vcmp_t *vcmp;           // for matching annotation and VCF lines by allele
     annot_line_t *alines;   // buffered annotation lines
-    int nalines, malines;
+    annot_line_t *aline_missing;
+    uint32_t *srt_alines;   // sorted indexes (iALT<<16 || iAline)
+    int nalines, malines, nsrt_alines, msrt_alines;
     int ref_idx, alt_idx, chr_idx, beg_idx, end_idx;   // -1 if not present
     annot_col_t *cols;      // column indexes and setters
     int ncols;
+    int match_id;           // set iff `-c ~ID` given
 
     char *set_ids_fmt;
     convert_t *set_ids;
@@ -144,9 +156,10 @@ typedef struct _args_t
     kstring_t tmpks;
 
     char **argv, *output_fname, *targets_fname, *regions_list, *header_fname;
-    char *remove_annots, *columns, *rename_chrs, *sample_names, *mark_sites;
-    char *merge_method_str;
+    char *remove_annots, *columns, *rename_chrs, *rename_annots, *sample_names, *mark_sites;
+    kstring_t merge_method_str;
     int argc, drop_header, record_cmd_line, tgts_is_vcf, mark_sites_logic, force, single_overlaps;
+    int columns_is_file, has_append_mode;
 }
 args_t;
 
@@ -195,6 +208,8 @@ void remove_info(args_t *args, bcf1_t *line, rm_tag_t *tag)
     for (i=0; i<line->n_info; i++)
     {
         bcf_info_t *inf = &line->d.info[i];
+        if (  !strcmp("END",bcf_hdr_int2id(args->hdr,BCF_DT_ID,inf->key)) )
+            line->rlen = line->n_allele ? strlen(line->d.allele[0]) : 0;
         if ( inf->vptr_free )
         {
             free(inf->vptr - inf->vptr_off);
@@ -374,6 +389,10 @@ static void init_remove_annots(args_t *args)
         }
         else if ( str.l )
         {
+            int id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, str.s);
+            if ( bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,id) ) error("Error: did you mean INFO/%s?\n",str.s);
+            if ( bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,id) ) error("Error: did you mean FORMAT/%s?\n",str.s);
+
             if ( !args->keep_sites )
             {
                 if ( str.s[0]=='#' && str.s[1]=='#' )
@@ -441,6 +460,42 @@ static void init_header_lines(args_t *args)
     if (bcf_hdr_sync(args->hdr) < 0)
         error_errno("[%s] Failed to update input header", __func__);
 }
+static int vcf_getter_info_str2str(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr)
+{   // Getter: read the INFO string tag col->hdr_key_src of `rec` into *ptr/*mptr, looked up via the header args->tgts_hdr
+    return bcf_get_info_string(args->tgts_hdr,rec,col->hdr_key_src,ptr,mptr); // the htslib return value is passed through unchanged
+}
+static int vcf_getter_id2str(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr)
+{   // Getter: copy rec->d.id into the caller's buffer *ptr; `args` and `col` are not used here
+    char *str = *((char**)ptr);
+    int len = strlen(rec->d.id);    // NOTE(review): assumes rec->d.id is non-NULL, i.e. the record is already unpacked - confirm
+    if ( len >= *mptr ) str = realloc(str, len+1);  // grow only when the string plus its terminating NUL does not fit; NOTE(review): the realloc result is not checked
+    strcpy(str, rec->d.id);
+    *((char**)ptr) = str;   // hand the (possibly moved) buffer back to the caller
+    *mptr = len+1;          // the recorded size becomes len+1 even when the buffer was larger and not reallocated
+    return len;             // length of the ID string, excluding the NUL
+}
+static int vcf_getter_filter2str(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr)
+{   // Getter: write the FILTER names of `rec` into *ptr as a ';'-separated string, or "." when rec has no filters; `col` is not used here
+    kstring_t str;
+    str.s = *((char**)ptr);     // wrap the caller's buffer in a kstring so that kputc/kputs append into it
+    str.m = *mptr;
+    str.l = 0;                  // start writing at offset 0, previous contents are overwritten
+
+    int i;
+    if ( rec->d.n_flt )
+    {
+        for (i=0; i<rec->d.n_flt; i++)
+        {
+            if (i) kputc(';', &str);
+            kputs(bcf_hdr_int2id(args->tgts_hdr,BCF_DT_ID,rec->d.flt[i]), &str);   // filter ids are translated to names via args->tgts_hdr
+        }
+    }
+    else kputc('.', &str);
+
+    *((char**)ptr) = str.s;     // give the buffer pointer and its capacity (as tracked by the kstring) back to the caller
+    *mptr = str.m;
+    return str.l;               // length of the string written
+}
 static int setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     if ( !data ) error("Error: the --merge-logic option cannot be used with FILTER (yet?)\n");
@@ -450,24 +505,24 @@ static int setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void *dat
     if ( tab->cols[col->icol] && tab->cols[col->icol][0]=='.' && !tab->cols[col->icol][1] ) return 0; // don't replace with "."
     hts_expand(int,1,args->mtmpi,args->tmpi);
     args->tmpi[0] = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, tab->cols[col->icol]);
-    if ( args->tmpi[0]<0 ) error("The FILTER is not defined in the header: %s\n", tab->cols[col->icol]);
-    if ( col->replace==SET_OR_APPEND ) { bcf_add_filter(args->hdr_out,line,args->tmpi[0]); return 0; }
+    if ( args->tmpi[0]<0 ) error("The FILTER \"%s\" is not defined in the header, was the -h option provided?\n", tab->cols[col->icol]);
+    if ( col->replace==SET_OR_APPEND ) return bcf_add_filter(args->hdr_out,line,args->tmpi[0]);
     if ( col->replace!=REPLACE_MISSING )
     {
         bcf_update_filter(args->hdr_out,line,NULL,0);
-        bcf_update_filter(args->hdr_out,line,args->tmpi,1); 
-        return 0; 
+        return bcf_update_filter(args->hdr_out,line,args->tmpi,1); 
     }
     
     // only update missing FILTER
     if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT);
     if ( !line->d.n_flt )
-        bcf_update_filter(args->hdr_out,line,args->tmpi,1);
+        return bcf_update_filter(args->hdr_out,line,args->tmpi,1);
+
     return 0;
 }
 static int vcf_setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
-    int i;
+    int i, ret = 0;
     bcf1_t *rec = (bcf1_t*) data;
     if ( !(rec->unpacked & BCF_UN_FLT) ) bcf_unpack(rec, BCF_UN_FLT);
     if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT);
@@ -478,9 +533,9 @@ static int vcf_setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void
         for (i=0; i<rec->d.n_flt; i++)
         {
             const char *flt = bcf_hdr_int2id(args->files->readers[1].header, BCF_DT_ID, rec->d.flt[i]);
-            bcf_add_filter(args->hdr_out,line,bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, flt));
+            if ( bcf_add_filter(args->hdr_out,line,bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, flt)) < 0 ) ret = -1;
         }
-        return 0;
+        return ret;
     }
     hts_expand(int,rec->d.n_flt,args->mtmpi,args->tmpi);
     for (i=0; i<rec->d.n_flt; i++)
@@ -489,12 +544,12 @@ static int vcf_setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void
         args->tmpi[i] = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, flt);
     }
     bcf_update_filter(args->hdr_out,line,NULL,0);
-    bcf_update_filter(args->hdr_out,line,args->tmpi,rec->d.n_flt);
-    return 0;
+    return bcf_update_filter(args->hdr_out,line,args->tmpi,rec->d.n_flt);
 }
 static int setter_id(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     if ( !data ) error("Error: the --merge-logic option cannot be used with ID (yet?)\n");
+    if ( col->replace==MATCH_VALUE ) return 0;
 
     // possible cases:
     //      IN  ANNOT   OUT     ACHIEVED_BY
@@ -517,14 +572,28 @@ static int setter_id(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 }
 static int vcf_setter_id(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
+    if ( col->replace==MATCH_VALUE ) return 0;  // columns given as "~ID" carry replace==MATCH_VALUE; the ID of `line` is left untouched for them
+
     bcf1_t *rec = (bcf1_t*) data;
-    if ( rec->d.id && rec->d.id[0]=='.' && !rec->d.id[1] ) return 0;    // don't replace with "."
-    if ( col->replace==SET_OR_APPEND ) return bcf_add_id(args->hdr_out,line,rec->d.id);
-    if ( col->replace!=REPLACE_MISSING ) return bcf_update_id(args->hdr_out,line,rec->d.id);
+
+    char *id;
+    if ( col->getter )
+    {
+        int nret = col->getter(args,rec,col,&col->ptr,&col->mptr);  // a getter (installed e.g. for "ID:=TAG" columns) stores the new ID string in col->ptr
+        id = (char*) col->ptr;
+        if ( nret<=0 || (nret==1 && *id=='.') ) return 0;   // don't replace with "."
+    }
+    else
+    {
+        if ( rec->d.id && rec->d.id[0]=='.' && !rec->d.id[1] ) return 0;    // don't replace with "."
+        id = rec->d.id;
+    }
+    if ( col->replace==SET_OR_APPEND ) return bcf_add_id(args->hdr_out,line,id);
+    if ( col->replace!=REPLACE_MISSING ) return bcf_update_id(args->hdr_out,line,id);
 
     // running with +ID, only update missing ids
     if ( !line->d.id || (line->d.id[0]=='.' && !line->d.id[1]) )
-        return bcf_update_id(args->hdr_out,line,rec->d.id);
+        return bcf_update_id(args->hdr_out,line,id);
     return 0;
 }
 static int vcf_setter_ref(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -535,9 +604,9 @@ static int vcf_setter_ref(args_t *args, bcf1_t *line, annot_col_t *col, void *da
     als[0] = rec->d.allele[0];
     int i;
     for (i=1; i<line->n_allele; i++) als[i] = line->d.allele[i];
-    bcf_update_alleles(args->hdr_out, line, als, line->n_allele);
+    int ret = bcf_update_alleles(args->hdr_out, line, als, line->n_allele);
     free(als);
-    return 0;
+    return ret;
 }
 static int vcf_setter_alt(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
@@ -551,9 +620,9 @@ static int vcf_setter_alt(args_t *args, bcf1_t *line, annot_col_t *col, void *da
     const char **als = (const char**) malloc(sizeof(char*)*rec->n_allele);
     als[0] = line->d.allele[0];
     for (i=1; i<rec->n_allele; i++) als[i] = rec->d.allele[i];
-    bcf_update_alleles(args->hdr_out, line, als, rec->n_allele);
+    int ret = bcf_update_alleles(args->hdr_out, line, als, rec->n_allele);
     free(als);
-    return 0;
+    return ret;
 }
 static int setter_qual(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
@@ -627,34 +696,51 @@ static int setter_ARinfo_int32(args_t *args, bcf1_t *line, annot_col_t *col, int
 
         args->tmpi2[i] = args->tmpi[ map[i] ];
     }
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,ndst);
-    return 0;
+    return bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,ndst);
 }
 static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     annot_line_t *tab = (annot_line_t*) data;
 
+    // This is a bit hacky, only to reuse existing code with minimal changes:
+    //      -c =TAG will now behave as -l TAG:APPEND for integers
+    if ( col->replace==SET_OR_APPEND ) col->merge_method=MM_APPEND;
+
     if ( !tab )
     {
-        if ( col->merge_method!=MM_SUM && col->merge_method!=MM_AVG && col->merge_method!=MM_MIN && col->merge_method!=MM_MAX && col->merge_method!=MM_APPEND )
-            error("Error: at the moment only the sum,avg,min,max,append options are supported with --merge-logic for INFO type=Integer\n");
+        if ( col->merge_method!=MM_SUM && col->merge_method!=MM_AVG &&
+             col->merge_method!=MM_MIN && col->merge_method!=MM_MAX &&
+             col->merge_method!=MM_APPEND && 
+             col->merge_method!=MM_APPEND_MISSING )
+            error("Error: at the moment only the sum,avg,min,max,append,append-missing options are supported with --merge-logic for INFO type=Integer\n");
     }
 
     int i,ntmpi = 0;
-    if ( tab )
+    if ( tab )  // has data, not flushing yet
     {
         char *str = tab->cols[col->icol], *end = str;
-        if ( str[0]=='.' && str[1]==0 ) return 0;
+        if ( str[0]=='.' && str[1]==0 && col->merge_method!=MM_APPEND_MISSING ) return 1;
 
         while ( *end )
         {
-            int val = strtol(str, &end, 10); 
-            if ( end==str )
-                error("Could not parse %s at %s:%"PRId64" .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1,tab->cols[col->icol]);
             ntmpi++;
             hts_expand(int32_t,ntmpi,args->mtmpi,args->tmpi);
-            args->tmpi[ntmpi-1] = val;
-            str = end+1;
+            if ( str[0]=='.' && (str[1]==0 || str[1]==',') )
+            {
+                if ( col->merge_method==MM_APPEND_MISSING )
+                    args->tmpi[ntmpi-1] = bcf_int32_missing;
+                else
+                    ntmpi--;
+                if ( str[1]==0 ) end = str+1;
+                str += 2;
+            }
+            else
+            {
+                args->tmpi[ntmpi-1] = strtol(str, &end, 10); 
+                if ( end==str )
+                    error("Could not parse %s at %s:%"PRId64" .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1,tab->cols[col->icol]);
+                str = end+1;
+            }
         }
         if ( col->merge_method!=MM_FIRST )
         {
@@ -667,7 +753,7 @@ static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *d
             }
             else
             {
-                if ( col->merge_method==MM_APPEND )
+                if ( col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING )
                 {
                     int nori = col->mm_dbl_nused;
                     col->mm_dbl_nused += ntmpi;
@@ -687,9 +773,10 @@ static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *d
                 }
             }
             col->mm_dbl_ndat++;
+            return 1;
         }
     }
-    else if ( col->merge_method==MM_SUM || col->merge_method==MM_MIN || col->merge_method==MM_MAX || col->merge_method==MM_APPEND )
+    else if ( col->merge_method==MM_SUM || col->merge_method==MM_MIN || col->merge_method==MM_MAX || col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING )
     {
         ntmpi = col->mm_dbl_nused;
         hts_expand(int32_t,ntmpi,args->mtmpi,args->tmpi);
@@ -713,8 +800,7 @@ static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *d
         if ( ret>0 && args->tmpi2[0]!=bcf_int32_missing ) return 0;
     }
 
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
-    return 0;
+    return bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
 }
 static int vcf_setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
@@ -731,8 +817,7 @@ static int vcf_setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, voi
         if ( ret>0 && args->tmpi2[0]!=bcf_int32_missing ) return 0;
     }
 
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
-    return 0;
+    return bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
 }
 static int setter_ARinfo_real(args_t *args, bcf1_t *line, annot_col_t *col, int nals, char **als, int ntmpf)
 {
@@ -763,34 +848,51 @@ static int setter_ARinfo_real(args_t *args, bcf1_t *line, annot_col_t *col, int
 
         args->tmpf2[i] = args->tmpf[ map[i] ];
     }
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,ndst);
-    return 0;
+    return bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,ndst);
 }
 static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     annot_line_t *tab = (annot_line_t*) data;
 
+    // This is a bit hacky, only to reuse existing code with minimal changes:
+    //      -c =TAG will now behave as -l TAG:APPEND for floats
+    if ( col->replace==SET_OR_APPEND ) col->merge_method=MM_APPEND;
+
     if ( !tab )
     {
-        if ( col->merge_method!=MM_SUM && col->merge_method!=MM_AVG && col->merge_method!=MM_MIN && col->merge_method!=MM_MAX && col->merge_method!=MM_APPEND )
-            error("Error: at the moment only the sum,avg,min,max,append options are supported with --merge-logic for INFO type=Float\n");
+        if ( col->merge_method!=MM_SUM && col->merge_method!=MM_AVG &&
+             col->merge_method!=MM_MIN && col->merge_method!=MM_MAX &&
+             col->merge_method!=MM_APPEND &&
+             col->merge_method!=MM_APPEND_MISSING )
+            error("Error: at the moment only the sum,avg,min,max,append,append-missing options are supported with --merge-logic for INFO type=Float\n");
     }
 
     int i,ntmpf = 0;
     if ( tab )
     {
         char *str = tab->cols[col->icol], *end = str;
-        if ( str[0]=='.' && str[1]==0 ) return 0;
+        if ( str[0]=='.' && str[1]==0 && col->merge_method!=MM_APPEND_MISSING ) return 1;
 
         while ( *end )
         {
-            double val = strtod(str, &end);
-            if ( end==str )
-                error("Could not parse %s at %s:%"PRId64" .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1,tab->cols[col->icol]);
             ntmpf++;
             hts_expand(float,ntmpf,args->mtmpf,args->tmpf);
-            args->tmpf[ntmpf-1] = val;
-            str = end+1;
+            if ( str[0]=='.' && (str[1]==0 || str[1]==',') )
+            {
+                if ( col->merge_method==MM_APPEND_MISSING ) 
+                    bcf_float_set_missing(args->tmpf[ntmpf-1]);
+                else
+                    ntmpf--;
+                if ( str[1]==0 ) end = str+1;
+                str += 2;
+            }
+            else
+            {
+                args->tmpf[ntmpf-1] = strtod(str, &end);
+                if ( end==str )
+                    error("Could not parse %s at %s:%"PRId64" .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1,tab->cols[col->icol]);
+                str = end+1;
+            }
         }
         if ( col->merge_method!=MM_FIRST )
         {
@@ -799,17 +901,27 @@ static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *
                 col->mm_dbl_nused = ntmpf;
                 hts_expand(double,col->mm_dbl_nused,col->mm_dbl_nalloc,col->mm_dbl);
                 for (i=0; i<ntmpf; i++)
-                    col->mm_dbl[i] = args->tmpf[i];
+                {
+                    if ( bcf_float_is_missing(args->tmpf[i]) )
+                        bcf_double_set_missing(col->mm_dbl[i]);
+                    else
+                        col->mm_dbl[i] = args->tmpf[i];
+                }
             }
             else
             {
-                if ( col->merge_method==MM_APPEND )
+                if ( col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING )
                 {
                     int nori = col->mm_dbl_nused;
                     col->mm_dbl_nused += ntmpf;
                     hts_expand(double,col->mm_dbl_nused,col->mm_dbl_nalloc,col->mm_dbl);
                     for (i=0; i<ntmpf; i++)
-                        col->mm_dbl[i+nori] = args->tmpf[i];
+                    {
+                        if ( bcf_float_is_missing(args->tmpf[i]) )
+                            bcf_double_set_missing(col->mm_dbl[i+nori]);
+                        else
+                            col->mm_dbl[i+nori] = args->tmpf[i];
+                    }
                 }
                 else
                 {
@@ -823,13 +935,20 @@ static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *
                 }
             }
             col->mm_dbl_ndat++;
+            return 1;
         }
     }
-    else if ( col->merge_method==MM_SUM || col->merge_method==MM_MIN || col->merge_method==MM_MAX || col->merge_method==MM_APPEND )
+    else if ( col->merge_method==MM_SUM || col->merge_method==MM_MIN || col->merge_method==MM_MAX || col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING )
     {
         ntmpf = col->mm_dbl_nused;
         hts_expand(int32_t,ntmpf,args->mtmpf,args->tmpf);
-        for (i=0; i<ntmpf; i++) args->tmpf[i] = col->mm_dbl[i];
+        for (i=0; i<ntmpf; i++)
+        {
+            if ( bcf_double_is_missing(col->mm_dbl[i]) )
+                bcf_float_set_missing(args->tmpf[i]);
+            else
+                args->tmpf[i] = col->mm_dbl[i];
+        }
         col->mm_dbl_nused = col->mm_dbl_ndat = 0;
     }
     else if ( col->merge_method==MM_AVG )
@@ -849,8 +968,7 @@ static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *
         if ( ret>0 && !bcf_float_is_missing(args->tmpf2[0]) ) return 0;
     }
 
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
-    return 0;
+    return bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
 }
 static int vcf_setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
@@ -867,8 +985,7 @@ static int vcf_setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, vo
         if ( ret>0 && !bcf_float_is_missing(args->tmpf2[0]) ) return 0;
     }
 
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
-    return 0;
+    return bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
 }
 int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst); // see vcfmerge.c
 static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, int nals, char **als)
@@ -923,10 +1040,9 @@ static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, in
             if ( str[0]!='.' || (str[1]!=',' && str[1]!=0) ) continue;  // value already set
         }
         int ret = copy_string_field(args->tmps,map[i],lsrc,&args->tmpks,i);
-        assert( ret==0 );
+        if ( ret!=0 ) error("[%s:%d %s] Failed to copy a string field\n",  __FILE__,__LINE__,__func__);
     }
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmpks.s);
-    return 0;
+    return bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmpks.s);
 }
 void khash_str2int_clear_free(void *_hash)
 {
@@ -945,14 +1061,18 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
         if ( ret>0 && (args->tmps2[0]!='.' || args->tmps2[1]!=0) ) return 0;
     }
 
+    // This is a bit hacky, only to reuse existing code with minimal changes:
+    //      -c =TAG will now behave as -l TAG:unique for strings
+    if ( col->replace==SET_OR_APPEND ) col->merge_method=MM_UNIQUE;
+
     annot_line_t *tab = (annot_line_t*) data;
-    
+
     int len = 0;
     if ( tab )
     {
         len = strlen(tab->cols[col->icol]);
         if ( !len ) return 0;
-        if ( len==1 && tab->cols[col->icol][0]=='.' ) return 0;
+        if ( len==1 && tab->cols[col->icol][0]=='.' && col->merge_method!=MM_APPEND_MISSING ) return 1;
     }
 
     if ( col->merge_method!=MM_FIRST )
@@ -962,17 +1082,17 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
 
         if ( data )
         {
-            assert( col->merge_method==MM_APPEND || col->merge_method==MM_UNIQUE );
+            assert( col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING || col->merge_method==MM_UNIQUE );
             if ( col->merge_method==MM_UNIQUE )
             {
                 if ( !col->mm_str_hash ) col->mm_str_hash = (khash_t(str2int)*)khash_str2int_init();
-                if ( khash_str2int_has_key(col->mm_str_hash, tab->cols[col->icol]) ) return 0;
+                if ( khash_str2int_has_key(col->mm_str_hash, tab->cols[col->icol]) ) return 1;
                 khash_str2int_inc(col->mm_str_hash, strdup(tab->cols[col->icol]));
             }
 
             if ( col->mm_kstr.l ) kputc(',',&col->mm_kstr);
             kputs(tab->cols[col->icol], &col->mm_kstr);
-            return 0;
+            return 1;
         }
 
         if ( col->mm_kstr.l )
@@ -983,12 +1103,10 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
         else
             return 0;
 
-        if ( !data )    // flush the line
-        {
-            if ( col->merge_method==MM_UNIQUE )
-                khash_str2int_clear_free(col->mm_str_hash);
-            col->mm_kstr.l = 0;
-        }
+        // flush the line
+        if ( col->merge_method==MM_UNIQUE )
+            khash_str2int_clear_free(col->mm_str_hash);
+        col->mm_kstr.l = 0;
     }
     else
     {
@@ -1000,14 +1118,19 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
             return setter_ARinfo_string(args,line,col,tab->nals,tab->als);
     }
 
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
-    return 0;
+    return bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
 }
 static int vcf_setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int ntmps = bcf_get_info_string(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmps,&args->mtmps);
-    if ( ntmps < 0 ) return 0;    // nothing to add
+
+    if ( col->getter )
+        col->getter(args,rec,col,(void**)&args->tmps, &args->mtmps);
+    else
+    {
+        int ntmps = bcf_get_info_string(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmps,&args->mtmps);
+        if ( ntmps < 0 ) return 0;    // nothing to add
+    }
 
     if ( col->number==BCF_VL_A || col->number==BCF_VL_R ) 
         return setter_ARinfo_string(args,line,col,rec->n_allele,rec->d.allele);
@@ -1018,8 +1141,7 @@ static int vcf_setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, voi
         if ( ret>0 && (args->tmps2[0]!='.' || args->tmps2[1]!=0) ) return 0;
     }
 
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
-    return 0;
+    return bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
 }
 static int genotypes_to_string(args_t *args, int nsrc1, int32_t *src, int nsmpl_dst, kstring_t *str)
 {
@@ -1689,7 +1811,6 @@ static int vcf_setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col,
         }
     }
     return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,nsmpl_dst*ndst1);
-
 }
 
 static int vcf_setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -1771,17 +1892,12 @@ static int init_sample_map(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst)
         // tab annotation file, expecting that all samples are present: sample map not needed
         if ( !src ) return 0;
 
-        int nmatch = 0, order_ok = 1;
+        int nmatch = 0;
         for (i=0; i<bcf_hdr_nsamples(src); i++)
         {
             int id = bcf_hdr_id2int(dst, BCF_DT_SAMPLE, src->samples[i]);
-            if ( id!=-1 ) 
-            {
-                nmatch++;
-                if ( i!=id ) order_ok = 0;
-            }
+            if ( id!=-1 ) nmatch++;
         }
-        if ( bcf_hdr_nsamples(src)==bcf_hdr_nsamples(dst) && nmatch==bcf_hdr_nsamples(src) && order_ok ) return 0;  // not needed
         if ( !nmatch ) return -1;   // No matching samples found in the source and the destination file
 
         args->nsample_map = bcf_hdr_nsamples(dst);
@@ -1900,11 +2016,45 @@ static void init_columns(args_t *args)
     int need_sample_map = 0;
     int sample_map_ok = init_sample_map(args, args->tgts_is_vcf?args->files->readers[1].header:NULL, args->hdr);
 
+    kstring_t tmp = {0,0,0};
+    if ( args->columns_is_file )
+    {
+        int i,n;
+        char **str = hts_readlist(args->columns, args->columns_is_file, &n);
+        if ( !str ) error("Could not parse %s\n", args->columns);
+        for (i=0; i<n; i++)
+        {
+            char *ptr = str[i];
+            while ( *ptr && !isspace(*ptr) ) ptr++;
+            if ( *ptr )
+            {
+                *ptr = 0;
+                ptr++;
+                while ( *ptr && isspace(*ptr) ) ptr++;
+                if ( *ptr )
+                {
+                    if ( args->merge_method_str.l ) kputc(',',&args->merge_method_str);
+                    kputs(str[i],&args->merge_method_str);
+                    kputc(':',&args->merge_method_str);
+                    kputs(ptr,&args->merge_method_str);
+                }
+            }
+            if ( tmp.l ) kputc(',',&tmp);
+            kputs(str[i],&tmp);
+            free(str[i]);
+        }
+        free(str);
+        free(args->columns);
+        args->columns = tmp.s;
+        tmp.l = tmp.m = 0;
+        tmp.s = NULL;
+    }
+
     void *skip_fmt = NULL, *skip_info = NULL;
     if ( args->tgts_is_vcf )
         args->columns = columns_complement(args->columns, &skip_info, &skip_fmt);
 
-    kstring_t str = {0,0,0}, tmp = {0,0,0};
+    kstring_t str = {0,0,0};
     char *ss = args->columns, *se = ss;
     args->ncols = 0;
     int icol = -1, has_fmt_str = 0;
@@ -1929,6 +2079,7 @@ static void init_columns(args_t *args)
             {
                 args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
                 annot_col_t *col = &args->cols[args->ncols-1];
+                memset(col,0,sizeof(*col));
                 col->setter = vcf_setter_ref;
                 col->hdr_key_src = strdup(str.s);
                 col->hdr_key_dst = strdup(str.s);
@@ -1941,28 +2092,54 @@ static void init_columns(args_t *args)
             {
                 args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
                 annot_col_t *col = &args->cols[args->ncols-1];
+                memset(col,0,sizeof(*col));
                 col->setter = vcf_setter_alt;
                 col->hdr_key_src = strdup(str.s);
                 col->hdr_key_dst = strdup(str.s);
             }
             else args->alt_idx = icol;
         }
-        else if ( !strcasecmp("ID",str.s) )
+        else if ( !strcasecmp("ID",str.s) || !strcasecmp("~ID",str.s) )
         {
             if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -ID feature has not been implemented yet.\n");
+            if ( str.s[0]=='~' ) replace = MATCH_VALUE;
+            if ( args->tgts_is_vcf && replace==MATCH_VALUE ) error("todo: -c ~ID with -a VCF?\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_id : setter_id;
             col->hdr_key_src = strdup(str.s);
             col->hdr_key_dst = strdup(str.s);
+            if ( replace==MATCH_VALUE ) args->match_id = icol;
+        }
+        else if ( !strncasecmp("ID:=",str.s,4) )    // transfer a tag from INFO to ID column
+        {
+            if ( !args->tgts_is_vcf ) error("The annotation source must be a VCF for \"%s\"\n",str.s);
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -ID feature has not been implemented yet.\n");
+            args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
+            annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
+            col->icol = icol;
+            col->replace = replace;
+            col->setter = vcf_setter_id;
+            col->getter = vcf_getter_info_str2str;
+            str.s[2] = 0;
+            col->hdr_key_dst = strdup(str.s);
+            col->hdr_key_src = strncasecmp("INFO/",str.s+4,5) ? strdup(str.s+4) : strdup(str.s+4+5);
+            int hdr_id = bcf_hdr_id2int(args->tgts_hdr, BCF_DT_ID,col->hdr_key_src);
+            if ( !bcf_hdr_idinfo_exists(args->tgts_hdr,BCF_HL_INFO,hdr_id) ) 
+                error("The INFO tag \"%s\" is not defined in %s\n", col->hdr_key_src, args->targets_fname);
+            if ( bcf_hdr_id2type(args->tgts_hdr,BCF_HL_INFO,hdr_id)!=BCF_HT_STR )
+                error("Only Type=String tags can be used to annotate the ID column\n");
         }
         else if ( !strcasecmp("FILTER",str.s) )
         {
             if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_filter : setter_filter;
@@ -1977,7 +2154,7 @@ static void init_columns(args_t *args)
                     bcf_hrec_t *hrec = tgts_hdr->hrec[j];
                     if ( hrec->type!=BCF_HL_FLT ) continue;
                     int k = bcf_hrec_find_key(hrec,"ID");
-                    assert( k>=0 ); // this should always be true for valid VCFs
+                    if ( k<0 ) error("[%s] Failed to parse the header, the ID attribute not found", __func__);
                     tmp.l = 0;
                     bcf_hrec_format(hrec, &tmp);
                     bcf_hdr_append(args->hdr_out, tmp.s);
@@ -1992,6 +2169,7 @@ static void init_columns(args_t *args)
             if ( replace==SET_OR_APPEND ) error("Apologies, the =QUAL feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_qual : setter_qual;
@@ -2001,7 +2179,7 @@ static void init_columns(args_t *args)
         else if ( args->tgts_is_vcf && !strcasecmp("INFO",str.s) ) // All INFO fields
         {
             if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
-            if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO/TAG feature has not been implemented yet.\n");
+            if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO feature has not been implemented yet.\n");
             bcf_hdr_t *tgts_hdr = args->files->readers[1].header;
             int j;
             for (j=0; j<tgts_hdr->nhrec; j++)
@@ -2019,6 +2197,7 @@ static void init_columns(args_t *args)
                 int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, hrec->vals[k]);
                 args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
                 annot_col_t *col = &args->cols[args->ncols-1];
+                memset(col,0,sizeof(*col));
                 col->icol = -1;
                 col->replace = replace;
                 col->hdr_key_src = strdup(hrec->vals[k]);
@@ -2054,11 +2233,16 @@ static void init_columns(args_t *args)
                 int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, hrec->vals[k]);
                 args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
                 annot_col_t *col = &args->cols[args->ncols-1];
+                memset(col,0,sizeof(*col));
                 col->icol = -1;
                 col->replace = replace;
                 col->hdr_key_src = strdup(hrec->vals[k]);
                 col->hdr_key_dst = strdup(hrec->vals[k]);
-                if ( !strcasecmp("GT",col->hdr_key_src) ) col->setter = vcf_setter_format_gt;
+                if ( !strcasecmp("GT",col->hdr_key_src) )
+                {
+                    if ( !args->tgts_is_vcf ) error("The FORMAT/GT field can be currently populated only from a VCF\n");
+                    col->setter = vcf_setter_format_gt;
+                }
                 else
                     switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                     {
@@ -2097,9 +2281,10 @@ static void init_columns(args_t *args)
             }
             int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
             if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_FMT,hdr_id) )
-                error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
+                error("The tag \"%s\" is not defined in %s, was the -h option provided?\n", str.s, args->targets_fname);
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
             if ( !args->tgts_is_vcf )
             {
                 col->icol = icol;
@@ -2110,7 +2295,11 @@ static void init_columns(args_t *args)
             col->replace = replace;
             col->hdr_key_src = strdup(key_src);
             col->hdr_key_dst = strdup(key_dst);
-            if ( !strcasecmp("GT",key_src) ) col->setter = vcf_setter_format_gt;
+            if ( !strcasecmp("GT",key_src) )
+            {
+                if ( !args->tgts_is_vcf ) error("The FORMAT/GT field can be currently populated only from a VCF\n");
+                col->setter = vcf_setter_format_gt;
+            }
             else
                 switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                 {
@@ -2129,13 +2318,20 @@ static void init_columns(args_t *args)
         else
         {
             if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
-            if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO/TAG feature has not been implemented yet.\n");
-            int explicit_info = 0;
+            if ( replace==SET_OR_APPEND )
+            {
+                if ( args->tgts_is_vcf )
+                    error("Error: the =INFO/TAG feature is currently supported only with TAB annotation files and has limitations\n"
+                          "       (the annotation type is modified to \"Number=.\" and allele ordering is disregarded)\n");
+                fprintf(stderr,"Warning: the =INFO/TAG feature modifies the annotation to \"Number=.\" and disregards allele ordering\n");
+            }
+            int explicit_src_info = 0;
+            int explicit_dst_info = 0;
             char *key_dst;
             if ( !strncasecmp("INFO/",str.s,5) )
             {
                 key_dst = str.s + 5;
-                explicit_info = 1;
+                explicit_dst_info = 1;
             }
             else
                 key_dst = str.s;
@@ -2147,7 +2343,7 @@ static void init_columns(args_t *args)
                 if ( !strncasecmp("INFO/",key_src,5) )
                 {
                     key_src += 5;
-                    explicit_info = 1;
+                    explicit_src_info = 1;
                 }
                 else if ( !strncasecmp("FMT/",key_src,4) || !strncasecmp("FORMAT/",key_src,5) )
                 {
@@ -2157,38 +2353,65 @@ static void init_columns(args_t *args)
             }
             else
                 key_src = key_dst;
+
+            args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
+            annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
+            col->icol = icol;
+            col->replace = replace;
+            col->hdr_key_src = strdup(key_src);
+            col->hdr_key_dst = strdup(key_dst);
+
             int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
             if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_INFO,hdr_id) )
             {
                 if ( args->tgts_is_vcf ) // reading annotations from a VCF, add a new header line
                 {
-                    bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_INFO, "ID", key_src, NULL);
-                    if ( !hrec )
+                    if ( !strcasecmp("ID",key_src) && !explicit_src_info )
                     {
-                        if ( !explicit_info && bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key_src, NULL) )
-                            error("Did you mean \"FMT/%s\" rather than \"%s\"?\n",str.s,str.s);
-                    fprintf(stderr,"[%s] %d\n",key_src,explicit_info);
-                        error("The tag \"%s\" is not defined in %s\n", key_src,args->files->readers[1].fname);
+                        // transferring ID column into a new INFO tag
+                        tmp.l = 0;
+                        ksprintf(&tmp,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Transferred ID column\">",key_dst);
+                    }
+                    else if ( !strcasecmp("FILTER",key_src) && !explicit_src_info )
+                    {
+                        // transferring FILTER column into a new INFO tag
+                        tmp.l = 0;
+                        ksprintf(&tmp,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Transferred FILTER column\">",key_dst);
+                    }
+                    else
+                    {
+                        bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_INFO, "ID", key_src, NULL);
+                        if ( !hrec )
+                        {
+                            if ( explicit_dst_info+explicit_src_info==0 && bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key_src, NULL) )
+                                error("Did you mean \"FMT/%s\" rather than \"%s\"?\n",str.s,str.s);
+                            char *ptr = strchr(key_src,'=');
+                            if ( ptr )
+                            {
+                                *ptr = 0; tmp.l = 0; ksprintf(&tmp,"%s:=%s",key_src,ptr+1); *ptr = '=';
+                                error("The tag \"%s\" is not defined, is this what you want \"%s\" ?\n",key_src,tmp.s);
+                            }
+                            error("The tag \"%s\" is not defined in %s, was the -h option provided?\n", key_src,args->files->readers[1].fname);
+                        }
+                        tmp.l = 0;
+                        bcf_hrec_format_rename(hrec, key_dst, &tmp);
                     }
-                    tmp.l = 0;
-                    bcf_hrec_format_rename(hrec, key_dst, &tmp);
                     bcf_hdr_append(args->hdr_out, tmp.s);
                     if (bcf_hdr_sync(args->hdr_out) < 0)
                         error_errno("[%s] Failed to update header", __func__);
                     hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
                 }
                 else
-                    error("The tag \"%s\" is not defined in %s\n", key_src, args->targets_fname);
+                    error("The tag \"%s\" is not defined in %s, was the -h option provided?\n", key_src, args->targets_fname);
                 assert( bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_INFO,hdr_id) );
             }
-
-            args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
-            annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = icol;
-            col->replace = replace;
-            col->hdr_key_src = strdup(key_src);
-            col->hdr_key_dst = strdup(key_dst);
-            col->number  = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
+            if  ( args->tgts_is_vcf )
+            {
+                if ( !strcasecmp("ID",key_src) && !explicit_src_info ) col->getter = vcf_getter_id2str;
+                else if ( !strcasecmp("FILTER",key_src) && !explicit_src_info ) col->getter = vcf_getter_filter2str;
+            }
+            col->number = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
             switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_INFO,hdr_id) )
             {
                 case BCF_HT_FLAG:   col->setter = args->tgts_is_vcf ? vcf_setter_info_flag : setter_info_flag; break;
@@ -2197,6 +2420,18 @@ static void init_columns(args_t *args)
                 case BCF_HT_STR:    col->setter = args->tgts_is_vcf ? vcf_setter_info_str  : setter_info_str; break;
                 default: error("The type of %s not recognised (%d)\n", str.s,bcf_hdr_id2type(args->hdr_out,BCF_HL_INFO,hdr_id));
             }
+            if ( replace==SET_OR_APPEND )   // change to Number=.
+            {
+                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->hdr_out, BCF_HL_INFO, "ID", key_dst, NULL);
+                if ( !hrec ) error("Uh, could not find the new tag \"%s\" in the header\n", key_dst);
+                hrec = bcf_hrec_dup(hrec);
+                int j = bcf_hrec_find_key(hrec, "Number");
+                if ( j<0 ) error("Uh, could not find the entry Number in the header record of %s\n",key_dst);
+                free(hrec->vals[j]);
+                hrec->vals[j] = strdup(".");
+                bcf_hdr_remove(args->hdr_out,BCF_HL_INFO, key_dst);
+                bcf_hdr_add_hrec(args->hdr_out, hrec);
+            }
         }
         if ( !*se ) break;
         ss = ++se;
@@ -2232,10 +2467,10 @@ static void init_merge_method(args_t *args)
         args->cols[i].mm_dbl_nalloc = args->cols[i].mm_dbl_nused = args->cols[i].mm_dbl_ndat = 0;
         memset(&args->cols[i].mm_kstr, 0, sizeof(args->cols[i].mm_kstr));
     }
-    if ( !args->merge_method_str ) return;
+    if ( !args->merge_method_str.l ) return;
     if ( args->tgts_is_vcf ) error("Error: the --merge-logic is intended for use with BED or TAB-delimited files only.\n");
-    if ( !args->tgt_idx ) error("Error: BEG,END (or FROM,TO) columns are expected with the --merge-logic option.\n");
-    char *sb = args->merge_method_str;
+    if ( !args->tgt_idx && !args->tgts ) error("Error: BEG,END (or FROM,TO) columns or REF,ALT columns are expected with the --merge-logic option.\n");
+    char *sb = args->merge_method_str.s;
     while ( *sb )
     {
         char *se = sb;
@@ -2246,21 +2481,27 @@ static void init_merge_method(args_t *args)
         char *mm_type_str = args->tmpks.s + args->tmpks.l;
         while ( *mm_type_str!=':' && mm_type_str > args->tmpks.s ) mm_type_str--;
         if ( *mm_type_str!=':' )
-            error("Error: could not parse the argument to --merge-logic: %s\n", args->merge_method_str);
+            error("Error: could not parse the argument to --merge-logic: %s\n", args->merge_method_str.s);
         *mm_type_str = 0;
         mm_type_str++;
         int mm_type = MM_FIRST;
         if ( !strcasecmp("unique",mm_type_str) ) mm_type = MM_UNIQUE;
+        else if ( !strcasecmp("first",mm_type_str) ) mm_type = MM_FIRST;
         else if ( !strcasecmp("append",mm_type_str) ) mm_type = MM_APPEND;
+        else if ( !strcasecmp("append-missing",mm_type_str) )
+        {
+            mm_type = MM_APPEND_MISSING;
+            if ( args->ref_idx!=-1 ) args->has_append_mode = 1;
+        }
         else if ( !strcasecmp("sum",mm_type_str) ) mm_type = MM_SUM;
         else if ( !strcasecmp("avg",mm_type_str) ) mm_type = MM_AVG;
         else if ( !strcasecmp("min",mm_type_str) ) mm_type = MM_MIN;
         else if ( !strcasecmp("max",mm_type_str) ) mm_type = MM_MAX;
-        else error("Error: could not parse --merge-logic %s, the logic \"%s\" is not recognised\n", args->merge_method_str,mm_type_str);
+        else error("Error: could not parse --merge-logic %s, the logic \"%s\" is not recognised\n", args->merge_method_str.s,mm_type_str);
         for (i=0; i<args->ncols; i++)
         {
             if ( strcmp(args->cols[i].hdr_key_dst,args->tmpks.s) ) continue;
-            if ( mm_type==MM_APPEND && args->cols[i].number!=BCF_VL_VAR )
+            if ( (mm_type==MM_APPEND || mm_type==MM_APPEND_MISSING) && args->cols[i].number!=BCF_VL_VAR )
                 error("Error: --merge-logic append can be requested only for tags of variable length (Number=.)\n");
             args->cols[i].merge_method = mm_type;
             break;
@@ -2268,6 +2509,20 @@ static void init_merge_method(args_t *args)
         if ( i==args->ncols ) error("No such tag in the destination file: %s\n", args->tmpks.s);
         sb = *se ? se + 1 : se;
     }
+    if ( args->has_append_mode )
+    {
+        // create a missing line to insert missing values when VCF ALT finds no match in the annotation file
+        args->aline_missing = (annot_line_t*)calloc(1,sizeof(*args->aline_missing));
+        int ncol = 0;
+        for (i=0; i<args->ncols; i++)
+            if ( ncol < args->cols[i].icol + 1 ) ncol = args->cols[i].icol + 1;
+        if ( ncol < args->ref_idx + 1 ) ncol = args->ref_idx + 1;
+        args->aline_missing->mcols = ncol;
+        args->aline_missing->ncols = ncol;
+        args->aline_missing->cols = (char**) malloc(ncol*sizeof(char*));
+        for (i=0; i<ncol; i++)
+            args->aline_missing->cols[i] = strdup(".");
+    }
 }
 
 static void rename_chrs(args_t *args, char *fname)
@@ -2299,6 +2554,42 @@ static void rename_chrs(args_t *args, char *fname)
     free(map);
 }
 
+static void rename_annots(args_t *args, char *fname)    // rename header tags; fname holds one "TYPE/old new" pair per line
+{
+    int n, i;
+    char **map = hts_readlist(fname, 1, &n);
+    if ( !map ) error("Could not read: %s\n", fname);
+    for (i=0; i<n; i++)
+    {
+        char *sb = NULL, *ss = map[i];
+        while ( *ss && !isspace((unsigned char)*ss) ) ss++;     // the cast keeps isspace() defined for bytes >= 0x80
+        if ( !*ss ) error("Could not parse: %s\n", fname);
+        *ss = 0;    // map[i] now holds the prefixed old name only
+        int type;
+        if ( !strncasecmp("info/",map[i],5) ) type = BCF_HL_INFO, sb = map[i] + 5;
+        else if ( !strncasecmp("format/",map[i],7) ) type = BCF_HL_FMT, sb = map[i] + 7;
+        else if ( !strncasecmp("fmt/",map[i],4) ) type = BCF_HL_FMT, sb = map[i] + 4;
+        else if ( !strncasecmp("filter/",map[i],7) ) type = BCF_HL_FLT, sb = map[i] + 7;
+        else error("Could not parse \"%s\", expected INFO, FORMAT, or FILTER prefix for each line: %s\n",map[i],fname);
+        int id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, sb);
+        if ( id<0 ) continue;   // the old name has no id in the output header, nothing to rename
+        bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->hdr_out, type, "ID", sb, NULL);
+        if ( !hrec ) continue;  // no header record of this type carries the tag
+        int j = bcf_hrec_find_key(hrec, "ID");
+        assert( j>=0 );
+        ss++; while ( *ss && isspace((unsigned char)*ss) ) ss++;    // skip the separator to reach the new name
+        if ( !*ss ) error("Could not parse: %s\n", fname);          // the new name is missing, do not rename to ""
+        char *se = ss;
+        while ( *se && !isspace((unsigned char)*se) ) se++;
+        *se = 0;
+        free(hrec->vals[j]);
+        hrec->vals[j] = strdup(ss);
+        args->hdr_out->id[BCF_DT_ID][id].key = hrec->vals[j];   // the dictionary entry shares the record's new string
+    }
+    for (i=0; i<n; i++) free(map[i]);
+    free(map);
+}
+
 static void init_data(args_t *args)
 {
     args->hdr = args->files->readers[0].header;
@@ -2311,6 +2602,7 @@ static void init_data(args_t *args)
         // reading annots from a VCF
         if ( !bcf_sr_add_reader(args->files, args->targets_fname) )
             error("Failed to open %s: %s\n", args->targets_fname,bcf_sr_strerror(args->files->errnum));
+        args->tgts_hdr = args->files->readers[1].header;
     }
     if ( args->columns ) init_columns(args);
     if ( args->targets_fname && !args->tgts_is_vcf )
@@ -2318,8 +2610,8 @@ static void init_data(args_t *args)
         if ( !args->columns ) error("The -c option not given\n");
         if ( args->chr_idx==-1 ) error("The -c CHROM option not given\n");
         if ( args->beg_idx==-1 ) error("The -c POS option not given\n");
-        if ( args->single_overlaps && args->merge_method_str ) error("The options --merge-logic and --single-overlaps cannot be combined\n");
-        if ( args->end_idx==-1 || (args->single_overlaps && !args->merge_method_str) )
+        if ( args->single_overlaps && args->merge_method_str.l ) error("The options --merge-logic and --single-overlaps cannot be combined\n");
+        if ( args->end_idx==-1 || (args->single_overlaps && !args->merge_method_str.l) )
         {
             args->end_idx = -args->beg_idx - 1;
             args->tgts = bcf_sr_regions_init(args->targets_fname,1,args->chr_idx,args->beg_idx,args->end_idx);
@@ -2363,8 +2655,9 @@ static void init_data(args_t *args)
     if ( !args->drop_header )
     {
         if ( args->rename_chrs ) rename_chrs(args, args->rename_chrs);
+        if ( args->rename_annots ) rename_annots(args, args->rename_annots);
 
-        args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
+        args->out_fh = hts_open(args->output_fname,hts_bcf_wmode2(args->output_type,args->output_fname));
         if ( args->out_fh == NULL ) error("[%s] Error: cannot write to \"%s\": %s\n", __func__,args->output_fname, strerror(errno));
         if ( args->n_threads )
             hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->files->p);
@@ -2386,8 +2679,15 @@ static void destroy_data(args_t *args)
         free(args->cols[i].mm_kstr.s);
         if ( args->cols[i].mm_str_hash ) khash_str2int_destroy_free(args->cols[i].mm_str_hash);
         free(args->cols[i].mm_dbl);
+        free(args->cols[i].ptr);
     }
     free(args->cols);
+    if ( args->aline_missing )
+    {
+        for (i=0; i<args->aline_missing->ncols; i++) free(args->aline_missing->cols[i]);
+        free(args->aline_missing->cols);
+        free(args->aline_missing);
+    }
     for (i=0; i<args->malines; i++)
     {
         free(args->alines[i].cols);
@@ -2395,6 +2695,7 @@ static void destroy_data(args_t *args)
         free(args->alines[i].line.s);
     }
     free(args->alines);
+    free(args->srt_alines);
     if ( args->tgt_idx )
     {
         regidx_destroy(args->tgt_idx);
@@ -2420,6 +2721,7 @@ static void destroy_data(args_t *args)
         filter_destroy(args->filter);
     if (args->out_fh) hts_close(args->out_fh);
     free(args->sample_map);
+    free(args->merge_method_str.s);
 }
 
 static void parse_annot_line(args_t *args, char *str, annot_line_t *tmp)
@@ -2483,7 +2785,6 @@ static void buffer_annot_lines(args_t *args, bcf1_t *line, int start_pos, int en
         }
         else i++;
     }
-
     if ( args->ref_idx==-1 && args->nalines ) return;
 
     while ( !bcf_sr_regions_overlap(args->tgts, bcf_seqname(args->hdr,line), start_pos,end_pos) )
@@ -2504,6 +2805,36 @@ static void buffer_annot_lines(args_t *args, bcf1_t *line, int start_pos, int en
     }
 }
 
+// return 1 if needle equals one whole token of the semicolon separated haystack, 0 otherwise (xx vs aa;bb)
+static int str_match(char *needle, char *haystack)
+{
+    int len = strlen(needle);
+    char *ptr = haystack;
+    while ( *ptr && (ptr=strstr(ptr,needle)) )
+    {
+        int at_end = ptr[len]==0 || ptr[len]==';';                  // the hit ends at a token boundary
+        if ( at_end && (ptr==haystack || ptr[-1]==';') ) return 1;  // and it also starts at one: a match
+        ptr++;  // a prefix or a suffix only; advance by one so that ptr can never step over the terminating NUL
+    }
+    return 0;
+}
+// check whether any token of the semicolon separated string a is also a token of b (xx;yy;zz vs aa;bb)
+static int strstr_match(char *a, char *b)
+{
+    char *tok = a;
+    char *sep;
+    for (; *tok; tok = sep + 1)
+    {
+        sep = tok + strcspn(tok,";");   // the first ';' after tok, or the terminating NUL
+        char saved = *sep;
+        if ( saved ) *sep = 0;          // cut the token out in place for str_match()
+        int found = str_match(tok,b);
+        *sep = saved;                   // put the original character back
+        if ( found ) return found;
+        if ( !saved ) return 0;         // that was the last token
+    }
+    return 0;
+}
 static void annotate(args_t *args, bcf1_t *line)
 {
     int i, j;
@@ -2511,9 +2842,9 @@ static void annotate(args_t *args, bcf1_t *line)
         args->rm[i].handler(args, line, &args->rm[i]);
 
     int has_overlap = 0;
-
     if ( args->tgt_idx )
     {
+        for (j=0; j<args->ncols; j++) args->cols[j].done = 0;
         if ( regidx_overlap(args->tgt_idx, bcf_seqname(args->hdr,line),line->pos,line->pos+line->rlen-1, args->tgt_itr) )
         {
             while ( regitr_overlap(args->tgt_itr) )
@@ -2524,49 +2855,145 @@ static void annotate(args_t *args, bcf1_t *line)
                 tmp->end   = args->tgt_itr->end;
                 parse_annot_line(args, regitr_payload(args->tgt_itr,char*), tmp);
                 for (j=0; j<args->ncols; j++)
-                    if ( args->cols[j].setter(args,line,&args->cols[j],tmp) )
+                {
+                    if ( args->cols[j].done==1 ) continue;
+                    int ret = args->cols[j].setter(args,line,&args->cols[j],tmp);
+                    if ( ret < 0 )
                         error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+                    if ( ret==0 )
+                        args->cols[j].done = 1;
+                }
             }
             has_overlap = 1;
         }
         for (j=0; j<args->ncols; j++)
-            if ( args->cols[j].merge_method != MM_FIRST )
-                args->cols[j].setter(args,line,&args->cols[j],NULL);
+        {
+            if ( args->cols[j].done==1 || args->cols[j].merge_method == MM_FIRST ) continue;
+            if ( args->cols[j].setter(args,line,&args->cols[j],NULL) < 0 )
+                error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+        }
     }
     else if ( args->tgts )
     {
-        // Buffer annotation lines. When multiple ALT alleles are present in the
-        // annotation file, at least one must match one of the VCF alleles.
-        int len = 0;
-        bcf_get_variant_types(line);
-        for (i=1; i<line->n_allele; i++)
-            if ( len > line->d.var[i].n ) len = line->d.var[i].n;
-        int end_pos = len<0 ? line->pos - len : line->pos;
+        // Buffer annotation lines. When multiple ALT alleles are present in the annotation file, at least one
+        // must match some of the VCF alleles. If the append-missing mode is set (and REF+ALT is requested), the
+        // buffered lines will annotate the VCF respecting the order in ALT and when no matching line is found
+        // for an ALT, missing value is appended instead.
+        int end_pos = line->pos + line->rlen - 1;
         buffer_annot_lines(args, line, line->pos, end_pos);
+
+        args->nsrt_alines = 0;
+        hts_expand(uint32_t,args->nalines,args->msrt_alines,args->srt_alines);
+        if ( args->nalines >= 0xffff || line->n_allele >= 0xffff )
+            error("Error: too many alleles or annotation lines in the buffer at %s:%"PRId64" (todo:skip?)\n",bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+
+        // Find matching lines
         for (i=0; i<args->nalines; i++)
         {
             if ( line->pos > args->alines[i].end || end_pos < args->alines[i].start ) continue;
-            if ( args->ref_idx != -1 )
+            if ( args->ref_idx != -1 )  // REF+ALT matching requested
             {
-                if ( vcmp_set_ref(args->vcmp, line->d.allele[0], args->alines[i].als[0]) < 0 ) continue;   // refs not compatible
+                if ( line->pos!=args->alines[i].start || vcmp_set_ref(args->vcmp, line->d.allele[0], args->alines[i].als[0]) < 0 ) continue;   // refs are not compatible
                 for (j=1; j<args->alines[i].nals; j++)
                 {
-                    if ( line->n_allele==1 && args->alines[i].als[j][0]=='.' && args->alines[i].als[j][1]==0 ) break;   // no ALT allele in VCF and annot file has "."
-                    if ( vcmp_find_allele(args->vcmp, line->d.allele+1, line->n_allele - 1, args->alines[i].als[j]) >= 0 ) break;
+                    int ialt;
+                    if ( line->n_allele==1 && args->alines[i].als[j][0]=='.' && args->alines[i].als[j][1]==0 )  // match: no ALT allele in VCF and annot file has "."
+                        ialt = 0;
+                    else
+                    {
+                        ialt = vcmp_find_allele(args->vcmp, line->d.allele+1, line->n_allele - 1, args->alines[i].als[j]);
+                        if ( ialt < 0 ) continue;
+                        ialt++;
+                    }
+                    if ( args->match_id>=0 && !strstr_match(line->d.id,args->alines[i].cols[args->match_id]) ) continue;
+                    args->srt_alines[args->nsrt_alines++] = (ialt<<16) | i;
+                    has_overlap = 1;
+                    break;
                 }
-                if ( j==args->alines[i].nals ) continue;    // none of the annot alleles present in VCF's ALT
             }
-            break;
+            else    // overlap, REF+ALT matching not requested
+            {
+                args->srt_alines[args->nsrt_alines++] = (0xffff<<16) | i;
+                has_overlap = 1;
+            }
         }
-
-        if ( i<args->nalines )
+        // Sort lines if needed
+        if ( args->has_append_mode )
+        {
+            // insertion sort by VCF ALT index (top bits) and alines index (low bits)
+            uint32_t tmp;
+            for (i=1; i<args->nsrt_alines; i++)
+                for (j=i; j>0 && args->srt_alines[j] < args->srt_alines[j-1]; j--)
+                    tmp = args->srt_alines[j], args->srt_alines[j] = args->srt_alines[j-1], args->srt_alines[j-1] = tmp;
+        }
+        // Annotate
+        for (j=0; j<args->ncols; j++) args->cols[j].done = 0;
+        int ialt_exp = 1;
+        for (i=0; i<args->nsrt_alines; i++)
         {
-            // there is a matching line
+            int ialt = args->srt_alines[i] >> 16;
+            int ilin = args->srt_alines[i] & 0xffff;
+            if ( args->has_append_mode )
+            {
+                if ( ialt_exp > ialt ) continue;    // multiple annotation lines for the same position
+                if ( ialt_exp < ialt )
+                {
+                    // REF+ALT matching requested, append-missing mode: insert "." if no annotation line was found for the ALT
+                    while ( ialt_exp++ < ialt )
+                    {
+                        for (j=0; j<args->ncols; j++)
+                        {
+                            if ( args->cols[j].merge_method != MM_APPEND_MISSING ) continue;
+                            if ( args->cols[j].done==1 ) continue;
+                            int ret = args->cols[j].setter(args,line,&args->cols[j],args->aline_missing);
+                            if ( ret < 0 )
+                                error("fixme: Could not set missing %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+                            if ( ret==0 )
+                                args->cols[j].done = 1;
+                        }
+                    }
+                }
+            }
             for (j=0; j<args->ncols; j++)
-                if ( args->cols[j].setter(args,line,&args->cols[j],&args->alines[i]) )
+            {
+                if ( args->cols[j].done==1 ) continue;
+                int ret = args->cols[j].setter(args,line,&args->cols[j],&args->alines[ilin]);
+                if ( ret < 0 )
                     error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+                if ( ret==0 )
+                    args->cols[j].done = 1;
+            }
+            ialt_exp = ialt + 1;
+        }
+        if ( args->nsrt_alines )
+        {
+            // In the append-missing mode fill missing values to all trailing ALTs, but only if at least one
+            // record was found. Otherwise the row will be left without annotation.
+            if ( args->has_append_mode && ialt_exp < line->n_allele )
+            {
+                while ( ialt_exp++ < line->n_allele )
+                {
+                    for (j=0; j<args->ncols; j++)
+                    {
+                        if ( args->cols[j].merge_method != MM_APPEND_MISSING ) continue;
+                        if ( args->cols[j].done==1 ) continue;
+                        int ret = args->cols[j].setter(args,line,&args->cols[j],args->aline_missing);
+                        if ( ret < 0 )
+                            error("fixme: Could not set missing %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+                        if ( ret==0 )
+                            args->cols[j].done = 1;
+                    }
+                }
+            }
+            // Flush
+            for (j=0; j<args->ncols; j++)
+            {
+                if ( args->cols[j].done==1 || args->cols[j].merge_method == MM_FIRST ) continue;
+                int ret = args->cols[j].setter(args,line,&args->cols[j],NULL);
+                if ( ret < 0 )
+                    error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+            }
         }
-        has_overlap = i<args->nalines ? 1 : 0;
     }
     else if ( args->files->nreaders == 2 )
     {
@@ -2611,28 +3038,30 @@ static void usage(args_t *args)
     fprintf(stderr, "Usage:   bcftools annotate [options] <in.vcf.gz>\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
-    fprintf(stderr, "   -a, --annotations <file>       VCF file or tabix-indexed file with annotations: CHR\\tPOS[\\tVALUE]+\n");
-    fprintf(stderr, "       --collapse <string>        matching records by <snps|indels|both|all|some|none>, see man page for details [some]\n");
-    fprintf(stderr, "   -c, --columns <list>           list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
-    fprintf(stderr, "   -e, --exclude <expr>           exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(stderr, "       --force                    continue despite parsing error (at your own risk!)\n");
-    fprintf(stderr, "   -h, --header-lines <file>      lines which should be appended to the VCF header\n");
-    fprintf(stderr, "   -I, --set-id [+]<format>       set ID column, see man page for details\n");
-    fprintf(stderr, "   -i, --include <expr>           select sites for which the expression is true (see man page for details)\n");
-    fprintf(stderr, "   -k, --keep-sites               leave -i/-e sites unchanged instead of discarding them\n");
-    fprintf(stderr, "   -l, --merge-logic <tag:type>   merge logic for multiple overlapping regions (see man page for details), EXPERIMENTAL\n");
-    fprintf(stderr, "   -m, --mark-sites [+-]<tag>     add INFO/tag flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
-    fprintf(stderr, "       --no-version               do not append version and command line to the header\n");
-    fprintf(stderr, "   -o, --output <file>            write output to a file [standard output]\n");
-    fprintf(stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(stderr, "   -r, --regions <region>         restrict to comma-separated list of regions\n");
-    fprintf(stderr, "   -R, --regions-file <file>      restrict to regions listed in a file\n");
-    fprintf(stderr, "       --rename-chrs <file>       rename sequences according to map file: from\\tto\n");
-    fprintf(stderr, "   -s, --samples [^]<list>        comma separated list of samples to annotate (or exclude with \"^\" prefix)\n");
-    fprintf(stderr, "   -S, --samples-file [^]<file>   file of samples to annotate (or exclude with \"^\" prefix)\n");
-    fprintf(stderr, "       --single-overlaps          keep memory low by avoiding complexities arising from handling multiple overlapping intervals\n");
-    fprintf(stderr, "   -x, --remove <list>            list of annotations (e.g. ID,INFO/DP,FORMAT/DP,FILTER) to remove (or keep with \"^\" prefix). See man page for details\n");
-    fprintf(stderr, "       --threads <int>            number of extra output compression threads [0]\n");
+    fprintf(stderr, "   -a, --annotations FILE       VCF file or tabix-indexed FILE with annotations: CHR\\tPOS[\\tVALUE]+\n");
+    fprintf(stderr, "       --collapse STR           matching records by <snps|indels|both|all|some|none>, see man page for details [some]\n");
+    fprintf(stderr, "   -c, --columns LIST           list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
+    fprintf(stderr, "   -C, --columns-file FILE      read -c columns from FILE, one name per row, with optional --merge-logic TYPE: NAME[ TYPE]\n");
+    fprintf(stderr, "   -e, --exclude EXPR           exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(stderr, "       --force                  continue despite parsing error (at your own risk!)\n");
+    fprintf(stderr, "   -h, --header-lines FILE      lines which should be appended to the VCF header\n");
+    fprintf(stderr, "   -I, --set-id [+]FORMAT       set ID column using a `bcftools query`-like expression, see man page for details\n");
+    fprintf(stderr, "   -i, --include EXPR           select sites for which the expression is true (see man page for details)\n");
+    fprintf(stderr, "   -k, --keep-sites             leave -i/-e sites unchanged instead of discarding them\n");
+    fprintf(stderr, "   -l, --merge-logic TAG:TYPE   merge logic for multiple overlapping regions (see man page for details), EXPERIMENTAL\n");
+    fprintf(stderr, "   -m, --mark-sites [+-]TAG     add INFO/TAG flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
+    fprintf(stderr, "       --no-version             do not append version and command line to the header\n");
+    fprintf(stderr, "   -o, --output FILE            write output to a file [standard output]\n");
+    fprintf(stderr, "   -O, --output-type [b|u|z|v]  b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+    fprintf(stderr, "   -r, --regions REGION         restrict to comma-separated list of regions\n");
+    fprintf(stderr, "   -R, --regions-file FILE      restrict to regions listed in FILE\n");
+    fprintf(stderr, "       --rename-annots FILE     rename annotations: TYPE/old\\tnew, where TYPE is one of FILTER,INFO,FORMAT\n");
+    fprintf(stderr, "       --rename-chrs FILE       rename sequences according to the mapping: old\\tnew\n");
+    fprintf(stderr, "   -s, --samples [^]LIST        comma separated list of samples to annotate (or exclude with \"^\" prefix)\n");
+    fprintf(stderr, "   -S, --samples-file [^]FILE   file of samples to annotate (or exclude with \"^\" prefix)\n");
+    fprintf(stderr, "       --single-overlaps        keep memory low by avoiding complexities arising from handling multiple overlapping intervals\n");
+    fprintf(stderr, "   -x, --remove LIST            list of annotations (e.g. ID,INFO/DP,FORMAT/DP,FILTER) to remove (or keep with \"^\" prefix). See man page for details\n");
+    fprintf(stderr, "       --threads INT            number of extra output compression threads [0]\n");
     fprintf(stderr, "\n");
     exit(1);
 }
@@ -2649,6 +3078,7 @@ int main_vcfannotate(int argc, char *argv[])
     args->record_cmd_line = 1;
     args->ref_idx = args->alt_idx = args->chr_idx = args->beg_idx = args->end_idx = -1;
     args->set_ids_replace = 1;
+    args->match_id = -1;
     int regions_is_file = 0, collapse = 0;
 
     static struct option loptions[] =
@@ -2667,7 +3097,9 @@ int main_vcfannotate(int argc, char *argv[])
         {"regions",required_argument,NULL,'r'},
         {"regions-file",required_argument,NULL,'R'},
         {"remove",required_argument,NULL,'x'},
+        {"columns-file",required_argument,NULL,'C'},
         {"columns",required_argument,NULL,'c'},
+        {"rename-annots",required_argument,NULL,11},
         {"rename-chrs",required_argument,NULL,1},
         {"header-lines",required_argument,NULL,'h'},
         {"samples",required_argument,NULL,'s'},
@@ -2677,7 +3109,7 @@ int main_vcfannotate(int argc, char *argv[])
         {"force",no_argument,NULL,'f'},
         {NULL,0,NULL,0}
     };
-    while ((c = getopt_long(argc, argv, "h:?o:O:r:R:a:x:c:i:e:S:s:I:m:kl:f",loptions,NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "h:?o:O:r:R:a:x:c:C:i:e:S:s:I:m:kl:f",loptions,NULL)) >= 0)
     {
         switch (c) {
             case 'f': args->force = 1; break;
@@ -2688,11 +3120,15 @@ int main_vcfannotate(int argc, char *argv[])
                 else if ( optarg[0]=='-' ) { args->mark_sites = optarg+1; args->mark_sites_logic = MARK_UNLISTED; }
                 else args->mark_sites = optarg; 
                 break;
-            case 'l': args->merge_method_str = optarg; break;
+            case 'l': 
+                if ( args->merge_method_str.l ) kputc(',',&args->merge_method_str);
+                kputs(optarg,&args->merge_method_str);
+                break;
             case 'I': args->set_ids_fmt = optarg; break;
             case 's': args->sample_names = optarg; break;
             case 'S': args->sample_names = optarg; args->sample_is_file = 1; break;
             case 'c': args->columns = strdup(optarg); break;
+            case 'C': args->columns = strdup(optarg); args->columns_is_file = 1; break;
             case 'o': args->output_fname = optarg; break;
             case 'O':
                 switch (optarg[0]) {
@@ -2703,8 +3139,12 @@ int main_vcfannotate(int argc, char *argv[])
                     default: error("The output type \"%s\" not recognised\n", optarg);
                 };
                 break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'x': args->remove_annots = optarg; break;
             case 'a': args->targets_fname = optarg; break;
             case 'r': args->regions_list = optarg; break;
@@ -2724,6 +3164,7 @@ int main_vcfannotate(int argc, char *argv[])
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case  8 : args->record_cmd_line = 0; break;
             case 10 : args->single_overlaps = 1; break;
+            case 11 : args->rename_annots = optarg; break;
             case '?': usage(args); break;
             default: error("Unknown argument: %s\n", optarg);
         }
diff --git a/bcftools/vcfannotate.c.pysam.c b/bcftools/vcfannotate.c.pysam.c
index e9d31bf..b7e707b 100644
--- a/bcftools/vcfannotate.c.pysam.c
+++ b/bcftools/vcfannotate.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfannotate.c -- Annotate and edit VCF/BCF files.
 
-    Copyright (C) 2013-2019 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -28,6 +28,7 @@ THE SOFTWARE.  */
 #include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
 #include <errno.h>
@@ -72,6 +73,7 @@ annot_line_t;
 #define REPLACE_ALL      1      // replace both missing and existing values
 #define REPLACE_NON_MISSING 2   // replace only if tgt is not missing
 #define SET_OR_APPEND    3      // set new value if missing or non-existent, append otherwise
+#define MATCH_VALUE      4      // do not set, just match the value -c ~ID
 #define MM_FIRST   0    // if multiple annotation lines overlap a VCF record, use the first, discarding the rest
 #define MM_APPEND  1    // append, possibly multiple times
 #define MM_UNIQUE  2    // append, only unique values
@@ -79,19 +81,26 @@ annot_line_t;
 #define MM_AVG     4
 #define MM_MIN     5
 #define MM_MAX     6
+#define MM_APPEND_MISSING 7     // missing values will be transferred as well
 typedef struct _annot_col_t
 {
     int icol, replace, number;  // number: one of BCF_VL_* types
     char *hdr_key_src, *hdr_key_dst;
-    int (*setter)(struct _args_t *, bcf1_t *, struct _annot_col_t *, void*);
+    // The setters return 0 on successful update of the bcf record, negative value (bcf_update_* return status) on errors,
+    // or 1 on a partial update (possibly repeated), which must be concluded with a src=NULL call
+    int (*setter)(struct _args_t *, bcf1_t *dst, struct _annot_col_t *, void *src); // the last is the annotation line, either src bcf1_t or annot_line_t
+    int (*getter)(struct _args_t *, bcf1_t *src, struct _annot_col_t *, void **ptr, int *mptr);
     int merge_method;               // one of the MM_* defines
     khash_t(str2int) *mm_str_hash;  // lookup table to ensure uniqueness of added string values
     kstring_t mm_kstr;
-    double
+    size_t
         mm_dbl_nalloc,  // the allocated size --merge-logic values array
         mm_dbl_nused,   // the number of used elements in the mm_dbl array
-        mm_dbl_ndat,    // the number of merged rows (for calculating the average)
+        mm_dbl_ndat;    // the number of merged rows (for calculating the average)
+    double
         *mm_dbl;
+    void *ptr;
+    int mptr, done;
 }
 annot_col_t;
 
@@ -105,12 +114,12 @@ annot_col_t;
 typedef struct _args_t
 {
     bcf_srs_t *files;
-    bcf_hdr_t *hdr, *hdr_out;
+    bcf_hdr_t *hdr, *hdr_out, *tgts_hdr;
     htsFile *out_fh;
     int output_type, n_threads;
     bcf_sr_regions_t *tgts;
 
-    regidx_t *tgt_idx;
+    regidx_t *tgt_idx;  // keep everything in memory only with .tab annotation file and -c BEG,END columns
     regitr_t *tgt_itr;
     int tgt_is_bed;
 
@@ -125,10 +134,13 @@ typedef struct _args_t
 
     vcmp_t *vcmp;           // for matching annotation and VCF lines by allele
     annot_line_t *alines;   // buffered annotation lines
-    int nalines, malines;
+    annot_line_t *aline_missing;
+    uint32_t *srt_alines;   // sorted indexes (iALT<<16 || iAline)
+    int nalines, malines, nsrt_alines, msrt_alines;
     int ref_idx, alt_idx, chr_idx, beg_idx, end_idx;   // -1 if not present
     annot_col_t *cols;      // column indexes and setters
     int ncols;
+    int match_id;           // set iff `-c ~ID` given
 
     char *set_ids_fmt;
     convert_t *set_ids;
@@ -146,9 +158,10 @@ typedef struct _args_t
     kstring_t tmpks;
 
     char **argv, *output_fname, *targets_fname, *regions_list, *header_fname;
-    char *remove_annots, *columns, *rename_chrs, *sample_names, *mark_sites;
-    char *merge_method_str;
+    char *remove_annots, *columns, *rename_chrs, *rename_annots, *sample_names, *mark_sites;
+    kstring_t merge_method_str;
     int argc, drop_header, record_cmd_line, tgts_is_vcf, mark_sites_logic, force, single_overlaps;
+    int columns_is_file, has_append_mode;
 }
 args_t;
 
@@ -197,6 +210,8 @@ void remove_info(args_t *args, bcf1_t *line, rm_tag_t *tag)
     for (i=0; i<line->n_info; i++)
     {
         bcf_info_t *inf = &line->d.info[i];
+        if (  !strcmp("END",bcf_hdr_int2id(args->hdr,BCF_DT_ID,inf->key)) )
+            line->rlen = line->n_allele ? strlen(line->d.allele[0]) : 0;
         if ( inf->vptr_free )
         {
             free(inf->vptr - inf->vptr_off);
@@ -376,6 +391,10 @@ static void init_remove_annots(args_t *args)
         }
         else if ( str.l )
         {
+            int id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, str.s);
+            if ( bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,id) ) error("Error: did you mean INFO/%s?\n",str.s);
+            if ( bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FMT,id) ) error("Error: did you mean FORMAT/%s?\n",str.s);
+
             if ( !args->keep_sites )
             {
                 if ( str.s[0]=='#' && str.s[1]=='#' )
@@ -443,6 +462,42 @@ static void init_header_lines(args_t *args)
     if (bcf_hdr_sync(args->hdr) < 0)
         error_errno("[%s] Failed to update input header", __func__);
 }
+static int vcf_getter_info_str2str(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr)
+{   // Getter: fetch the INFO string tag named col->hdr_key_src from rec into the caller's buffer (*ptr, size in *mptr)
+    return bcf_get_info_string(args->tgts_hdr,rec,col->hdr_key_src,ptr,mptr);  // looked up via args->tgts_hdr; the htslib return value is passed through unchanged
+}
+static int vcf_getter_id2str(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr)
+{   // Getter: copy rec->d.id into the caller's buffer (*ptr, size in *mptr); returns the ID length without the terminating NUL
+    char *str = *((char**)ptr);
+    int len = strlen(rec->d.id);    // NOTE(review): assumes rec->d.id is non-NULL (record already unpacked) - confirm with callers
+    if ( len >= *mptr ) str = realloc(str, len+1);  // grow only when len+1 bytes do not fit; NOTE(review): realloc result is not checked
+    strcpy(str, rec->d.id);
+    *((char**)ptr) = str;   // hand the (possibly moved) buffer back to the caller
+    *mptr = len+1;          // set even when no realloc happened, so the recorded size may be smaller than the real allocation
+    return len;
+}
+static int vcf_getter_filter2str(args_t *args, bcf1_t *rec, annot_col_t *col, void **ptr, int *mptr)
+{   // Getter: write rec's FILTER names as a ';'-separated string (or "." when there are none) into *ptr; returns the string length
+    kstring_t str;          // wraps the caller's buffer so that kputc/kputs can append to it
+    str.s = *((char**)ptr);
+    str.m = *mptr;
+    str.l = 0;              // start at offset 0, i.e. any previous contents are overwritten
+
+    int i;
+    if ( rec->d.n_flt )     // NOTE(review): reads rec->d.flt without unpacking here; presumably the caller unpacked FILTER - confirm
+    {
+        for (i=0; i<rec->d.n_flt; i++)
+        {
+            if (i) kputc(';', &str);    // separator goes between entries only
+            kputs(bcf_hdr_int2id(args->tgts_hdr,BCF_DT_ID,rec->d.flt[i]), &str);   // filter ids are translated to names via args->tgts_hdr
+        }
+    }
+    else kputc('.', &str);  // no filters set: emit the single character "."
+
+    *((char**)ptr) = str.s; // pass the buffer pointer and its allocated size (str.m) back to the caller
+    *mptr = str.m;
+    return str.l;
+}
 static int setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     if ( !data ) error("Error: the --merge-logic option cannot be used with FILTER (yet?)\n");
@@ -452,24 +507,24 @@ static int setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void *dat
     if ( tab->cols[col->icol] && tab->cols[col->icol][0]=='.' && !tab->cols[col->icol][1] ) return 0; // don't replace with "."
     hts_expand(int,1,args->mtmpi,args->tmpi);
     args->tmpi[0] = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, tab->cols[col->icol]);
-    if ( args->tmpi[0]<0 ) error("The FILTER is not defined in the header: %s\n", tab->cols[col->icol]);
-    if ( col->replace==SET_OR_APPEND ) { bcf_add_filter(args->hdr_out,line,args->tmpi[0]); return 0; }
+    if ( args->tmpi[0]<0 ) error("The FILTER \"%s\" is not defined in the header, was the -h option provided?\n", tab->cols[col->icol]);
+    if ( col->replace==SET_OR_APPEND ) return bcf_add_filter(args->hdr_out,line,args->tmpi[0]);
     if ( col->replace!=REPLACE_MISSING )
     {
         bcf_update_filter(args->hdr_out,line,NULL,0);
-        bcf_update_filter(args->hdr_out,line,args->tmpi,1); 
-        return 0; 
+        return bcf_update_filter(args->hdr_out,line,args->tmpi,1); 
     }
     
     // only update missing FILTER
     if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT);
     if ( !line->d.n_flt )
-        bcf_update_filter(args->hdr_out,line,args->tmpi,1);
+        return bcf_update_filter(args->hdr_out,line,args->tmpi,1);
+
     return 0;
 }
 static int vcf_setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
-    int i;
+    int i, ret = 0;
     bcf1_t *rec = (bcf1_t*) data;
     if ( !(rec->unpacked & BCF_UN_FLT) ) bcf_unpack(rec, BCF_UN_FLT);
     if ( !(line->unpacked & BCF_UN_FLT) ) bcf_unpack(line, BCF_UN_FLT);
@@ -480,9 +535,9 @@ static int vcf_setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void
         for (i=0; i<rec->d.n_flt; i++)
         {
             const char *flt = bcf_hdr_int2id(args->files->readers[1].header, BCF_DT_ID, rec->d.flt[i]);
-            bcf_add_filter(args->hdr_out,line,bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, flt));
+            if ( bcf_add_filter(args->hdr_out,line,bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, flt)) < 0 ) ret = -1;
         }
-        return 0;
+        return ret;
     }
     hts_expand(int,rec->d.n_flt,args->mtmpi,args->tmpi);
     for (i=0; i<rec->d.n_flt; i++)
@@ -491,12 +546,12 @@ static int vcf_setter_filter(args_t *args, bcf1_t *line, annot_col_t *col, void
         args->tmpi[i] = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, flt);
     }
     bcf_update_filter(args->hdr_out,line,NULL,0);
-    bcf_update_filter(args->hdr_out,line,args->tmpi,rec->d.n_flt);
-    return 0;
+    return bcf_update_filter(args->hdr_out,line,args->tmpi,rec->d.n_flt);
 }
 static int setter_id(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     if ( !data ) error("Error: the --merge-logic option cannot be used with ID (yet?)\n");
+    if ( col->replace==MATCH_VALUE ) return 0;
 
     // possible cases:
     //      IN  ANNOT   OUT     ACHIEVED_BY
@@ -519,14 +574,28 @@ static int setter_id(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 }
 static int vcf_setter_id(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
+    if ( col->replace==MATCH_VALUE ) return 0;
+
     bcf1_t *rec = (bcf1_t*) data;
-    if ( rec->d.id && rec->d.id[0]=='.' && !rec->d.id[1] ) return 0;    // don't replace with "."
-    if ( col->replace==SET_OR_APPEND ) return bcf_add_id(args->hdr_out,line,rec->d.id);
-    if ( col->replace!=REPLACE_MISSING ) return bcf_update_id(args->hdr_out,line,rec->d.id);
+
+    char *id;
+    if ( col->getter )
+    {
+        int nret = col->getter(args,rec,col,&col->ptr,&col->mptr);
+        id = (char*) col->ptr;
+        if ( nret<=0 || (nret==1 && *id=='.') ) return 0;   // don't replace with "."
+    }
+    else
+    {
+        if ( rec->d.id && rec->d.id[0]=='.' && !rec->d.id[1] ) return 0;    // don't replace with "."
+        id = rec->d.id;
+    }
+    if ( col->replace==SET_OR_APPEND ) return bcf_add_id(args->hdr_out,line,id);
+    if ( col->replace!=REPLACE_MISSING ) return bcf_update_id(args->hdr_out,line,id);
 
     // running with +ID, only update missing ids
     if ( !line->d.id || (line->d.id[0]=='.' && !line->d.id[1]) )
-        return bcf_update_id(args->hdr_out,line,rec->d.id);
+        return bcf_update_id(args->hdr_out,line,id);
     return 0;
 }
 static int vcf_setter_ref(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -537,9 +606,9 @@ static int vcf_setter_ref(args_t *args, bcf1_t *line, annot_col_t *col, void *da
     als[0] = rec->d.allele[0];
     int i;
     for (i=1; i<line->n_allele; i++) als[i] = line->d.allele[i];
-    bcf_update_alleles(args->hdr_out, line, als, line->n_allele);
+    int ret = bcf_update_alleles(args->hdr_out, line, als, line->n_allele);
     free(als);
-    return 0;
+    return ret;
 }
 static int vcf_setter_alt(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
@@ -553,9 +622,9 @@ static int vcf_setter_alt(args_t *args, bcf1_t *line, annot_col_t *col, void *da
     const char **als = (const char**) malloc(sizeof(char*)*rec->n_allele);
     als[0] = line->d.allele[0];
     for (i=1; i<rec->n_allele; i++) als[i] = rec->d.allele[i];
-    bcf_update_alleles(args->hdr_out, line, als, rec->n_allele);
+    int ret = bcf_update_alleles(args->hdr_out, line, als, rec->n_allele);
     free(als);
-    return 0;
+    return ret;
 }
 static int setter_qual(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
@@ -629,34 +698,51 @@ static int setter_ARinfo_int32(args_t *args, bcf1_t *line, annot_col_t *col, int
 
         args->tmpi2[i] = args->tmpi[ map[i] ];
     }
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,ndst);
-    return 0;
+    return bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi2,ndst);
 }
 static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     annot_line_t *tab = (annot_line_t*) data;
 
+    // This is a bit hacky, only to reuse existing code with minimal changes:
+    //      -c =TAG will now behave as -l TAG:APPEND for integers
+    if ( col->replace==SET_OR_APPEND ) col->merge_method=MM_APPEND;
+
     if ( !tab )
     {
-        if ( col->merge_method!=MM_SUM && col->merge_method!=MM_AVG && col->merge_method!=MM_MIN && col->merge_method!=MM_MAX && col->merge_method!=MM_APPEND )
-            error("Error: at the moment only the sum,avg,min,max,append options are supported with --merge-logic for INFO type=Integer\n");
+        if ( col->merge_method!=MM_SUM && col->merge_method!=MM_AVG &&
+             col->merge_method!=MM_MIN && col->merge_method!=MM_MAX &&
+             col->merge_method!=MM_APPEND && 
+             col->merge_method!=MM_APPEND_MISSING )
+            error("Error: at the moment only the sum,avg,min,max,append,append-missing options are supported with --merge-logic for INFO type=Integer\n");
     }
 
     int i,ntmpi = 0;
-    if ( tab )
+    if ( tab )  // has data, not flushing yet
     {
         char *str = tab->cols[col->icol], *end = str;
-        if ( str[0]=='.' && str[1]==0 ) return 0;
+        if ( str[0]=='.' && str[1]==0 && col->merge_method!=MM_APPEND_MISSING ) return 1;
 
         while ( *end )
         {
-            int val = strtol(str, &end, 10); 
-            if ( end==str )
-                error("Could not parse %s at %s:%"PRId64" .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1,tab->cols[col->icol]);
             ntmpi++;
             hts_expand(int32_t,ntmpi,args->mtmpi,args->tmpi);
-            args->tmpi[ntmpi-1] = val;
-            str = end+1;
+            if ( str[0]=='.' && (str[1]==0 || str[1]==',') )
+            {
+                if ( col->merge_method==MM_APPEND_MISSING )
+                    args->tmpi[ntmpi-1] = bcf_int32_missing;
+                else
+                    ntmpi--;
+                if ( str[1]==0 ) end = str+1;
+                str += 2;
+            }
+            else
+            {
+                args->tmpi[ntmpi-1] = strtol(str, &end, 10); 
+                if ( end==str )
+                    error("Could not parse %s at %s:%"PRId64" .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1,tab->cols[col->icol]);
+                str = end+1;
+            }
         }
         if ( col->merge_method!=MM_FIRST )
         {
@@ -669,7 +755,7 @@ static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *d
             }
             else
             {
-                if ( col->merge_method==MM_APPEND )
+                if ( col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING )
                 {
                     int nori = col->mm_dbl_nused;
                     col->mm_dbl_nused += ntmpi;
@@ -689,9 +775,10 @@ static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *d
                 }
             }
             col->mm_dbl_ndat++;
+            return 1;
         }
     }
-    else if ( col->merge_method==MM_SUM || col->merge_method==MM_MIN || col->merge_method==MM_MAX || col->merge_method==MM_APPEND )
+    else if ( col->merge_method==MM_SUM || col->merge_method==MM_MIN || col->merge_method==MM_MAX || col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING )
     {
         ntmpi = col->mm_dbl_nused;
         hts_expand(int32_t,ntmpi,args->mtmpi,args->tmpi);
@@ -715,8 +802,7 @@ static int setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *d
         if ( ret>0 && args->tmpi2[0]!=bcf_int32_missing ) return 0;
     }
 
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
-    return 0;
+    return bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
 }
 static int vcf_setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
@@ -733,8 +819,7 @@ static int vcf_setter_info_int(args_t *args, bcf1_t *line, annot_col_t *col, voi
         if ( ret>0 && args->tmpi2[0]!=bcf_int32_missing ) return 0;
     }
 
-    bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
-    return 0;
+    return bcf_update_info_int32(args->hdr_out,line,col->hdr_key_dst,args->tmpi,ntmpi);
 }
 static int setter_ARinfo_real(args_t *args, bcf1_t *line, annot_col_t *col, int nals, char **als, int ntmpf)
 {
@@ -765,34 +850,51 @@ static int setter_ARinfo_real(args_t *args, bcf1_t *line, annot_col_t *col, int
 
         args->tmpf2[i] = args->tmpf[ map[i] ];
     }
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,ndst);
-    return 0;
+    return bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,ndst);
 }
 static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     annot_line_t *tab = (annot_line_t*) data;
 
+    // This is a bit hacky, only to reuse existing code with minimal changes:
+    //      -c =TAG will now behave as -l TAG:APPEND for floats
+    if ( col->replace==SET_OR_APPEND ) col->merge_method=MM_APPEND;
+
     if ( !tab )
     {
-        if ( col->merge_method!=MM_SUM && col->merge_method!=MM_AVG && col->merge_method!=MM_MIN && col->merge_method!=MM_MAX && col->merge_method!=MM_APPEND )
-            error("Error: at the moment only the sum,avg,min,max,append options are supported with --merge-logic for INFO type=Float\n");
+        if ( col->merge_method!=MM_SUM && col->merge_method!=MM_AVG &&
+             col->merge_method!=MM_MIN && col->merge_method!=MM_MAX &&
+             col->merge_method!=MM_APPEND &&
+             col->merge_method!=MM_APPEND_MISSING )
+            error("Error: at the moment only the sum,avg,min,max,append,append-missing options are supported with --merge-logic for INFO type=Float\n");
     }
 
     int i,ntmpf = 0;
     if ( tab )
     {
         char *str = tab->cols[col->icol], *end = str;
-        if ( str[0]=='.' && str[1]==0 ) return 0;
+        if ( str[0]=='.' && str[1]==0 && col->merge_method!=MM_APPEND_MISSING ) return 1;
 
         while ( *end )
         {
-            double val = strtod(str, &end);
-            if ( end==str )
-                error("Could not parse %s at %s:%"PRId64" .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1,tab->cols[col->icol]);
             ntmpf++;
             hts_expand(float,ntmpf,args->mtmpf,args->tmpf);
-            args->tmpf[ntmpf-1] = val;
-            str = end+1;
+            if ( str[0]=='.' && (str[1]==0 || str[1]==',') )
+            {
+                if ( col->merge_method==MM_APPEND_MISSING ) 
+                    bcf_float_set_missing(args->tmpf[ntmpf-1]);
+                else
+                    ntmpf--;
+                if ( str[1]==0 ) end = str+1;
+                str += 2;
+            }
+            else
+            {
+                args->tmpf[ntmpf-1] = strtod(str, &end);
+                if ( end==str )
+                    error("Could not parse %s at %s:%"PRId64" .. [%s]\n", col->hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1,tab->cols[col->icol]);
+                str = end+1;
+            }
         }
         if ( col->merge_method!=MM_FIRST )
         {
@@ -801,17 +903,27 @@ static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *
                 col->mm_dbl_nused = ntmpf;
                 hts_expand(double,col->mm_dbl_nused,col->mm_dbl_nalloc,col->mm_dbl);
                 for (i=0; i<ntmpf; i++)
-                    col->mm_dbl[i] = args->tmpf[i];
+                {
+                    if ( bcf_float_is_missing(args->tmpf[i]) )
+                        bcf_double_set_missing(col->mm_dbl[i]);
+                    else
+                        col->mm_dbl[i] = args->tmpf[i];
+                }
             }
             else
             {
-                if ( col->merge_method==MM_APPEND )
+                if ( col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING )
                 {
                     int nori = col->mm_dbl_nused;
                     col->mm_dbl_nused += ntmpf;
                     hts_expand(double,col->mm_dbl_nused,col->mm_dbl_nalloc,col->mm_dbl);
                     for (i=0; i<ntmpf; i++)
-                        col->mm_dbl[i+nori] = args->tmpf[i];
+                    {
+                        if ( bcf_float_is_missing(args->tmpf[i]) )
+                            bcf_double_set_missing(col->mm_dbl[i+nori]);
+                        else
+                            col->mm_dbl[i+nori] = args->tmpf[i];
+                    }
                 }
                 else
                 {
@@ -825,13 +937,20 @@ static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *
                 }
             }
             col->mm_dbl_ndat++;
+            return 1;
         }
     }
-    else if ( col->merge_method==MM_SUM || col->merge_method==MM_MIN || col->merge_method==MM_MAX || col->merge_method==MM_APPEND )
+    else if ( col->merge_method==MM_SUM || col->merge_method==MM_MIN || col->merge_method==MM_MAX || col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING )
     {
         ntmpf = col->mm_dbl_nused;
         hts_expand(int32_t,ntmpf,args->mtmpf,args->tmpf);
-        for (i=0; i<ntmpf; i++) args->tmpf[i] = col->mm_dbl[i];
+        for (i=0; i<ntmpf; i++)
+        {
+            if ( bcf_double_is_missing(col->mm_dbl[i]) )
+                bcf_float_set_missing(args->tmpf[i]);
+            else
+                args->tmpf[i] = col->mm_dbl[i];
+        }
         col->mm_dbl_nused = col->mm_dbl_ndat = 0;
     }
     else if ( col->merge_method==MM_AVG )
@@ -851,8 +970,7 @@ static int setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *
         if ( ret>0 && !bcf_float_is_missing(args->tmpf2[0]) ) return 0;
     }
 
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
-    return 0;
+    return bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
 }
 static int vcf_setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
@@ -869,8 +987,7 @@ static int vcf_setter_info_real(args_t *args, bcf1_t *line, annot_col_t *col, vo
         if ( ret>0 && !bcf_float_is_missing(args->tmpf2[0]) ) return 0;
     }
 
-    bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
-    return 0;
+    return bcf_update_info_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf,ntmpf);
 }
 int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst); // see vcfmerge.c
 static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, int nals, char **als)
@@ -925,10 +1042,9 @@ static int setter_ARinfo_string(args_t *args, bcf1_t *line, annot_col_t *col, in
             if ( str[0]!='.' || (str[1]!=',' && str[1]!=0) ) continue;  // value already set
         }
         int ret = copy_string_field(args->tmps,map[i],lsrc,&args->tmpks,i);
-        assert( ret==0 );
+        if ( ret!=0 ) error("[%s:%d %s] Failed to copy a string field\n",  __FILE__,__LINE__,__func__);
     }
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmpks.s);
-    return 0;
+    return bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmpks.s);
 }
 void khash_str2int_clear_free(void *_hash)
 {
@@ -947,14 +1063,18 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
         if ( ret>0 && (args->tmps2[0]!='.' || args->tmps2[1]!=0) ) return 0;
     }
 
+    // This is a bit hacky, only to reuse existing code with minimal changes:
+    //      -c =TAG will now behave as -l TAG:unique for strings
+    if ( col->replace==SET_OR_APPEND ) col->merge_method=MM_UNIQUE;
+
     annot_line_t *tab = (annot_line_t*) data;
-    
+
     int len = 0;
     if ( tab )
     {
         len = strlen(tab->cols[col->icol]);
         if ( !len ) return 0;
-        if ( len==1 && tab->cols[col->icol][0]=='.' ) return 0;
+        if ( len==1 && tab->cols[col->icol][0]=='.' && col->merge_method!=MM_APPEND_MISSING ) return 1;
     }
 
     if ( col->merge_method!=MM_FIRST )
@@ -964,17 +1084,17 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
 
         if ( data )
         {
-            assert( col->merge_method==MM_APPEND || col->merge_method==MM_UNIQUE );
+            assert( col->merge_method==MM_APPEND || col->merge_method==MM_APPEND_MISSING || col->merge_method==MM_UNIQUE );
             if ( col->merge_method==MM_UNIQUE )
             {
                 if ( !col->mm_str_hash ) col->mm_str_hash = (khash_t(str2int)*)khash_str2int_init();
-                if ( khash_str2int_has_key(col->mm_str_hash, tab->cols[col->icol]) ) return 0;
+                if ( khash_str2int_has_key(col->mm_str_hash, tab->cols[col->icol]) ) return 1;
                 khash_str2int_inc(col->mm_str_hash, strdup(tab->cols[col->icol]));
             }
 
             if ( col->mm_kstr.l ) kputc(',',&col->mm_kstr);
             kputs(tab->cols[col->icol], &col->mm_kstr);
-            return 0;
+            return 1;
         }
 
         if ( col->mm_kstr.l )
@@ -985,12 +1105,10 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
         else
             return 0;
 
-        if ( !data )    // flush the line
-        {
-            if ( col->merge_method==MM_UNIQUE )
-                khash_str2int_clear_free(col->mm_str_hash);
-            col->mm_kstr.l = 0;
-        }
+        // flush the line
+        if ( col->merge_method==MM_UNIQUE )
+            khash_str2int_clear_free(col->mm_str_hash);
+        col->mm_kstr.l = 0;
     }
     else
     {
@@ -1002,14 +1120,19 @@ static int setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *d
             return setter_ARinfo_string(args,line,col,tab->nals,tab->als);
     }
 
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
-    return 0;
+    return bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
 }
 static int vcf_setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
 {
     bcf1_t *rec = (bcf1_t*) data;
-    int ntmps = bcf_get_info_string(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmps,&args->mtmps);
-    if ( ntmps < 0 ) return 0;    // nothing to add
+
+    if ( col->getter )
+        col->getter(args,rec,col,(void**)&args->tmps, &args->mtmps);
+    else
+    {
+        int ntmps = bcf_get_info_string(args->files->readers[1].header,rec,col->hdr_key_src,&args->tmps,&args->mtmps);
+        if ( ntmps < 0 ) return 0;    // nothing to add
+    }
 
     if ( col->number==BCF_VL_A || col->number==BCF_VL_R ) 
         return setter_ARinfo_string(args,line,col,rec->n_allele,rec->d.allele);
@@ -1020,8 +1143,7 @@ static int vcf_setter_info_str(args_t *args, bcf1_t *line, annot_col_t *col, voi
         if ( ret>0 && (args->tmps2[0]!='.' || args->tmps2[1]!=0) ) return 0;
     }
 
-    bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
-    return 0;
+    return bcf_update_info_string(args->hdr_out,line,col->hdr_key_dst,args->tmps);
 }
 static int genotypes_to_string(args_t *args, int nsrc1, int32_t *src, int nsmpl_dst, kstring_t *str)
 {
@@ -1691,7 +1813,6 @@ static int vcf_setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col,
         }
     }
     return bcf_update_format_float(args->hdr_out,line,col->hdr_key_dst,args->tmpf2,nsmpl_dst*ndst1);
-
 }
 
 static int vcf_setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
@@ -1773,17 +1894,12 @@ static int init_sample_map(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst)
         // tab annotation file, expecting that all samples are present: sample map not needed
         if ( !src ) return 0;
 
-        int nmatch = 0, order_ok = 1;
+        int nmatch = 0;
         for (i=0; i<bcf_hdr_nsamples(src); i++)
         {
             int id = bcf_hdr_id2int(dst, BCF_DT_SAMPLE, src->samples[i]);
-            if ( id!=-1 ) 
-            {
-                nmatch++;
-                if ( i!=id ) order_ok = 0;
-            }
+            if ( id!=-1 ) nmatch++;
         }
-        if ( bcf_hdr_nsamples(src)==bcf_hdr_nsamples(dst) && nmatch==bcf_hdr_nsamples(src) && order_ok ) return 0;  // not needed
         if ( !nmatch ) return -1;   // No matching samples found in the source and the destination file
 
         args->nsample_map = bcf_hdr_nsamples(dst);
@@ -1902,11 +2018,45 @@ static void init_columns(args_t *args)
     int need_sample_map = 0;
     int sample_map_ok = init_sample_map(args, args->tgts_is_vcf?args->files->readers[1].header:NULL, args->hdr);
 
+    kstring_t tmp = {0,0,0};
+    if ( args->columns_is_file )
+    {
+        int i,n;
+        char **str = hts_readlist(args->columns, args->columns_is_file, &n);
+        if ( !str ) error("Could not parse %s\n", args->columns);
+        for (i=0; i<n; i++)
+        {
+            char *ptr = str[i];
+            while ( *ptr && !isspace(*ptr) ) ptr++;
+            if ( *ptr )
+            {
+                *ptr = 0;
+                ptr++;
+                while ( *ptr && isspace(*ptr) ) ptr++;
+                if ( *ptr )
+                {
+                    if ( args->merge_method_str.l ) kputc(',',&args->merge_method_str);
+                    kputs(str[i],&args->merge_method_str);
+                    kputc(':',&args->merge_method_str);
+                    kputs(ptr,&args->merge_method_str);
+                }
+            }
+            if ( tmp.l ) kputc(',',&tmp);
+            kputs(str[i],&tmp);
+            free(str[i]);
+        }
+        free(str);
+        free(args->columns);
+        args->columns = tmp.s;
+        tmp.l = tmp.m = 0;
+        tmp.s = NULL;
+    }
+
     void *skip_fmt = NULL, *skip_info = NULL;
     if ( args->tgts_is_vcf )
         args->columns = columns_complement(args->columns, &skip_info, &skip_fmt);
 
-    kstring_t str = {0,0,0}, tmp = {0,0,0};
+    kstring_t str = {0,0,0};
     char *ss = args->columns, *se = ss;
     args->ncols = 0;
     int icol = -1, has_fmt_str = 0;
@@ -1931,6 +2081,7 @@ static void init_columns(args_t *args)
             {
                 args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
                 annot_col_t *col = &args->cols[args->ncols-1];
+                memset(col,0,sizeof(*col));
                 col->setter = vcf_setter_ref;
                 col->hdr_key_src = strdup(str.s);
                 col->hdr_key_dst = strdup(str.s);
@@ -1943,28 +2094,54 @@ static void init_columns(args_t *args)
             {
                 args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
                 annot_col_t *col = &args->cols[args->ncols-1];
+                memset(col,0,sizeof(*col));
                 col->setter = vcf_setter_alt;
                 col->hdr_key_src = strdup(str.s);
                 col->hdr_key_dst = strdup(str.s);
             }
             else args->alt_idx = icol;
         }
-        else if ( !strcasecmp("ID",str.s) )
+        else if ( !strcasecmp("ID",str.s) || !strcasecmp("~ID",str.s) )
         {
             if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -ID feature has not been implemented yet.\n");
+            if ( str.s[0]=='~' ) replace = MATCH_VALUE;
+            if ( args->tgts_is_vcf && replace==MATCH_VALUE ) error("todo: -c ~ID with -a VCF?\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_id : setter_id;
             col->hdr_key_src = strdup(str.s);
             col->hdr_key_dst = strdup(str.s);
+            if ( replace==MATCH_VALUE ) args->match_id = icol;
+        }
+        else if ( !strncasecmp("ID:=",str.s,4) )    // transfer a tag from INFO to ID column
+        {
+            if ( !args->tgts_is_vcf ) error("The annotation source must be a VCF for \"%s\"\n",str.s);
+            if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -ID feature has not been implemented yet.\n");
+            args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
+            annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
+            col->icol = icol;
+            col->replace = replace;
+            col->setter = vcf_setter_id;
+            col->getter = vcf_getter_info_str2str;
+            str.s[2] = 0;
+            col->hdr_key_dst = strdup(str.s);
+            col->hdr_key_src = strncasecmp("INFO/",str.s+4,5) ? strdup(str.s+4) : strdup(str.s+4+5);
+            int hdr_id = bcf_hdr_id2int(args->tgts_hdr, BCF_DT_ID,col->hdr_key_src);
+            if ( !bcf_hdr_idinfo_exists(args->tgts_hdr,BCF_HL_INFO,hdr_id) ) 
+                error("The INFO tag \"%s\" is not defined in %s\n", col->hdr_key_src, args->targets_fname);
+            if ( bcf_hdr_id2type(args->tgts_hdr,BCF_HL_INFO,hdr_id)!=BCF_HT_STR )
+                error("Only Type=String tags can be used to annotate the ID column\n");
         }
         else if ( !strcasecmp("FILTER",str.s) )
         {
             if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_filter : setter_filter;
@@ -1979,7 +2156,7 @@ static void init_columns(args_t *args)
                     bcf_hrec_t *hrec = tgts_hdr->hrec[j];
                     if ( hrec->type!=BCF_HL_FLT ) continue;
                     int k = bcf_hrec_find_key(hrec,"ID");
-                    assert( k>=0 ); // this should always be true for valid VCFs
+                    if ( k<0 ) error("[%s] Failed to parse the header, the ID attribute not found", __func__);
                     tmp.l = 0;
                     bcf_hrec_format(hrec, &tmp);
                     bcf_hdr_append(args->hdr_out, tmp.s);
@@ -1994,6 +2171,7 @@ static void init_columns(args_t *args)
             if ( replace==SET_OR_APPEND ) error("Apologies, the =QUAL feature has not been implemented yet.\n");
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
             col->icol = icol;
             col->replace = replace;
             col->setter = args->tgts_is_vcf ? vcf_setter_qual : setter_qual;
@@ -2003,7 +2181,7 @@ static void init_columns(args_t *args)
         else if ( args->tgts_is_vcf && !strcasecmp("INFO",str.s) ) // All INFO fields
         {
             if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
-            if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO/TAG feature has not been implemented yet.\n");
+            if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO feature has not been implemented yet.\n");
             bcf_hdr_t *tgts_hdr = args->files->readers[1].header;
             int j;
             for (j=0; j<tgts_hdr->nhrec; j++)
@@ -2021,6 +2199,7 @@ static void init_columns(args_t *args)
                 int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, hrec->vals[k]);
                 args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
                 annot_col_t *col = &args->cols[args->ncols-1];
+                memset(col,0,sizeof(*col));
                 col->icol = -1;
                 col->replace = replace;
                 col->hdr_key_src = strdup(hrec->vals[k]);
@@ -2056,11 +2235,16 @@ static void init_columns(args_t *args)
                 int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, hrec->vals[k]);
                 args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
                 annot_col_t *col = &args->cols[args->ncols-1];
+                memset(col,0,sizeof(*col));
                 col->icol = -1;
                 col->replace = replace;
                 col->hdr_key_src = strdup(hrec->vals[k]);
                 col->hdr_key_dst = strdup(hrec->vals[k]);
-                if ( !strcasecmp("GT",col->hdr_key_src) ) col->setter = vcf_setter_format_gt;
+                if ( !strcasecmp("GT",col->hdr_key_src) )
+                {
+                    if ( !args->tgts_is_vcf ) error("The FORMAT/GT field can be currently populated only from a VCF\n");
+                    col->setter = vcf_setter_format_gt;
+                }
                 else
                     switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                     {
@@ -2099,9 +2283,10 @@ static void init_columns(args_t *args)
             }
             int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
             if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_FMT,hdr_id) )
-                error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
+                error("The tag \"%s\" is not defined in %s, was the -h option provided?\n", str.s, args->targets_fname);
             args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
             annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
             if ( !args->tgts_is_vcf )
             {
                 col->icol = icol;
@@ -2112,7 +2297,11 @@ static void init_columns(args_t *args)
             col->replace = replace;
             col->hdr_key_src = strdup(key_src);
             col->hdr_key_dst = strdup(key_dst);
-            if ( !strcasecmp("GT",key_src) ) col->setter = vcf_setter_format_gt;
+            if ( !strcasecmp("GT",key_src) )
+            {
+                if ( !args->tgts_is_vcf ) error("The FORMAT/GT field can be currently populated only from a VCF\n");
+                col->setter = vcf_setter_format_gt;
+            }
             else
                 switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
                 {
@@ -2131,13 +2320,20 @@ static void init_columns(args_t *args)
         else
         {
             if ( replace==REPLACE_NON_MISSING ) error("Apologies, the -INFO/TAG feature has not been implemented yet.\n");
-            if ( replace==SET_OR_APPEND ) error("Apologies, the =INFO/TAG feature has not been implemented yet.\n");
-            int explicit_info = 0;
+            if ( replace==SET_OR_APPEND )
+            {
+                if ( args->tgts_is_vcf )
+                    error("Error: the =INFO/TAG feature is currently supported only with TAB annotation files and has limitations\n"
+                          "       (the annotation type is modified to \"Number=.\" and allele ordering is disregarded)\n");
+                fprintf(bcftools_stderr,"Warning: the =INFO/TAG feature modifies the annotation to \"Number=.\" and disregards allele ordering\n");
+            }
+            int explicit_src_info = 0;
+            int explicit_dst_info = 0;
             char *key_dst;
             if ( !strncasecmp("INFO/",str.s,5) )
             {
                 key_dst = str.s + 5;
-                explicit_info = 1;
+                explicit_dst_info = 1;
             }
             else
                 key_dst = str.s;
@@ -2149,7 +2345,7 @@ static void init_columns(args_t *args)
                 if ( !strncasecmp("INFO/",key_src,5) )
                 {
                     key_src += 5;
-                    explicit_info = 1;
+                    explicit_src_info = 1;
                 }
                 else if ( !strncasecmp("FMT/",key_src,4) || !strncasecmp("FORMAT/",key_src,5) )
                 {
@@ -2159,38 +2355,65 @@ static void init_columns(args_t *args)
             }
             else
                 key_src = key_dst;
+
+            args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
+            annot_col_t *col = &args->cols[args->ncols-1];
+            memset(col,0,sizeof(*col));
+            col->icol = icol;
+            col->replace = replace;
+            col->hdr_key_src = strdup(key_src);
+            col->hdr_key_dst = strdup(key_dst);
+
             int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
             if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_INFO,hdr_id) )
             {
                 if ( args->tgts_is_vcf ) // reading annotations from a VCF, add a new header line
                 {
-                    bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_INFO, "ID", key_src, NULL);
-                    if ( !hrec )
+                    if ( !strcasecmp("ID",key_src) && !explicit_src_info )
                     {
-                        if ( !explicit_info && bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key_src, NULL) )
-                            error("Did you mean \"FMT/%s\" rather than \"%s\"?\n",str.s,str.s);
-                    fprintf(bcftools_stderr,"[%s] %d\n",key_src,explicit_info);
-                        error("The tag \"%s\" is not defined in %s\n", key_src,args->files->readers[1].fname);
+                        // transferring ID column into a new INFO tag
+                        tmp.l = 0;
+                        ksprintf(&tmp,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Transferred ID column\">",key_dst);
+                    }
+                    else if ( !strcasecmp("FILTER",key_src) && !explicit_src_info )
+                    {
+                        // transferring FILTER column into a new INFO tag
+                        tmp.l = 0;
+                        ksprintf(&tmp,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Transferred FILTER column\">",key_dst);
+                    }
+                    else
+                    {
+                        bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_INFO, "ID", key_src, NULL);
+                        if ( !hrec )
+                        {
+                            if ( explicit_dst_info+explicit_src_info==0 && bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key_src, NULL) )
+                                error("Did you mean \"FMT/%s\" rather than \"%s\"?\n",str.s,str.s);
+                            char *ptr = strchr(key_src,'=');
+                            if ( ptr )
+                            {
+                                *ptr = 0; tmp.l = 0; ksprintf(&tmp,"%s:=%s",key_src,ptr+1); *ptr = '=';
+                                error("The tag \"%s\" is not defined, is this what you want \"%s\" ?\n",key_src,tmp.s);
+                            }
+                            error("The tag \"%s\" is not defined in %s, was the -h option provided?\n", key_src,args->files->readers[1].fname);
+                        }
+                        tmp.l = 0;
+                        bcf_hrec_format_rename(hrec, key_dst, &tmp);
                     }
-                    tmp.l = 0;
-                    bcf_hrec_format_rename(hrec, key_dst, &tmp);
                     bcf_hdr_append(args->hdr_out, tmp.s);
                     if (bcf_hdr_sync(args->hdr_out) < 0)
                         error_errno("[%s] Failed to update header", __func__);
                     hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key_dst);
                 }
                 else
-                    error("The tag \"%s\" is not defined in %s\n", key_src, args->targets_fname);
+                    error("The tag \"%s\" is not defined in %s, was the -h option provided?\n", key_src, args->targets_fname);
                 assert( bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_INFO,hdr_id) );
             }
-
-            args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
-            annot_col_t *col = &args->cols[args->ncols-1];
-            col->icol = icol;
-            col->replace = replace;
-            col->hdr_key_src = strdup(key_src);
-            col->hdr_key_dst = strdup(key_dst);
-            col->number  = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
+            if  ( args->tgts_is_vcf )
+            {
+                if ( !strcasecmp("ID",key_src) && !explicit_src_info ) col->getter = vcf_getter_id2str;
+                else if ( !strcasecmp("FILTER",key_src) && !explicit_src_info ) col->getter = vcf_getter_filter2str;
+            }
+            col->number = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
             switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_INFO,hdr_id) )
             {
                 case BCF_HT_FLAG:   col->setter = args->tgts_is_vcf ? vcf_setter_info_flag : setter_info_flag; break;
@@ -2199,6 +2422,18 @@ static void init_columns(args_t *args)
                 case BCF_HT_STR:    col->setter = args->tgts_is_vcf ? vcf_setter_info_str  : setter_info_str; break;
                 default: error("The type of %s not recognised (%d)\n", str.s,bcf_hdr_id2type(args->hdr_out,BCF_HL_INFO,hdr_id));
             }
+            if ( replace==SET_OR_APPEND )   // change to Number=.
+            {
+                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->hdr_out, BCF_HL_INFO, "ID", key_dst, NULL);
+                if ( !hrec ) error("Uh, could not find the new tag \"%s\" in the header\n", key_dst);
+                hrec = bcf_hrec_dup(hrec);
+                int j = bcf_hrec_find_key(hrec, "Number");
+                if ( j<0 ) error("Uh, could not find the entry Number in the header record of %s\n",key_dst);
+                free(hrec->vals[j]);
+                hrec->vals[j] = strdup(".");
+                bcf_hdr_remove(args->hdr_out,BCF_HL_INFO, key_dst);
+                bcf_hdr_add_hrec(args->hdr_out, hrec);
+            }
         }
         if ( !*se ) break;
         ss = ++se;
@@ -2234,10 +2469,10 @@ static void init_merge_method(args_t *args)
         args->cols[i].mm_dbl_nalloc = args->cols[i].mm_dbl_nused = args->cols[i].mm_dbl_ndat = 0;
         memset(&args->cols[i].mm_kstr, 0, sizeof(args->cols[i].mm_kstr));
     }
-    if ( !args->merge_method_str ) return;
+    if ( !args->merge_method_str.l ) return;
     if ( args->tgts_is_vcf ) error("Error: the --merge-logic is intended for use with BED or TAB-delimited files only.\n");
-    if ( !args->tgt_idx ) error("Error: BEG,END (or FROM,TO) columns are expected with the --merge-logic option.\n");
-    char *sb = args->merge_method_str;
+    if ( !args->tgt_idx && !args->tgts ) error("Error: BEG,END (or FROM,TO) columns or REF,ALT columns are expected with the --merge-logic option.\n");
+    char *sb = args->merge_method_str.s;
     while ( *sb )
     {
         char *se = sb;
@@ -2248,21 +2483,27 @@ static void init_merge_method(args_t *args)
         char *mm_type_str = args->tmpks.s + args->tmpks.l;
         while ( *mm_type_str!=':' && mm_type_str > args->tmpks.s ) mm_type_str--;
         if ( *mm_type_str!=':' )
-            error("Error: could not parse the argument to --merge-logic: %s\n", args->merge_method_str);
+            error("Error: could not parse the argument to --merge-logic: %s\n", args->merge_method_str.s);
         *mm_type_str = 0;
         mm_type_str++;
         int mm_type = MM_FIRST;
         if ( !strcasecmp("unique",mm_type_str) ) mm_type = MM_UNIQUE;
+        else if ( !strcasecmp("first",mm_type_str) ) mm_type = MM_FIRST;
         else if ( !strcasecmp("append",mm_type_str) ) mm_type = MM_APPEND;
+        else if ( !strcasecmp("append-missing",mm_type_str) )
+        {
+            mm_type = MM_APPEND_MISSING;
+            if ( args->ref_idx!=-1 ) args->has_append_mode = 1;
+        }
         else if ( !strcasecmp("sum",mm_type_str) ) mm_type = MM_SUM;
         else if ( !strcasecmp("avg",mm_type_str) ) mm_type = MM_AVG;
         else if ( !strcasecmp("min",mm_type_str) ) mm_type = MM_MIN;
         else if ( !strcasecmp("max",mm_type_str) ) mm_type = MM_MAX;
-        else error("Error: could not parse --merge-logic %s, the logic \"%s\" is not recognised\n", args->merge_method_str,mm_type_str);
+        else error("Error: could not parse --merge-logic %s, the logic \"%s\" is not recognised\n", args->merge_method_str.s,mm_type_str);
         for (i=0; i<args->ncols; i++)
         {
             if ( strcmp(args->cols[i].hdr_key_dst,args->tmpks.s) ) continue;
-            if ( mm_type==MM_APPEND && args->cols[i].number!=BCF_VL_VAR )
+            if ( (mm_type==MM_APPEND || mm_type==MM_APPEND_MISSING) && args->cols[i].number!=BCF_VL_VAR )
                 error("Error: --merge-logic append can be requested only for tags of variable length (Number=.)\n");
             args->cols[i].merge_method = mm_type;
             break;
@@ -2270,6 +2511,20 @@ static void init_merge_method(args_t *args)
         if ( i==args->ncols ) error("No such tag in the destination file: %s\n", args->tmpks.s);
         sb = *se ? se + 1 : se;
     }
+    if ( args->has_append_mode )
+    {
+        // create a missing line to insert missing values when VCF ALT finds no match in the annotation file
+        args->aline_missing = (annot_line_t*)calloc(1,sizeof(*args->aline_missing));
+        int ncol = 0;
+        for (i=0; i<args->ncols; i++)
+            if ( ncol < args->cols[i].icol + 1 ) ncol = args->cols[i].icol + 1;
+        if ( ncol < args->ref_idx + 1 ) ncol = args->ref_idx + 1;
+        args->aline_missing->mcols = ncol;
+        args->aline_missing->ncols = ncol;
+        args->aline_missing->cols = (char**) malloc(ncol*sizeof(char*));
+        for (i=0; i<ncol; i++)
+            args->aline_missing->cols[i] = strdup(".");
+    }
 }
 
 static void rename_chrs(args_t *args, char *fname)
@@ -2301,6 +2556,42 @@ static void rename_chrs(args_t *args, char *fname)
     free(map);
 }
 
+// Rename INFO, FORMAT and FILTER tags in the output header; each line of fname reads "TYPE/old_name new_name"
+static void rename_annots(args_t *args, char *fname)
+{
+    int n, i;
+    char **map = hts_readlist(fname, 1, &n);
+    if ( !map ) error("Could not read: %s\n", fname);
+    for (i=0; i<n; i++)
+    {
+        char *sb = NULL, *ss = map[i];
+        while ( *ss && !isspace((unsigned char)*ss) ) ss++;     // cast: isspace() is undefined for negative char values
+        if ( !*ss ) error("Could not parse: %s\n", fname);
+        *ss = 0;    // terminate the "TYPE/old_name" field
+        int type;
+        if ( !strncasecmp("info/",map[i],5) ) type = BCF_HL_INFO, sb = map[i] + 5;
+        else if ( !strncasecmp("format/",map[i],7) ) type = BCF_HL_FMT, sb = map[i] + 7;
+        else if ( !strncasecmp("fmt/",map[i],4) ) type = BCF_HL_FMT, sb = map[i] + 4;
+        else if ( !strncasecmp("filter/",map[i],7) ) type = BCF_HL_FLT, sb = map[i] + 7;
+        else error("Could not parse \"%s\", expected INFO, FORMAT, or FILTER prefix for each line: %s\n",map[i],fname);
+        int id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, sb);
+        bcf_hrec_t *hrec = id<0 ? NULL : bcf_hdr_get_hrec(args->hdr_out, type, "ID", sb, NULL);
+        if ( !hrec ) continue;  // the tag is not present in the header, nothing to rename
+        int j = bcf_hrec_find_key(hrec, "ID");
+        if ( j<0 ) error("[%s] Failed to parse the header, the ID attribute not found\n", __func__);
+        do ss++; while ( *ss && isspace((unsigned char)*ss) );  // step over the terminator and the separating whitespace
+        char *se = ss;
+        while ( *se && !isspace((unsigned char)*se) ) se++;
+        *se = 0;
+        if ( !*ss ) error("Could not parse: %s\n", fname);      // the new name is missing, refuse to write an empty ID
+        free(hrec->vals[j]);
+        hrec->vals[j] = strdup(ss);
+        args->hdr_out->id[BCF_DT_ID][id].key = hrec->vals[j];   // point the id entry at the new name as well
+    }
+    for (i=0; i<n; i++) free(map[i]);
+    free(map);
+}
+
 static void init_data(args_t *args)
 {
     args->hdr = args->files->readers[0].header;
@@ -2313,6 +2604,7 @@ static void init_data(args_t *args)
         // reading annots from a VCF
         if ( !bcf_sr_add_reader(args->files, args->targets_fname) )
             error("Failed to open %s: %s\n", args->targets_fname,bcf_sr_strerror(args->files->errnum));
+        args->tgts_hdr = args->files->readers[1].header;
     }
     if ( args->columns ) init_columns(args);
     if ( args->targets_fname && !args->tgts_is_vcf )
@@ -2320,8 +2612,8 @@ static void init_data(args_t *args)
         if ( !args->columns ) error("The -c option not given\n");
         if ( args->chr_idx==-1 ) error("The -c CHROM option not given\n");
         if ( args->beg_idx==-1 ) error("The -c POS option not given\n");
-        if ( args->single_overlaps && args->merge_method_str ) error("The options --merge-logic and --single-overlaps cannot be combined\n");
-        if ( args->end_idx==-1 || (args->single_overlaps && !args->merge_method_str) )
+        if ( args->single_overlaps && args->merge_method_str.l ) error("The options --merge-logic and --single-overlaps cannot be combined\n");
+        if ( args->end_idx==-1 || (args->single_overlaps && !args->merge_method_str.l) )
         {
             args->end_idx = -args->beg_idx - 1;
             args->tgts = bcf_sr_regions_init(args->targets_fname,1,args->chr_idx,args->beg_idx,args->end_idx);
@@ -2365,8 +2657,9 @@ static void init_data(args_t *args)
     if ( !args->drop_header )
     {
         if ( args->rename_chrs ) rename_chrs(args, args->rename_chrs);
+        if ( args->rename_annots ) rename_annots(args, args->rename_annots);
 
-        args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
+        args->out_fh = hts_open(args->output_fname,hts_bcf_wmode2(args->output_type,args->output_fname));
         if ( args->out_fh == NULL ) error("[%s] Error: cannot write to \"%s\": %s\n", __func__,args->output_fname, strerror(errno));
         if ( args->n_threads )
             hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->files->p);
@@ -2388,8 +2681,15 @@ static void destroy_data(args_t *args)
         free(args->cols[i].mm_kstr.s);
         if ( args->cols[i].mm_str_hash ) khash_str2int_destroy_free(args->cols[i].mm_str_hash);
         free(args->cols[i].mm_dbl);
+        free(args->cols[i].ptr);
     }
     free(args->cols);
+    if ( args->aline_missing )
+    {
+        for (i=0; i<args->aline_missing->ncols; i++) free(args->aline_missing->cols[i]);
+        free(args->aline_missing->cols);
+        free(args->aline_missing);
+    }
     for (i=0; i<args->malines; i++)
     {
         free(args->alines[i].cols);
@@ -2397,6 +2697,7 @@ static void destroy_data(args_t *args)
         free(args->alines[i].line.s);
     }
     free(args->alines);
+    free(args->srt_alines);
     if ( args->tgt_idx )
     {
         regidx_destroy(args->tgt_idx);
@@ -2422,6 +2723,7 @@ static void destroy_data(args_t *args)
         filter_destroy(args->filter);
     if (args->out_fh) hts_close(args->out_fh);
     free(args->sample_map);
+    free(args->merge_method_str.s);
 }
 
 static void parse_annot_line(args_t *args, char *str, annot_line_t *tmp)
@@ -2485,7 +2787,6 @@ static void buffer_annot_lines(args_t *args, bcf1_t *line, int start_pos, int en
         }
         else i++;
     }
-
     if ( args->ref_idx==-1 && args->nalines ) return;
 
     while ( !bcf_sr_regions_overlap(args->tgts, bcf_seqname(args->hdr,line), start_pos,end_pos) )
@@ -2506,6 +2807,36 @@ static void buffer_annot_lines(args_t *args, bcf1_t *line, int start_pos, int en
     }
 }
 
+// Return 1 if needle equals a whole field of the semicolon-separated haystack (xx vs aa;bb), 0 otherwise
+static int str_match(char *needle, char *haystack)
+{
+    int len = strlen(needle);
+    char *ptr = haystack;
+    while ( *ptr && (ptr=strstr(ptr,needle)) )
+    {
+        if ( ptr[len]!=0 && ptr[len]!=';' ) ptr++;          // a prefix, not a match
+        else if ( ptr==haystack || ptr[-1]==';' ) return 1; // a match
+        ptr++;  // a suffix, not a match
+    }
+    return 0;
+}
+// Return 1 if the semicolon-separated lists a and b share a common field (xx;yy;zz vs aa;bb), 0 otherwise
+static int strstr_match(char *a, char *b)
+{
+    char *beg = a;
+    while ( *beg )
+    {
+        char *end = beg;
+        while ( *end && *end!=';' ) end++;
+        char tmp = *end;
+        if ( *end==';' ) *end = 0;
+        int ret = str_match(beg,b);
+        *end = tmp;
+        if ( ret || !*end ) return ret;
+        beg = end + 1;
+    }
+    return 0;
+}
 static void annotate(args_t *args, bcf1_t *line)
 {
     int i, j;
@@ -2513,9 +2844,9 @@ static void annotate(args_t *args, bcf1_t *line)
         args->rm[i].handler(args, line, &args->rm[i]);
 
     int has_overlap = 0;
-
     if ( args->tgt_idx )
     {
+        for (j=0; j<args->ncols; j++) args->cols[j].done = 0;
         if ( regidx_overlap(args->tgt_idx, bcf_seqname(args->hdr,line),line->pos,line->pos+line->rlen-1, args->tgt_itr) )
         {
             while ( regitr_overlap(args->tgt_itr) )
@@ -2526,49 +2857,145 @@ static void annotate(args_t *args, bcf1_t *line)
                 tmp->end   = args->tgt_itr->end;
                 parse_annot_line(args, regitr_payload(args->tgt_itr,char*), tmp);
                 for (j=0; j<args->ncols; j++)
-                    if ( args->cols[j].setter(args,line,&args->cols[j],tmp) )
+                {
+                    if ( args->cols[j].done==1 ) continue;
+                    int ret = args->cols[j].setter(args,line,&args->cols[j],tmp);
+                    if ( ret < 0 )
                         error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+                    if ( ret==0 )
+                        args->cols[j].done = 1;
+                }
             }
             has_overlap = 1;
         }
         for (j=0; j<args->ncols; j++)
-            if ( args->cols[j].merge_method != MM_FIRST )
-                args->cols[j].setter(args,line,&args->cols[j],NULL);
+        {
+            if ( args->cols[j].done==1 || args->cols[j].merge_method == MM_FIRST ) continue;
+            if ( args->cols[j].setter(args,line,&args->cols[j],NULL) < 0 )
+                error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+        }
     }
     else if ( args->tgts )
     {
-        // Buffer annotation lines. When multiple ALT alleles are present in the
-        // annotation file, at least one must match one of the VCF alleles.
-        int len = 0;
-        bcf_get_variant_types(line);
-        for (i=1; i<line->n_allele; i++)
-            if ( len > line->d.var[i].n ) len = line->d.var[i].n;
-        int end_pos = len<0 ? line->pos - len : line->pos;
+        // Buffer annotation lines. When multiple ALT alleles are present in the annotation file, at least one
+        // must match some of the VCF alleles. If the append-missing mode is set (and REF+ALT is requested), the
+        // buffered lines will annotate the VCF respecting the order in ALT and when no matching line is found
+        // for an ALT, missing value is appended instead.
+        int end_pos = line->pos + line->rlen - 1;
         buffer_annot_lines(args, line, line->pos, end_pos);
+
+        args->nsrt_alines = 0;
+        hts_expand(uint32_t,args->nalines,args->msrt_alines,args->srt_alines);
+        if ( args->nalines >= 0xffff || line->n_allele >= 0xffff )
+            error("Error: too many alleles or annotation lines in the buffer at %s:%"PRId64" (todo:skip?)\n",bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+
+        // Find matching lines
         for (i=0; i<args->nalines; i++)
         {
             if ( line->pos > args->alines[i].end || end_pos < args->alines[i].start ) continue;
-            if ( args->ref_idx != -1 )
+            if ( args->ref_idx != -1 )  // REF+ALT matching requested
             {
-                if ( vcmp_set_ref(args->vcmp, line->d.allele[0], args->alines[i].als[0]) < 0 ) continue;   // refs not compatible
+                if ( line->pos!=args->alines[i].start || vcmp_set_ref(args->vcmp, line->d.allele[0], args->alines[i].als[0]) < 0 ) continue;   // refs are not compatible
                 for (j=1; j<args->alines[i].nals; j++)
                 {
-                    if ( line->n_allele==1 && args->alines[i].als[j][0]=='.' && args->alines[i].als[j][1]==0 ) break;   // no ALT allele in VCF and annot file has "."
-                    if ( vcmp_find_allele(args->vcmp, line->d.allele+1, line->n_allele - 1, args->alines[i].als[j]) >= 0 ) break;
+                    int ialt;
+                    if ( line->n_allele==1 && args->alines[i].als[j][0]=='.' && args->alines[i].als[j][1]==0 )  // match: no ALT allele in VCF and annot file has "."
+                        ialt = 0;
+                    else
+                    {
+                        ialt = vcmp_find_allele(args->vcmp, line->d.allele+1, line->n_allele - 1, args->alines[i].als[j]);
+                        if ( ialt < 0 ) continue;
+                        ialt++;
+                    }
+                    if ( args->match_id>=0 && !strstr_match(line->d.id,args->alines[i].cols[args->match_id]) ) continue;
+                    args->srt_alines[args->nsrt_alines++] = (ialt<<16) | i;
+                    has_overlap = 1;
+                    break;
                 }
-                if ( j==args->alines[i].nals ) continue;    // none of the annot alleles present in VCF's ALT
             }
-            break;
+            else    // overlap, REF+ALT matching not requested
+            {
+                args->srt_alines[args->nsrt_alines++] = (0xffff<<16) | i;
+                has_overlap = 1;
+            }
         }
-
-        if ( i<args->nalines )
+        // Sort lines if needed
+        if ( args->has_append_mode )
+        {
+            // insertion sort by VCF ALT index (top bits) and alines index (low bits)
+            uint32_t tmp;
+            for (i=1; i<args->nsrt_alines; i++)
+                for (j=i; j>0 && args->srt_alines[j] < args->srt_alines[j-1]; j--)
+                    tmp = args->srt_alines[j], args->srt_alines[j] = args->srt_alines[j-1], args->srt_alines[j-1] = tmp;
+        }
+        // Annotate
+        for (j=0; j<args->ncols; j++) args->cols[j].done = 0;
+        int ialt_exp = 1;
+        for (i=0; i<args->nsrt_alines; i++)
         {
-            // there is a matching line
+            int ialt = args->srt_alines[i] >> 16;
+            int ilin = args->srt_alines[i] & 0xffff;
+            if ( args->has_append_mode )
+            {
+                if ( ialt_exp > ialt ) continue;    // multiple annotation lines for the same position
+                if ( ialt_exp < ialt )
+                {
+                    // REF+ALT matching requested, append-missing mode: insert "." if no annotation line was found for the ALT
+                    while ( ialt_exp++ < ialt )
+                    {
+                        for (j=0; j<args->ncols; j++)
+                        {
+                            if ( args->cols[j].merge_method != MM_APPEND_MISSING ) continue;
+                            if ( args->cols[j].done==1 ) continue;
+                            int ret = args->cols[j].setter(args,line,&args->cols[j],args->aline_missing);
+                            if ( ret < 0 )
+                                error("fixme: Could not set missing %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+                            if ( ret==0 )
+                                args->cols[j].done = 1;
+                        }
+                    }
+                }
+            }
             for (j=0; j<args->ncols; j++)
-                if ( args->cols[j].setter(args,line,&args->cols[j],&args->alines[i]) )
+            {
+                if ( args->cols[j].done==1 ) continue;
+                int ret = args->cols[j].setter(args,line,&args->cols[j],&args->alines[ilin]);
+                if ( ret < 0 )
                     error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+                if ( ret==0 )
+                    args->cols[j].done = 1;
+            }
+            ialt_exp = ialt + 1;
+        }
+        if ( args->nsrt_alines )
+        {
+            // In the append-missing mode fill missing values to all trailing ALTs, but only if at least one
+            // record was found. Otherwise the row will be left without annotation.
+            if ( args->has_append_mode && ialt_exp < line->n_allele )
+            {
+                while ( ialt_exp++ < line->n_allele )
+                {
+                    for (j=0; j<args->ncols; j++)
+                    {
+                        if ( args->cols[j].merge_method != MM_APPEND_MISSING ) continue;
+                        if ( args->cols[j].done==1 ) continue;
+                        int ret = args->cols[j].setter(args,line,&args->cols[j],args->aline_missing);
+                        if ( ret < 0 )
+                            error("fixme: Could not set missing %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+                        if ( ret==0 )
+                            args->cols[j].done = 1;
+                    }
+                }
+            }
+            // Flush
+            for (j=0; j<args->ncols; j++)
+            {
+                if ( args->cols[j].done==1 || args->cols[j].merge_method == MM_FIRST ) continue;
+                int ret = args->cols[j].setter(args,line,&args->cols[j],NULL);
+                if ( ret < 0 )
+                    error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
+            }
         }
-        has_overlap = i<args->nalines ? 1 : 0;
     }
     else if ( args->files->nreaders == 2 )
     {
@@ -2613,30 +3040,32 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "Usage:   bcftools annotate [options] <in.vcf.gz>\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Options:\n");
-    fprintf(bcftools_stderr, "   -a, --annotations <file>       VCF file or tabix-indexed file with annotations: CHR\\tPOS[\\tVALUE]+\n");
-    fprintf(bcftools_stderr, "       --collapse <string>        matching records by <snps|indels|both|all|some|none>, see man page for details [some]\n");
-    fprintf(bcftools_stderr, "   -c, --columns <list>           list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
-    fprintf(bcftools_stderr, "   -e, --exclude <expr>           exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(bcftools_stderr, "       --force                    continue despite parsing error (at your own risk!)\n");
-    fprintf(bcftools_stderr, "   -h, --header-lines <file>      lines which should be appended to the VCF header\n");
-    fprintf(bcftools_stderr, "   -I, --set-id [+]<format>       set ID column, see man page for details\n");
-    fprintf(bcftools_stderr, "   -i, --include <expr>           select sites for which the expression is true (see man page for details)\n");
-    fprintf(bcftools_stderr, "   -k, --keep-sites               leave -i/-e sites unchanged instead of discarding them\n");
-    fprintf(bcftools_stderr, "   -l, --merge-logic <tag:type>   merge logic for multiple overlapping regions (see man page for details), EXPERIMENTAL\n");
-    fprintf(bcftools_stderr, "   -m, --mark-sites [+-]<tag>     add INFO/tag flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
-    fprintf(bcftools_stderr, "       --no-version               do not append version and command line to the header\n");
-    fprintf(bcftools_stderr, "   -o, --output <file>            write output to a file [standard output]\n");
-    fprintf(bcftools_stderr, "   -O, --output-type <b|u|z|v>    b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(bcftools_stderr, "   -r, --regions <region>         restrict to comma-separated list of regions\n");
-    fprintf(bcftools_stderr, "   -R, --regions-file <file>      restrict to regions listed in a file\n");
-    fprintf(bcftools_stderr, "       --rename-chrs <file>       rename sequences according to map file: from\\tto\n");
-    fprintf(bcftools_stderr, "   -s, --samples [^]<list>        comma separated list of samples to annotate (or exclude with \"^\" prefix)\n");
-    fprintf(bcftools_stderr, "   -S, --samples-file [^]<file>   file of samples to annotate (or exclude with \"^\" prefix)\n");
-    fprintf(bcftools_stderr, "       --single-overlaps          keep memory low by avoiding complexities arising from handling multiple overlapping intervals\n");
-    fprintf(bcftools_stderr, "   -x, --remove <list>            list of annotations (e.g. ID,INFO/DP,FORMAT/DP,FILTER) to remove (or keep with \"^\" prefix). See man page for details\n");
-    fprintf(bcftools_stderr, "       --threads <int>            number of extra output compression threads [0]\n");
+    fprintf(bcftools_stderr, "   -a, --annotations FILE       VCF file or tabix-indexed FILE with annotations: CHR\\tPOS[\\tVALUE]+\n");
+    fprintf(bcftools_stderr, "       --collapse STR           matching records by <snps|indels|both|all|some|none>, see man page for details [some]\n");
+    fprintf(bcftools_stderr, "   -c, --columns LIST           list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
+    fprintf(bcftools_stderr, "   -C, --columns-file FILE      read -c columns from FILE, one name per row, with optional --merge-logic TYPE: NAME[ TYPE]\n");
+    fprintf(bcftools_stderr, "   -e, --exclude EXPR           exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(bcftools_stderr, "       --force                  continue despite parsing error (at your own risk!)\n");
+    fprintf(bcftools_stderr, "   -h, --header-lines FILE      lines which should be appended to the VCF header\n");
+    fprintf(bcftools_stderr, "   -I, --set-id [+]FORMAT       set ID column using a `bcftools query`-like expression, see man page for details\n");
+    fprintf(bcftools_stderr, "   -i, --include EXPR           select sites for which the expression is true (see man page for details)\n");
+    fprintf(bcftools_stderr, "   -k, --keep-sites             leave -i/-e sites unchanged instead of discarding them\n");
+    fprintf(bcftools_stderr, "   -l, --merge-logic TAG:TYPE   merge logic for multiple overlapping regions (see man page for details), EXPERIMENTAL\n");
+    fprintf(bcftools_stderr, "   -m, --mark-sites [+-]TAG     add INFO/TAG flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
+    fprintf(bcftools_stderr, "       --no-version             do not append version and command line to the header\n");
+    fprintf(bcftools_stderr, "   -o, --output FILE            write output to a file [standard output]\n");
+    fprintf(bcftools_stderr, "   -O, --output-type [b|u|z|v]  b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+    fprintf(bcftools_stderr, "   -r, --regions REGION         restrict to comma-separated list of regions\n");
+    fprintf(bcftools_stderr, "   -R, --regions-file FILE      restrict to regions listed in FILE\n");
+    fprintf(bcftools_stderr, "       --rename-annots FILE     rename annotations: TYPE/old\\tnew, where TYPE is one of FILTER,INFO,FORMAT\n");
+    fprintf(bcftools_stderr, "       --rename-chrs FILE       rename sequences according to the mapping: old\\tnew\n");
+    fprintf(bcftools_stderr, "   -s, --samples [^]LIST        comma separated list of samples to annotate (or exclude with \"^\" prefix)\n");
+    fprintf(bcftools_stderr, "   -S, --samples-file [^]FILE   file of samples to annotate (or exclude with \"^\" prefix)\n");
+    fprintf(bcftools_stderr, "       --single-overlaps        keep memory low by avoiding complexities arising from handling multiple overlapping intervals\n");
+    fprintf(bcftools_stderr, "   -x, --remove LIST            list of annotations (e.g. ID,INFO/DP,FORMAT/DP,FILTER) to remove (or keep with \"^\" prefix). See man page for details\n");
+    fprintf(bcftools_stderr, "       --threads INT            number of extra output compression threads [0]\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfannotate(int argc, char *argv[])
@@ -2651,6 +3080,7 @@ int main_vcfannotate(int argc, char *argv[])
     args->record_cmd_line = 1;
     args->ref_idx = args->alt_idx = args->chr_idx = args->beg_idx = args->end_idx = -1;
     args->set_ids_replace = 1;
+    args->match_id = -1;
     int regions_is_file = 0, collapse = 0;
 
     static struct option loptions[] =
@@ -2669,7 +3099,9 @@ int main_vcfannotate(int argc, char *argv[])
         {"regions",required_argument,NULL,'r'},
         {"regions-file",required_argument,NULL,'R'},
         {"remove",required_argument,NULL,'x'},
+        {"columns-file",required_argument,NULL,'C'},
         {"columns",required_argument,NULL,'c'},
+        {"rename-annots",required_argument,NULL,11},
         {"rename-chrs",required_argument,NULL,1},
         {"header-lines",required_argument,NULL,'h'},
         {"samples",required_argument,NULL,'s'},
@@ -2679,7 +3111,7 @@ int main_vcfannotate(int argc, char *argv[])
         {"force",no_argument,NULL,'f'},
         {NULL,0,NULL,0}
     };
-    while ((c = getopt_long(argc, argv, "h:?o:O:r:R:a:x:c:i:e:S:s:I:m:kl:f",loptions,NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "h:?o:O:r:R:a:x:c:C:i:e:S:s:I:m:kl:f",loptions,NULL)) >= 0)
     {
         switch (c) {
             case 'f': args->force = 1; break;
@@ -2690,11 +3122,15 @@ int main_vcfannotate(int argc, char *argv[])
                 else if ( optarg[0]=='-' ) { args->mark_sites = optarg+1; args->mark_sites_logic = MARK_UNLISTED; }
                 else args->mark_sites = optarg; 
                 break;
-            case 'l': args->merge_method_str = optarg; break;
+            case 'l': 
+                if ( args->merge_method_str.l ) kputc(',',&args->merge_method_str);
+                kputs(optarg,&args->merge_method_str);
+                break;
             case 'I': args->set_ids_fmt = optarg; break;
             case 's': args->sample_names = optarg; break;
             case 'S': args->sample_names = optarg; args->sample_is_file = 1; break;
             case 'c': args->columns = strdup(optarg); break;
+            case 'C': args->columns = strdup(optarg); args->columns_is_file = 1; break;
             case 'o': args->output_fname = optarg; break;
             case 'O':
                 switch (optarg[0]) {
@@ -2705,8 +3141,12 @@ int main_vcfannotate(int argc, char *argv[])
                     default: error("The output type \"%s\" not recognised\n", optarg);
                 };
                 break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'x': args->remove_annots = optarg; break;
             case 'a': args->targets_fname = optarg; break;
             case 'r': args->regions_list = optarg; break;
@@ -2726,6 +3166,7 @@ int main_vcfannotate(int argc, char *argv[])
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case  8 : args->record_cmd_line = 0; break;
             case 10 : args->single_overlaps = 1; break;
+            case 11 : args->rename_annots = optarg; break;
             case '?': usage(args); break;
             default: error("Unknown argument: %s\n", optarg);
         }
diff --git a/bcftools/vcfbuf.c b/bcftools/vcfbuf.c
index ffdfd40..71916bb 100644
--- a/bcftools/vcfbuf.c
+++ b/bcftools/vcfbuf.c
@@ -1,6 +1,6 @@
 /* The MIT License
 
-   Copyright (c) 2016-2019 Genome Research Ltd.
+   Copyright (c) 2016-2021 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
    
@@ -24,16 +24,19 @@
 
  */
 
+#include <assert.h>
+#include <strings.h>
 #include <htslib/vcf.h>
 #include <htslib/vcfutils.h>
+#include <htslib/hts_os.h>
 #include "bcftools.h"
 #include "vcfbuf.h"
 #include "rbuf.h"
 
 typedef struct
 {
-    double max;
-    int rand_missing, skip_filter;
+    double max[VCFBUF_LD_N];
+    int rand_missing, filter1;
 }
 ld_t;
 
@@ -41,13 +44,16 @@ typedef struct
 {
     bcf1_t *rec;
     double af;
-    int af_set:1, idx:31;
+    int af_set:1, filter:1, idx:30;
 }
 vcfrec_t;
 
+#define PRUNE_MODE_MAX_AF 1
+#define PRUNE_MODE_1ST    2
+#define PRUNE_MODE_RAND   3
 typedef struct
 {
-    int max_sites, mvrec, mac, mfarr;
+    int max_sites, mvrec, mac, mfarr, mode;
     int *ac, *idx;
     float *farr;
     char *af_tag;
@@ -85,6 +91,8 @@ vcfbuf_t *vcfbuf_init(bcf_hdr_t *hdr, int win)
     buf->hdr = hdr;
     buf->win = win;
     buf->overlap.rid = -1;
+    int i;
+    for (i=0; i<VCFBUF_LD_N; i++) buf->ld.max[i] = HUGE_VAL;
     rbuf_init(&buf->rbuf, 0);
     return buf;
 }
@@ -104,13 +112,30 @@ void vcfbuf_destroy(vcfbuf_t *buf)
 
 void vcfbuf_set(vcfbuf_t *buf, vcfbuf_opt_t key, void *value)
 {
-    if ( key==VCFBUF_LD_MAX ) { buf->ld.max = *((double*)value); return; }
-    if ( key==VCFBUF_RAND_MISSING ) { buf->ld.rand_missing = *((int*)value); return; }
-    if ( key==VCFBUF_SKIP_FILTER ) { buf->ld.skip_filter = *((int*)value); return; }
-    if ( key==VCFBUF_NSITES ) { buf->prune.max_sites = *((int*)value); return; }
+    if ( key==LD_FILTER1 ) { buf->ld.filter1 = *((int*)value); return; }
+    if ( key==LD_RAND_MISSING ) { buf->ld.rand_missing = *((int*)value); return; }
+    if ( key==LD_MAX_R2 ) { buf->ld.max[VCFBUF_LD_IDX_R2] = *((double*)value); return; }
+    if ( key==LD_MAX_LD ) { buf->ld.max[VCFBUF_LD_IDX_LD] = *((double*)value); return; }
+    if ( key==LD_MAX_HD ) { buf->ld.max[VCFBUF_LD_IDX_HD] = *((double*)value); return; }
+
+    if ( key==VCFBUF_NSITES )
+    {
+        buf->prune.max_sites = *((int*)value);
+        if ( !buf->prune.mode ) buf->prune.mode = PRUNE_MODE_MAX_AF;
+        return;
+    }
     if ( key==VCFBUF_AF_TAG ) { buf->prune.af_tag = *((char**)value); return; }
     if ( key==VCFBUF_OVERLAP_WIN ) { buf->overlap.active = *((int*)value); return; }
     if ( key==VCFBUF_RMDUP) { buf->rmdup.active = *((int*)value); return; }
+
+    if ( key==VCFBUF_NSITES_MODE )
+    {
+        char *mode = *((char**)value);
+        if ( !strcasecmp(mode,"maxAF") ) buf->prune.mode = PRUNE_MODE_MAX_AF;
+        else if ( !strcasecmp(mode,"1st") ) buf->prune.mode = PRUNE_MODE_1ST;
+        else if ( !strcasecmp(mode,"rand") ) buf->prune.mode = PRUNE_MODE_RAND;
+        else error("The mode \"%s\" is not recognised\n",mode);
+    }
 }
 
 int vcfbuf_nsites(vcfbuf_t *buf)
@@ -118,10 +143,8 @@ int vcfbuf_nsites(vcfbuf_t *buf)
     return buf->rbuf.n;
 }
 
-bcf1_t *vcfbuf_push(vcfbuf_t *buf, bcf1_t *rec, int swap)
+bcf1_t *vcfbuf_push(vcfbuf_t *buf, bcf1_t *rec)
 {
-    if ( !swap ) error("todo: swap=%d\n", swap);
-
     rbuf_expand0(&buf->rbuf, vcfrec_t, buf->rbuf.n+1, buf->vcf);
 
     int i = rbuf_append(&buf->rbuf);
@@ -130,6 +153,8 @@ bcf1_t *vcfbuf_push(vcfbuf_t *buf, bcf1_t *rec, int swap)
     bcf1_t *ret = buf->vcf[i].rec;
     buf->vcf[i].rec = rec;
     buf->vcf[i].af_set = 0;
+    buf->vcf[i].filter = buf->ld.filter1;
+    buf->ld.filter1 = 0;
 
     return ret;
 }
@@ -170,6 +195,26 @@ static void _prune_sites(vcfbuf_t *buf, int flush_all)
 {
     int nbuf = flush_all ? buf->rbuf.n : buf->rbuf.n - 1;
 
+    int nprune = nbuf - buf->prune.max_sites;
+    int i,k,irec = 0;
+    if ( buf->prune.mode==PRUNE_MODE_1ST )
+    {
+        int eoff = flush_all ? 1 : 2;
+        for (i=0; i<nprune; i++)
+            rbuf_remove_kth(&buf->rbuf, vcfrec_t, buf->rbuf.n - eoff, buf->vcf);
+        return;
+    }
+    if ( buf->prune.mode==PRUNE_MODE_RAND )
+    {
+        int eoff = flush_all ? 0 : 1;
+        for (i=0; i<nprune; i++)
+        {
+            int j = (buf->rbuf.n - eoff) * hts_drand48();
+            rbuf_remove_kth(&buf->rbuf, vcfrec_t, j, buf->vcf);
+        }
+        return;
+    }
+
     if ( nbuf > buf->prune.mvrec )
     {
         buf->prune.idx   = (int*) realloc(buf->prune.idx, nbuf*sizeof(int));
@@ -178,7 +223,6 @@ static void _prune_sites(vcfbuf_t *buf, int flush_all)
     }
 
     // set allele frequency and prepare buffer for sorting
-    int i,k,irec = 0;
     for (i=-1; rbuf_next(&buf->rbuf,&i) && irec<nbuf; )
     {
         bcf1_t *line = buf->vcf[i].rec;
@@ -211,7 +255,6 @@ static void _prune_sites(vcfbuf_t *buf, int flush_all)
 
     // sort the rbuf indexes to be pruned descendently so that j-th rbuf index
     // is removed before i-th index if i<j
-    int nprune = nbuf - buf->prune.max_sites;
     for (i=0; i<nprune; i++)
         buf->prune.idx[i] = buf->prune.vrec[i]->idx;
 
@@ -333,10 +376,21 @@ static double _estimate_af(int8_t *ptr, int size, int nvals, int nsamples)
 }
 
 /*
-    For unphased genotypes D is approximated as suggested in https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2710162/
+    The `ld` is set to D approximated as suggested in https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2710162/
         D =~ (GT correlation) * sqrt(Pa*(1-Pa)*Pb*(1-Pb))
+
+    and `hd` as proposed in Ragsdale, A. P., & Gravel, S. (2019). Unbiased estimation of linkage
+    disequilibrium from unphased data.  Molecular Biology and Evolution. doi:10.1093/molbev/msz265 
+
+        \hat{D} = 1/[n*(n+1)]*[
+                             (n1 + n2/2 + n4/2 + n5/4)*(n5/4 + n6/2 + n8/2 + n9)
+                            -(n2/2 + n3 + n5/4 + n6/2)*(n4/2 + n5/4 + n7 + n8/2)
+                        ]
+    where n1,n2,..n9 are counts of RR/RR,RR/RA,..,AA/AA genotypes.
+
+    Returns 0 on success, -1 if the values could not be determined (missing genotypes)
 */
-static double _calc_ld(vcfbuf_t *buf, bcf1_t *arec, bcf1_t *brec)
+static int _calc_r2_ld(vcfbuf_t *buf, bcf1_t *arec, bcf1_t *brec, vcfbuf_ld_t *ld)
 {
     if ( arec->n_sample!=brec->n_sample ) error("Different number of samples: %d vs %d\n",arec->n_sample,brec->n_sample);
     assert( arec->n_sample );
@@ -365,21 +419,24 @@ static double _calc_ld(vcfbuf_t *buf, bcf1_t *arec, bcf1_t *brec)
         baf = _estimate_af((int8_t*)bfmt->p, bfmt->size, bfmt->n, brec->n_sample);
     }
 
-    // Calculate correlation 
+    // Calculate r2, ld, hd
+    double nhd[] = {0,0,0,0,0,0,0,0,0};
     double ab = 0, aa = 0, bb = 0, a = 0, b = 0;
-    int nab = 0, na = 0, nb = 0, ndiff = 0;
+    int nab = 0, ndiff = 0;
+    int an_tot = 0, bn_tot = 0; 
     for (i=0; i<arec->n_sample; i++)
     {
         int8_t *aptr = (int8_t*) (afmt->p + i*afmt->size);
         int8_t *bptr = (int8_t*) (bfmt->p + i*bfmt->size);
-        int adsg = 0, bdsg = 0, an = 0, bn = 0;
+        int adsg = 0, bdsg = 0;     // dosages (0,1,2) at sites (a,b)
+        int an = 0, bn = 0;         // number of alleles at sites (a,b)
         for (j=0; j<afmt->n; j++)
         {
             if ( aptr[j]==bcf_int8_vector_end ) break;
             if ( aptr[j]==bcf_gt_missing )
             {
                 if ( !buf->ld.rand_missing ) break;
-                if ( rand()/RAND_MAX >= aaf ) adsg += 1;
+                if ( hts_drand48() >= aaf ) adsg += 1;
             }
             else if ( bcf_gt_allele(aptr[j]) ) adsg += 1;
             an++;
@@ -390,89 +447,112 @@ static double _calc_ld(vcfbuf_t *buf, bcf1_t *arec, bcf1_t *brec)
             if ( bptr[j]==bcf_gt_missing )
             {
                 if ( !buf->ld.rand_missing ) break;
-                if ( rand()/RAND_MAX >= baf ) bdsg += 1;
+                if ( hts_drand48() >= baf ) bdsg += 1;
             }
             else if ( bcf_gt_allele(bptr[j]) ) bdsg += 1;
             bn++;
         }
-        if ( an )
+        if ( an && bn )
         {
+            an_tot += an;
             aa += adsg*adsg;
             a  += adsg;
-            na++;
-        }
-        if ( bn )
-        {
+
+            bn_tot += bn;
             bb += bdsg*bdsg;
             b  += bdsg;
-            nb++;
-        }
-        if ( an && bn )
-        {
+
             if ( adsg!=bdsg ) ndiff++;
             ab += adsg*bdsg;
             nab++;
         }
+        if ( an==2 && bn==2 )   // for now only diploid genotypes
+        {
+            assert( adsg<=2 && bdsg<=2 );
+            nhd[ bdsg*3 + adsg ]++;
+        }
     }
-    if ( !nab ) return -1;
+    if ( !nab ) return -1;  // no data in common for the two sites
 
+    double pa = a/an_tot;
+    double pb = b/bn_tot;
     double cor;
     if ( !ndiff ) cor = 1;
     else
     {
-        // Don't know how to deal with zero variance. Since this the purpose is filtering,
-        // it is not enough to say the value is undefined. Therefore an artificial noise is
-        // added to make the denominator non-zero.
-        if ( aa == a*a/na || bb == b*b/nb )
+        if ( aa == a*a/nab || bb == b*b/nab )     // zero variance, add small noise
         {
-            aa += 3*3;
-            bb += 3*3;
-            ab += 3*3;
-            a  += 3;
-            b  += 3;
-            na++;
-            nb++;
+            aa += 1e-4;
+            bb += 1e-4;
+            ab += 1e-4;
+            a  += 1e-2;
+            b  += 1e-2;
             nab++;
         }
-        cor = (ab/nab - a/na*b/nb) / sqrt(aa/na - a/na*a/na) / sqrt(bb/nb - b/nb*b/nb);
+        cor = (ab - a*b/nab) / sqrt(aa - a*a/nab) / sqrt(bb - b*b/nab);
     }
-    return cor*cor;
+
+    ld->val[VCFBUF_LD_IDX_R2] = cor * cor;
+
+    // Lewontin's normalization of D. Also we cap at 1 as the calculation
+    // can result in values bigger than 1 for high AFs.
+    ld->val[VCFBUF_LD_IDX_LD] = cor * sqrt(pa*(1-pa)*pb*(1-pb));
+    double norm;
+    if ( ld->val[VCFBUF_LD_IDX_LD] < 0 )
+        norm = -pa*pb > -(1-pa)*(1-pb) ? -pa*pb : -(1-pa)*(1-pb);
+    else
+        norm = pa*(1-pb) > (1-pa)*pb ? pa*(1-pb) : (1-pa)*pb;
+    if ( norm )
+        ld->val[VCFBUF_LD_IDX_LD] = fabs(norm) > fabs(ld->val[VCFBUF_LD_IDX_LD]) ? ld->val[VCFBUF_LD_IDX_LD]/norm : 1;
+    if ( !ld->val[VCFBUF_LD_IDX_LD] )
+        ld->val[VCFBUF_LD_IDX_LD] = fabs(ld->val[VCFBUF_LD_IDX_LD]);    // avoid "-0" on output
+
+    ld->val[VCFBUF_LD_IDX_HD] =
+        (nhd[0] + nhd[1]/2. + nhd[3]/2. + nhd[4]/4.)*(nhd[4]/4. + nhd[5]/2. + nhd[7]/2. + nhd[8]) 
+        - (nhd[1]/2. + nhd[2] + nhd[4]/4. + nhd[5]/2.)*(nhd[3]/2. + nhd[4]/4. + nhd[6] + nhd[7]/2.);
+    ld->val[VCFBUF_LD_IDX_HD] /= nab;
+    ld->val[VCFBUF_LD_IDX_HD] /= nab+1;
+
+    return 0;
 }
 
-bcf1_t *vcfbuf_max_ld(vcfbuf_t *buf, bcf1_t *rec, double *ld)
+int vcfbuf_ld(vcfbuf_t *buf, bcf1_t *rec, vcfbuf_ld_t *ld)
 {
-    *ld = -1;
-    if ( !buf->rbuf.n ) return NULL;
+    int ret = -1;
+    if ( !buf->rbuf.n ) return ret;
 
-    int i = buf->rbuf.f;
+    int j, i = buf->rbuf.f;
 
     // Relying on vcfbuf being properly flushed - all sites in the buffer
     // must come from the same chromosome
-    if ( buf->vcf[i].rec->rid != rec->rid ) return NULL;
+    if ( buf->vcf[i].rec->rid != rec->rid ) return ret;
+
+    vcfbuf_ld_t tmp;
+    for (j=0; j<VCFBUF_LD_N; j++)
+    {
+        ld->val[j] = -HUGE_VAL;
+        ld->rec[j] = NULL;
+    }
 
-    int imax = 0;
-    double max = 0;
     for (i=-1; rbuf_next(&buf->rbuf,&i); )
     {   
-        if ( buf->ld.skip_filter )
-        {
-            if ( buf->vcf[i].rec->d.n_flt > 1 ) continue;   // multiple filters are set
-            if ( buf->vcf[i].rec->d.n_flt==1 && buf->vcf[i].rec->d.flt[0]!=0 ) continue;    // not PASS
-        }
-        double val = _calc_ld(buf, buf->vcf[i].rec, rec);
-        if ( buf->ld.max && buf->ld.max < val ) 
-        {
-            *ld = val;
-            return buf->vcf[i].rec;
-        }
-        if ( val > max )
+        if ( buf->vcf[i].filter ) continue;
+        if ( _calc_r2_ld(buf, buf->vcf[i].rec, rec, &tmp) < 0 ) continue;   // missing genotypes
+
+        int done = 0;
+        for (j=0; j<VCFBUF_LD_N; j++)
         {
-            max  = val;
-            imax = i;
+            if ( ld->val[j] < tmp.val[j] )
+            {
+                ld->val[j] = tmp.val[j];
+                ld->rec[j] = buf->vcf[i].rec;
+            }
+            if ( buf->ld.max[j] < tmp.val[j] ) done = 1;
+            ret = 0;
         }
+        if ( done ) return ret;
     }
-    *ld = max;
-    return buf->vcf[imax].rec;
+    return ret;
 }
 
 
diff --git a/bcftools/vcfbuf.c.pysam.c b/bcftools/vcfbuf.c.pysam.c
index d1dcf99..50df73d 100644
--- a/bcftools/vcfbuf.c.pysam.c
+++ b/bcftools/vcfbuf.c.pysam.c
@@ -2,7 +2,7 @@
 
 /* The MIT License
 
-   Copyright (c) 2016-2019 Genome Research Ltd.
+   Copyright (c) 2016-2021 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
    
@@ -26,16 +26,19 @@
 
  */
 
+#include <assert.h>
+#include <strings.h>
 #include <htslib/vcf.h>
 #include <htslib/vcfutils.h>
+#include <htslib/hts_os.h>
 #include "bcftools.h"
 #include "vcfbuf.h"
 #include "rbuf.h"
 
 typedef struct
 {
-    double max;
-    int rand_missing, skip_filter;
+    double max[VCFBUF_LD_N];
+    int rand_missing, filter1;
 }
 ld_t;
 
@@ -43,13 +46,16 @@ typedef struct
 {
     bcf1_t *rec;
     double af;
-    int af_set:1, idx:31;
+    int af_set:1, filter:1, idx:30;
 }
 vcfrec_t;
 
+#define PRUNE_MODE_MAX_AF 1
+#define PRUNE_MODE_1ST    2
+#define PRUNE_MODE_RAND   3
 typedef struct
 {
-    int max_sites, mvrec, mac, mfarr;
+    int max_sites, mvrec, mac, mfarr, mode;
     int *ac, *idx;
     float *farr;
     char *af_tag;
@@ -87,6 +93,8 @@ vcfbuf_t *vcfbuf_init(bcf_hdr_t *hdr, int win)
     buf->hdr = hdr;
     buf->win = win;
     buf->overlap.rid = -1;
+    int i;
+    for (i=0; i<VCFBUF_LD_N; i++) buf->ld.max[i] = HUGE_VAL;
     rbuf_init(&buf->rbuf, 0);
     return buf;
 }
@@ -106,13 +114,30 @@ void vcfbuf_destroy(vcfbuf_t *buf)
 
 void vcfbuf_set(vcfbuf_t *buf, vcfbuf_opt_t key, void *value)
 {
-    if ( key==VCFBUF_LD_MAX ) { buf->ld.max = *((double*)value); return; }
-    if ( key==VCFBUF_RAND_MISSING ) { buf->ld.rand_missing = *((int*)value); return; }
-    if ( key==VCFBUF_SKIP_FILTER ) { buf->ld.skip_filter = *((int*)value); return; }
-    if ( key==VCFBUF_NSITES ) { buf->prune.max_sites = *((int*)value); return; }
+    if ( key==LD_FILTER1 ) { buf->ld.filter1 = *((int*)value); return; }
+    if ( key==LD_RAND_MISSING ) { buf->ld.rand_missing = *((int*)value); return; }
+    if ( key==LD_MAX_R2 ) { buf->ld.max[VCFBUF_LD_IDX_R2] = *((double*)value); return; }
+    if ( key==LD_MAX_LD ) { buf->ld.max[VCFBUF_LD_IDX_LD] = *((double*)value); return; }
+    if ( key==LD_MAX_HD ) { buf->ld.max[VCFBUF_LD_IDX_HD] = *((double*)value); return; }
+
+    if ( key==VCFBUF_NSITES )
+    {
+        buf->prune.max_sites = *((int*)value);
+        if ( !buf->prune.mode ) buf->prune.mode = PRUNE_MODE_MAX_AF;
+        return;
+    }
     if ( key==VCFBUF_AF_TAG ) { buf->prune.af_tag = *((char**)value); return; }
     if ( key==VCFBUF_OVERLAP_WIN ) { buf->overlap.active = *((int*)value); return; }
     if ( key==VCFBUF_RMDUP) { buf->rmdup.active = *((int*)value); return; }
+
+    if ( key==VCFBUF_NSITES_MODE )
+    {
+        char *mode = *((char**)value);
+        if ( !strcasecmp(mode,"maxAF") ) buf->prune.mode = PRUNE_MODE_MAX_AF;
+        else if ( !strcasecmp(mode,"1st") ) buf->prune.mode = PRUNE_MODE_1ST;
+        else if ( !strcasecmp(mode,"rand") ) buf->prune.mode = PRUNE_MODE_RAND;
+        else error("The mode \"%s\" is not recognised\n",mode);
+    }
 }
 
 int vcfbuf_nsites(vcfbuf_t *buf)
@@ -120,10 +145,8 @@ int vcfbuf_nsites(vcfbuf_t *buf)
     return buf->rbuf.n;
 }
 
-bcf1_t *vcfbuf_push(vcfbuf_t *buf, bcf1_t *rec, int swap)
+bcf1_t *vcfbuf_push(vcfbuf_t *buf, bcf1_t *rec)
 {
-    if ( !swap ) error("todo: swap=%d\n", swap);
-
     rbuf_expand0(&buf->rbuf, vcfrec_t, buf->rbuf.n+1, buf->vcf);
 
     int i = rbuf_append(&buf->rbuf);
@@ -132,6 +155,8 @@ bcf1_t *vcfbuf_push(vcfbuf_t *buf, bcf1_t *rec, int swap)
     bcf1_t *ret = buf->vcf[i].rec;
     buf->vcf[i].rec = rec;
     buf->vcf[i].af_set = 0;
+    buf->vcf[i].filter = buf->ld.filter1;
+    buf->ld.filter1 = 0;
 
     return ret;
 }
@@ -172,6 +197,26 @@ static void _prune_sites(vcfbuf_t *buf, int flush_all)
 {
     int nbuf = flush_all ? buf->rbuf.n : buf->rbuf.n - 1;
 
+    int nprune = nbuf - buf->prune.max_sites;
+    int i,k,irec = 0;
+    if ( buf->prune.mode==PRUNE_MODE_1ST )
+    {
+        int eoff = flush_all ? 1 : 2;
+        for (i=0; i<nprune; i++)
+            rbuf_remove_kth(&buf->rbuf, vcfrec_t, buf->rbuf.n - eoff, buf->vcf);
+        return;
+    }
+    if ( buf->prune.mode==PRUNE_MODE_RAND )
+    {
+        int eoff = flush_all ? 0 : 1;
+        for (i=0; i<nprune; i++)
+        {
+            int j = (buf->rbuf.n - eoff) * hts_drand48();
+            rbuf_remove_kth(&buf->rbuf, vcfrec_t, j, buf->vcf);
+        }
+        return;
+    }
+
     if ( nbuf > buf->prune.mvrec )
     {
         buf->prune.idx   = (int*) realloc(buf->prune.idx, nbuf*sizeof(int));
@@ -180,7 +225,6 @@ static void _prune_sites(vcfbuf_t *buf, int flush_all)
     }
 
     // set allele frequency and prepare buffer for sorting
-    int i,k,irec = 0;
     for (i=-1; rbuf_next(&buf->rbuf,&i) && irec<nbuf; )
     {
         bcf1_t *line = buf->vcf[i].rec;
@@ -213,7 +257,6 @@ static void _prune_sites(vcfbuf_t *buf, int flush_all)
 
     // sort the rbuf indexes to be pruned descendently so that j-th rbuf index
     // is removed before i-th index if i<j
-    int nprune = nbuf - buf->prune.max_sites;
     for (i=0; i<nprune; i++)
         buf->prune.idx[i] = buf->prune.vrec[i]->idx;
 
@@ -335,10 +378,21 @@ static double _estimate_af(int8_t *ptr, int size, int nvals, int nsamples)
 }
 
 /*
-    For unphased genotypes D is approximated as suggested in https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2710162/
+    The `ld` is set to D approximated as suggested in https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2710162/
         D =~ (GT correlation) * sqrt(Pa*(1-Pa)*Pb*(1-Pb))
+
+    and `hd` as proposed in Ragsdale, A. P., & Gravel, S. (2019). Unbiased estimation of linkage
+    disequilibrium from unphased data.  Molecular Biology and Evolution. doi:10.1093/molbev/msz265 
+
+        \hat{D} = 1/[n*(n+1)]*[
+                             (n1 + n2/2 + n4/2 + n5/4)*(n5/4 + n6/2 + n8/2 + n9)
+                            -(n2/2 + n3 + n5/4 + n6/2)*(n4/2 + n5/4 + n7 + n8/2)
+                        ]
+    where n1,n2,..n9 are counts of RR/RR,RR/RA,..,AA/AA genotypes.
+
+    Returns 0 on success, -1 if the values could not be determined (missing genotypes)
 */
-static double _calc_ld(vcfbuf_t *buf, bcf1_t *arec, bcf1_t *brec)
+static int _calc_r2_ld(vcfbuf_t *buf, bcf1_t *arec, bcf1_t *brec, vcfbuf_ld_t *ld)
 {
     if ( arec->n_sample!=brec->n_sample ) error("Different number of samples: %d vs %d\n",arec->n_sample,brec->n_sample);
     assert( arec->n_sample );
@@ -367,21 +421,24 @@ static double _calc_ld(vcfbuf_t *buf, bcf1_t *arec, bcf1_t *brec)
         baf = _estimate_af((int8_t*)bfmt->p, bfmt->size, bfmt->n, brec->n_sample);
     }
 
-    // Calculate correlation 
+    // Calculate r2, ld, hd
+    double nhd[] = {0,0,0,0,0,0,0,0,0};
     double ab = 0, aa = 0, bb = 0, a = 0, b = 0;
-    int nab = 0, na = 0, nb = 0, ndiff = 0;
+    int nab = 0, ndiff = 0;
+    int an_tot = 0, bn_tot = 0; 
     for (i=0; i<arec->n_sample; i++)
     {
         int8_t *aptr = (int8_t*) (afmt->p + i*afmt->size);
         int8_t *bptr = (int8_t*) (bfmt->p + i*bfmt->size);
-        int adsg = 0, bdsg = 0, an = 0, bn = 0;
+        int adsg = 0, bdsg = 0;     // dosages (0,1,2) at sites (a,b)
+        int an = 0, bn = 0;         // number of alleles at sites (a,b)
         for (j=0; j<afmt->n; j++)
         {
             if ( aptr[j]==bcf_int8_vector_end ) break;
             if ( aptr[j]==bcf_gt_missing )
             {
                 if ( !buf->ld.rand_missing ) break;
-                if ( rand()/RAND_MAX >= aaf ) adsg += 1;
+                if ( hts_drand48() >= aaf ) adsg += 1;
             }
             else if ( bcf_gt_allele(aptr[j]) ) adsg += 1;
             an++;
@@ -392,89 +449,112 @@ static double _calc_ld(vcfbuf_t *buf, bcf1_t *arec, bcf1_t *brec)
             if ( bptr[j]==bcf_gt_missing )
             {
                 if ( !buf->ld.rand_missing ) break;
-                if ( rand()/RAND_MAX >= baf ) bdsg += 1;
+                if ( hts_drand48() >= baf ) bdsg += 1;
             }
             else if ( bcf_gt_allele(bptr[j]) ) bdsg += 1;
             bn++;
         }
-        if ( an )
+        if ( an && bn )
         {
+            an_tot += an;
             aa += adsg*adsg;
             a  += adsg;
-            na++;
-        }
-        if ( bn )
-        {
+
+            bn_tot += bn;
             bb += bdsg*bdsg;
             b  += bdsg;
-            nb++;
-        }
-        if ( an && bn )
-        {
+
             if ( adsg!=bdsg ) ndiff++;
             ab += adsg*bdsg;
             nab++;
         }
+        if ( an==2 && bn==2 )   // for now only diploid genotypes
+        {
+            assert( adsg<=2 && bdsg<=2 );
+            nhd[ bdsg*3 + adsg ]++;
+        }
     }
-    if ( !nab ) return -1;
+    if ( !nab ) return -1;  // no data in common for the two sites
 
+    double pa = a/an_tot;
+    double pb = b/bn_tot;
     double cor;
     if ( !ndiff ) cor = 1;
     else
     {
-        // Don't know how to deal with zero variance. Since this the purpose is filtering,
-        // it is not enough to say the value is undefined. Therefore an artificial noise is
-        // added to make the denominator non-zero.
-        if ( aa == a*a/na || bb == b*b/nb )
+        if ( aa == a*a/nab || bb == b*b/nab )     // zero variance, add small noise
         {
-            aa += 3*3;
-            bb += 3*3;
-            ab += 3*3;
-            a  += 3;
-            b  += 3;
-            na++;
-            nb++;
+            aa += 1e-4;
+            bb += 1e-4;
+            ab += 1e-4;
+            a  += 1e-2;
+            b  += 1e-2;
             nab++;
         }
-        cor = (ab/nab - a/na*b/nb) / sqrt(aa/na - a/na*a/na) / sqrt(bb/nb - b/nb*b/nb);
+        cor = (ab - a*b/nab) / sqrt(aa - a*a/nab) / sqrt(bb - b*b/nab);
     }
-    return cor*cor;
+
+    ld->val[VCFBUF_LD_IDX_R2] = cor * cor;
+
+    // Lewontin's normalization of D. Also we cap at 1 as the calculation
+    // can result in values bigger than 1 for high AFs.
+    ld->val[VCFBUF_LD_IDX_LD] = cor * sqrt(pa*(1-pa)*pb*(1-pb));
+    double norm;
+    if ( ld->val[VCFBUF_LD_IDX_LD] < 0 )
+        norm = -pa*pb > -(1-pa)*(1-pb) ? -pa*pb : -(1-pa)*(1-pb);
+    else
+        norm = pa*(1-pb) > (1-pa)*pb ? pa*(1-pb) : (1-pa)*pb;
+    if ( norm )
+        ld->val[VCFBUF_LD_IDX_LD] = fabs(norm) > fabs(ld->val[VCFBUF_LD_IDX_LD]) ? ld->val[VCFBUF_LD_IDX_LD]/norm : 1;
+    if ( !ld->val[VCFBUF_LD_IDX_LD] )
+        ld->val[VCFBUF_LD_IDX_LD] = fabs(ld->val[VCFBUF_LD_IDX_LD]);    // avoid "-0" on output
+
+    ld->val[VCFBUF_LD_IDX_HD] =
+        (nhd[0] + nhd[1]/2. + nhd[3]/2. + nhd[4]/4.)*(nhd[4]/4. + nhd[5]/2. + nhd[7]/2. + nhd[8]) 
+        - (nhd[1]/2. + nhd[2] + nhd[4]/4. + nhd[5]/2.)*(nhd[3]/2. + nhd[4]/4. + nhd[6] + nhd[7]/2.);
+    ld->val[VCFBUF_LD_IDX_HD] /= nab;
+    ld->val[VCFBUF_LD_IDX_HD] /= nab+1;
+
+    return 0;
 }
 
-bcf1_t *vcfbuf_max_ld(vcfbuf_t *buf, bcf1_t *rec, double *ld)
+int vcfbuf_ld(vcfbuf_t *buf, bcf1_t *rec, vcfbuf_ld_t *ld)
 {
-    *ld = -1;
-    if ( !buf->rbuf.n ) return NULL;
+    int ret = -1;
+    if ( !buf->rbuf.n ) return ret;
 
-    int i = buf->rbuf.f;
+    int j, i = buf->rbuf.f;
 
     // Relying on vcfbuf being properly flushed - all sites in the buffer
     // must come from the same chromosome
-    if ( buf->vcf[i].rec->rid != rec->rid ) return NULL;
+    if ( buf->vcf[i].rec->rid != rec->rid ) return ret;
+
+    vcfbuf_ld_t tmp;
+    for (j=0; j<VCFBUF_LD_N; j++)
+    {
+        ld->val[j] = -HUGE_VAL;
+        ld->rec[j] = NULL;
+    }
 
-    int imax = 0;
-    double max = 0;
     for (i=-1; rbuf_next(&buf->rbuf,&i); )
     {   
-        if ( buf->ld.skip_filter )
-        {
-            if ( buf->vcf[i].rec->d.n_flt > 1 ) continue;   // multiple filters are set
-            if ( buf->vcf[i].rec->d.n_flt==1 && buf->vcf[i].rec->d.flt[0]!=0 ) continue;    // not PASS
-        }
-        double val = _calc_ld(buf, buf->vcf[i].rec, rec);
-        if ( buf->ld.max && buf->ld.max < val ) 
-        {
-            *ld = val;
-            return buf->vcf[i].rec;
-        }
-        if ( val > max )
+        if ( buf->vcf[i].filter ) continue;
+        if ( _calc_r2_ld(buf, buf->vcf[i].rec, rec, &tmp) < 0 ) continue;   // missing genotypes
+
+        int done = 0;
+        for (j=0; j<VCFBUF_LD_N; j++)
         {
-            max  = val;
-            imax = i;
+            if ( ld->val[j] < tmp.val[j] )
+            {
+                ld->val[j] = tmp.val[j];
+                ld->rec[j] = buf->vcf[i].rec;
+            }
+            if ( buf->ld.max[j] < tmp.val[j] ) done = 1;
+            ret = 0;
         }
+        if ( done ) return ret;
     }
-    *ld = max;
-    return buf->vcf[imax].rec;
+    return ret;
 }
 
 
diff --git a/bcftools/vcfbuf.h b/bcftools/vcfbuf.h
index 9ede5b5..d3be6c5 100644
--- a/bcftools/vcfbuf.h
+++ b/bcftools/vcfbuf.h
@@ -1,6 +1,6 @@
 /* The MIT License
 
-   Copyright (c) 2017-2019 Genome Research Ltd.
+   Copyright (c) 2017-2021 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
    
@@ -38,13 +38,18 @@ typedef struct _vcfbuf_t vcfbuf_t;
 // Modes of operation
 typedef enum
 {
-    VCFBUF_LD_MAX,          // vcfbuf_max_ld() stops at the first record that exceeds the threshold
-    VCFBUF_RAND_MISSING,    // randomize rather than ignore missing genotypes
-    VCFBUF_SKIP_FILTER,     // skip sites with FILTER diferent from "PASS" or "."
-    VCFBUF_NSITES,          // leave at max this many sites in the window
-    VCFBUF_AF_TAG,          // use this INFO tag with LD_NSITES
     VCFBUF_OVERLAP_WIN,     // keep only overlapping variants in the window
     VCFBUF_RMDUP,           // remove duplicate sites (completely)
+    VCFBUF_NSITES,          // leave at max this many sites in the window
+    VCFBUF_NSITES_MODE,     // one of: maxAF (keep sites with max AF), 1st (sites that come first), rand (pick randomly)
+    VCFBUF_AF_TAG,          // use this INFO tag with VCFBUF_NSITES
+
+    // LD related options
+    LD_RAND_MISSING,        // randomize rather than ignore missing genotypes
+    LD_FILTER1,             // exclude the next record inserted by vcfbuf_push() from LD analysis
+    LD_MAX_R2,              // If set, vcfbuf_ld() will stop at the first record that exceeds the R2,
+    LD_MAX_LD,              //      LD, or HD threshold. When multiple are set, the OR logic is applied
+    LD_MAX_HD,              //      
 }
 vcfbuf_opt_t;
 
@@ -61,9 +66,8 @@ void vcfbuf_destroy(vcfbuf_t *buf);
 
 /*
  *  vcfbuf_push() - push a new site for analysis
- *  @swap:  if set, do not create a copy, but return a substitute
  */
-bcf1_t *vcfbuf_push(vcfbuf_t *buf, bcf1_t *rec, int swap);
+bcf1_t *vcfbuf_push(vcfbuf_t *buf, bcf1_t *rec);
 
 /*
  *  vcfbuf_peek() - return pointer to i-th record in the buffer but do not remove it from the buffer
@@ -85,10 +89,28 @@ bcf1_t *vcfbuf_flush(vcfbuf_t *buf, int flush_all);
 int vcfbuf_nsites(vcfbuf_t *buf);
 
 /*
- *  vcfbuf_max_ld() - return a record that has maximum D or first record exceeding the threshold
- *  @ld:        will be filled with the maximum D found
+ *  vcfbuf_ld() - find records with maximum LD values or the values in first record that exceeds thresholds
+ *                set by vcfbuf_set_opt(..,LD_MAX*,..)
+ *
+ *  Returns 0 on success or -1 if no values were filled.
+ *
+ *  @val:  will be filled with the values
+ *          .. correlation coefficient r-squared
+ *          .. Lewontin's D' (PMID: 19433632)
+ *          .. Ragsdale's \hat{D} (doi:10.1093/molbev/msz265)
+ *  @rec: corresponding records or NULL if the value(s) have not been set
  */
-bcf1_t *vcfbuf_max_ld(vcfbuf_t *buf, bcf1_t *rec, double *ld);
+#define VCFBUF_LD_N 3
+#define VCFBUF_LD_IDX_R2 0
+#define VCFBUF_LD_IDX_LD 1
+#define VCFBUF_LD_IDX_HD 2
+typedef struct
+{
+    double val[VCFBUF_LD_N];    // r2, ld, hd
+    bcf1_t *rec[VCFBUF_LD_N];   // record with max r2, ld, hd
+}
+vcfbuf_ld_t;
+int vcfbuf_ld(vcfbuf_t *buf, bcf1_t *rec, vcfbuf_ld_t *ld);
 
 #endif
 
diff --git a/bcftools/vcfcall.c b/bcftools/vcfcall.c
index f546542..e2aab3f 100644
--- a/bcftools/vcfcall.c
+++ b/bcftools/vcfcall.c
@@ -1,6 +1,6 @@
 /*  vcfcall.c -- SNP/indel variant calling from VCF/BCF.
 
-    Copyright (C) 2013-2016 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -25,6 +25,7 @@ THE SOFTWARE.  */
 #include <stdarg.h>
 #include <string.h>
 #include <strings.h>
+#include <assert.h>
 #include <errno.h>
 #include <unistd.h>
 #include <getopt.h>
@@ -189,6 +190,11 @@ static ploidy_predef_t ploidy_predefs[] =
       .ploidy =
           "*  * *     * 1\n"
     },
+    { .alias  = "2",
+      .about  = "Treat all samples as diploid",
+      .ploidy =
+          "*  * *     * 2\n"
+    },
     {
         .alias  = NULL,
         .about  = NULL,
@@ -536,7 +542,7 @@ bcf1_t *next_line(args_t *args)
             bcf_unpack(rec, BCF_UN_STR);
             if ( !rec0 ) rec0 = rec;
             recN = rec;
-            args->aux.srs->readers[0].buffer[0] = vcfbuf_push(args->vcfbuf, rec, 1);
+            args->aux.srs->readers[0].buffer[0] = vcfbuf_push(args->vcfbuf, rec);
             if ( rec0->rid!=recN->rid || rec0->pos!=recN->pos ) break;
         }
     }
@@ -611,7 +617,7 @@ static void init_data(args_t *args)
     // Open files for input and output, initialize structures
     if ( args->targets )
     {
-        args->tgt_idx = regidx_init(args->targets, tgt_parse, args->aux.flag&CALL_CONSTR_ALLELES ? tgt_free : NULL, sizeof(tgt_als_t), args->aux.flag&CALL_CONSTR_ALLELES ? args : NULL);
+        args->tgt_idx = regidx_init(args->targets, tgt_parse, args->aux.flag&CALL_CONSTR_ALLELES ? tgt_free : (regidx_free_f) NULL, sizeof(tgt_als_t), args->aux.flag&CALL_CONSTR_ALLELES ? args : NULL);
         args->tgt_itr = regitr_init(args->tgt_idx);
         args->tgt_itr_tmp = regitr_init(args->tgt_idx);
     }
@@ -686,7 +692,7 @@ static void init_data(args_t *args)
     if ( args->aux.flag & CALL_CONSTR_ALLELES )
         args->vcfbuf = vcfbuf_init(args->aux.hdr, 0);
 
-    args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
+    args->out_fh = hts_open(args->output_fname, hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( args->out_fh == NULL ) error("Error: cannot write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
 
@@ -768,7 +774,20 @@ void parse_novel_rate(args_t *args, const char *str)
     else error("Could not parse --novel-rate %s\n", str);
 }
 
-static int parse_format_flag(const char *str)
+static void list_annotations(FILE *fp)
+{
+    fprintf(fp,
+        "\n"
+        "Optional INFO annotations available with -m (\"INFO/\" prefix is optional):\n"
+        "  INFO/PV4   .. P-values for strand bias, baseQ bias, mapQ bias and tail distance bias (Number=4,Type=Float)\n"
+        "\n"
+        "Optional FORMAT annotations available with -m (\"FORMAT/\" prefix is optional):\n"
+        "  FORMAT/GQ  .. Phred-scaled genotype quality (Number=1,Type=Integer)\n"
+        "  FORMAT/GP  .. Phred-scaled genotype posterior probabilities (Number=G,Type=Float)\n"
+        "\n");
+}
+
+static int parse_output_tags(const char *str)
 {
     int flag = 0;
     const char *ss = str;
@@ -776,8 +795,9 @@ static int parse_format_flag(const char *str)
     {
         const char *se = ss;
         while ( *se && *se!=',' ) se++;
-        if ( !strncasecmp(ss,"GQ",se-ss) ) flag |= CALL_FMT_GQ;
-        else if ( !strncasecmp(ss,"GP",se-ss) ) flag |= CALL_FMT_GP;
+        if ( !strncasecmp(ss,"GQ",se-ss) || !strncasecmp(ss,"FORMAT/GQ",se-ss) || !strncasecmp(ss,"FMT/GQ",se-ss)  ) flag |= CALL_FMT_GQ;
+        else if ( !strncasecmp(ss,"GP",se-ss) || !strncasecmp(ss,"FORMAT/GP",se-ss) || !strncasecmp(ss,"FMT/GP",se-ss) ) flag |= CALL_FMT_GP;
+        else if ( !strncasecmp(ss,"PV4",se-ss) || !strncasecmp(ss,"INFO/PV4",se-ss) ) flag |= CALL_FMT_PV4;
         else
         {
             fprintf(stderr,"Could not parse \"%s\"\n", str);
@@ -856,41 +876,46 @@ static void usage(args_t *args)
     fprintf(stderr, "Usage:   bcftools call [options] <in.vcf.gz>\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "File format options:\n");
-    fprintf(stderr, "       --no-version                do not append version and command line to the header\n");
-    fprintf(stderr, "   -o, --output <file>             write output to a file [standard output]\n");
-    fprintf(stderr, "   -O, --output-type <b|u|z|v>     output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
-    fprintf(stderr, "       --ploidy <assembly>[?]      predefined ploidy, 'list' to print available settings, append '?' for details\n");
-    fprintf(stderr, "       --ploidy-file <file>        space/tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY\n");
-    fprintf(stderr, "   -r, --regions <region>          restrict to comma-separated list of regions\n");
-    fprintf(stderr, "   -R, --regions-file <file>       restrict to regions listed in a file\n");
-    fprintf(stderr, "   -s, --samples <list>            list of samples to include [all samples]\n");
-    fprintf(stderr, "   -S, --samples-file <file>       PED file or a file with an optional column with sex (see man page for details) [all samples]\n");
-    fprintf(stderr, "   -t, --targets <region>          similar to -r but streams rather than index-jumps\n");
-    fprintf(stderr, "   -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n");
-    fprintf(stderr, "       --threads <int>             use multithreading with <int> worker threads [0]\n");
+    fprintf(stderr, "       --no-version              Do not append version and command line to the header\n");
+    fprintf(stderr, "   -o, --output FILE             Write output to a file [standard output]\n");
+    fprintf(stderr, "   -O, --output-type b|u|z|v     Output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+    fprintf(stderr, "       --ploidy ASSEMBLY[?]      Predefined ploidy, 'list' to print available settings, append '?' for details [2]\n");
+    fprintf(stderr, "       --ploidy-file FILE        Space/tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY\n");
+    fprintf(stderr, "   -r, --regions REGION          Restrict to comma-separated list of regions\n");
+    fprintf(stderr, "   -R, --regions-file FILE       Restrict to regions listed in a file\n");
+    fprintf(stderr, "   -s, --samples LIST            List of samples to include [all samples]\n");
+    fprintf(stderr, "   -S, --samples-file FILE       PED file or a file with an optional column with sex (see man page for details) [all samples]\n");
+    fprintf(stderr, "   -t, --targets REGION          Similar to -r but streams rather than index-jumps\n");
+    fprintf(stderr, "   -T, --targets-file FILE       Similar to -R but streams rather than index-jumps\n");
+    fprintf(stderr, "       --threads INT             Use multithreading with INT worker threads [0]\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Input/output options:\n");
-    fprintf(stderr, "   -A, --keep-alts                 keep all possible alternate alleles at variant sites\n");
-    fprintf(stderr, "   -f, --format-fields <list>      output format fields: GQ,GP (lowercase allowed) []\n");
-    fprintf(stderr, "   -F, --prior-freqs <AN,AC>       use prior allele frequencies\n");
-    fprintf(stderr, "   -G, --group-samples <file|->    group samples by population (file with \"sample\\tgroup\") or \"-\" for single-sample calling\n");
-    fprintf(stderr, "   -g, --gvcf <int>,[...]          group non-variant sites into gVCF blocks by minimum per-sample DP\n");
-    fprintf(stderr, "   -i, --insert-missed             output also sites missed by mpileup but present in -T\n");
-    fprintf(stderr, "   -M, --keep-masked-ref           keep sites with masked reference allele (REF=N)\n");
-    fprintf(stderr, "   -V, --skip-variants <type>      skip indels/snps\n");
-    fprintf(stderr, "   -v, --variants-only             output variant sites only\n");
+    fprintf(stderr, "   -A, --keep-alts               Keep all possible alternate alleles at variant sites\n");
+    fprintf(stderr, "   -a, --annotate LIST           Optional tags to output (lowercase allowed); '?' to list available tags\n");
+//todo?    
+//    fprintf(stderr, "   -a, --annots LIST             Add annotations: GQ,GP,PV4 (lowercase allowed). Prefixed with ^ indicates a request for\n");
+//    fprintf(stderr, "                                 tag removal [^I16,^QS,^FMT/QS]\n");
+    fprintf(stderr, "   -F, --prior-freqs AN,AC       Use prior allele frequencies, determined from these pre-filled tags\n");
+    fprintf(stderr, "   -G, --group-samples FILE|-    Group samples by population (file with \"sample\\tgroup\") or \"-\" for single-sample calling.\n");
+    fprintf(stderr, "                                 This requires FORMAT/QS or other Number=R,Type=Integer tag such as FORMAT/AD\n"); 
+    fprintf(stderr, "       --group-samples-tag TAG   The tag to use with -G, by default FORMAT/QS and FORMAT/AD are checked automatically\n");
+    fprintf(stderr, "   -g, --gvcf INT,[...]          Group non-variant sites into gVCF blocks by minimum per-sample DP\n");
+    fprintf(stderr, "   -i, --insert-missed           Output also sites missed by mpileup but present in -T\n");
+    fprintf(stderr, "   -M, --keep-masked-ref         Keep sites with masked reference allele (REF=N)\n");
+    fprintf(stderr, "   -V, --skip-variants TYPE      Skip indels/snps\n");
+    fprintf(stderr, "   -v, --variants-only           Output variant sites only\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Consensus/variant calling options:\n");
-    fprintf(stderr, "   -c, --consensus-caller          the original calling method (conflicts with -m)\n");
-    fprintf(stderr, "   -C, --constrain <str>           one of: alleles, trio (see manual)\n");
-    fprintf(stderr, "   -m, --multiallelic-caller       alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
-    fprintf(stderr, "   -n, --novel-rate <float>,[...]  likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
-    fprintf(stderr, "   -p, --pval-threshold <float>    variant if P(ref|D)<FLOAT with -c [0.5]\n");
-    fprintf(stderr, "   -P, --prior <float>             mutation rate (use bigger for greater sensitivity), use with -m [1.1e-3]\n");
+    fprintf(stderr, "   -c, --consensus-caller        The original calling method (conflicts with -m)\n");
+    fprintf(stderr, "   -C, --constrain STR           One of: alleles, trio (see manual)\n");
+    fprintf(stderr, "   -m, --multiallelic-caller     Alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
+    fprintf(stderr, "   -n, --novel-rate FLOAT,[...]  Likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
+    fprintf(stderr, "   -p, --pval-threshold FLOAT    Variant if P(ref|D)<FLOAT with -c [0.5]\n");
+    fprintf(stderr, "   -P, --prior FLOAT             Mutation rate (use bigger for greater sensitivity), use with -m [1.1e-3]\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Example:\n");
     fprintf(stderr, "   # See also http://samtools.github.io/bcftools/howtos/variant-calling.html\n");
-    fprintf(stderr, "   bcftools mpileup -f reference.fa alignments.bam | bcftools call -mv -Ob -o calls.bcf\n");
+    fprintf(stderr, "   bcftools mpileup -Ou -f reference.fa alignments.bam | bcftools call -mv -Ob -o calls.bcf\n");
 
     // todo (and more)
     // fprintf(stderr, "\nContrast calling and association test options:\n");
@@ -927,9 +952,11 @@ int main_vcfcall(int argc, char *argv[])
     {
         {"help",no_argument,NULL,'h'},
         {"format-fields",required_argument,NULL,'f'},
+        {"annotate",required_argument,NULL,'a'},
         {"prior-freqs",required_argument,NULL,'F'},
         {"gvcf",required_argument,NULL,'g'},
         {"group-samples",required_argument,NULL,'G'},
+        {"group-samples-tag",required_argument,NULL,3},
         {"output",required_argument,NULL,'o'},
         {"output-type",required_argument,NULL,'O'},
         {"regions",required_argument,NULL,'r'},
@@ -960,7 +987,7 @@ int main_vcfcall(int argc, char *argv[])
     };
 
     char *tmp = NULL;
-    while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:ANMV:vcmp:C:n:P:f:ig:XYF:G:", loptions, NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:ANMV:vcmp:C:n:P:f:a:ig:XYF:G:", loptions, NULL)) >= 0)
     {
         switch (c)
         {
@@ -969,7 +996,12 @@ int main_vcfcall(int argc, char *argv[])
             case 'X': ploidy = "X"; fprintf(stderr,"Warning: -X will be deprecated, please use --ploidy instead.\n"); break;
             case 'Y': ploidy = "Y"; fprintf(stderr,"Warning: -Y will be deprecated, please use --ploidy instead.\n"); break;
             case 'G': args.aux.sample_groups = optarg; break;
-            case 'f': args.aux.output_tags |= parse_format_flag(optarg); break;
+            case  3 : args.aux.sample_groups_tag = optarg; break;
+            case 'f': fprintf(stderr,"Warning: -f, --format-fields will be deprecated, please use -a, --annotate instead.\n");  // fall through - after the warning, -f is handled exactly like -a
+            case 'a':
+                      if (optarg[0]=='?') { list_annotations(stderr); return 1; }  // argument starting with '?': print the available tags and return 1
+                      args.aux.output_tags |= parse_output_tags(optarg);  // OR-ed in, so tags from repeated -a/-f options accumulate
+                      break;
             case 'M': args.flag &= ~CF_ACGT_ONLY; break;     // keep sites where REF is N
             case 'N': args.flag |= CF_ACGT_ONLY; break;      // omit sites where first base in REF is N (the new default)
             case 'A': args.aux.flag |= CALL_KEEPALT; break;
diff --git a/bcftools/vcfcall.c.pysam.c b/bcftools/vcfcall.c.pysam.c
index 8caf510..b5bedb9 100644
--- a/bcftools/vcfcall.c.pysam.c
+++ b/bcftools/vcfcall.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfcall.c -- SNP/indel variant calling from VCF/BCF.
 
-    Copyright (C) 2013-2016 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -27,6 +27,7 @@ THE SOFTWARE.  */
 #include <stdarg.h>
 #include <string.h>
 #include <strings.h>
+#include <assert.h>
 #include <errno.h>
 #include <unistd.h>
 #include <getopt.h>
@@ -191,6 +192,11 @@ static ploidy_predef_t ploidy_predefs[] =
       .ploidy =
           "*  * *     * 1\n"
     },
+    { .alias  = "2",
+      .about  = "Treat all samples as diploid",
+      .ploidy =
+          "*  * *     * 2\n"
+    },
     {
         .alias  = NULL,
         .about  = NULL,
@@ -538,7 +544,7 @@ bcf1_t *next_line(args_t *args)
             bcf_unpack(rec, BCF_UN_STR);
             if ( !rec0 ) rec0 = rec;
             recN = rec;
-            args->aux.srs->readers[0].buffer[0] = vcfbuf_push(args->vcfbuf, rec, 1);
+            args->aux.srs->readers[0].buffer[0] = vcfbuf_push(args->vcfbuf, rec);
             if ( rec0->rid!=recN->rid || rec0->pos!=recN->pos ) break;
         }
     }
@@ -613,7 +619,7 @@ static void init_data(args_t *args)
     // Open files for input and output, initialize structures
     if ( args->targets )
     {
-        args->tgt_idx = regidx_init(args->targets, tgt_parse, args->aux.flag&CALL_CONSTR_ALLELES ? tgt_free : NULL, sizeof(tgt_als_t), args->aux.flag&CALL_CONSTR_ALLELES ? args : NULL);
+        args->tgt_idx = regidx_init(args->targets, tgt_parse, args->aux.flag&CALL_CONSTR_ALLELES ? tgt_free : (regidx_free_f) NULL, sizeof(tgt_als_t), args->aux.flag&CALL_CONSTR_ALLELES ? args : NULL);
         args->tgt_itr = regitr_init(args->tgt_idx);
         args->tgt_itr_tmp = regitr_init(args->tgt_idx);
     }
@@ -688,7 +694,7 @@ static void init_data(args_t *args)
     if ( args->aux.flag & CALL_CONSTR_ALLELES )
         args->vcfbuf = vcfbuf_init(args->aux.hdr, 0);
 
-    args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
+    args->out_fh = hts_open(args->output_fname, hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( args->out_fh == NULL ) error("Error: cannot write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
 
@@ -770,7 +776,20 @@ void parse_novel_rate(args_t *args, const char *str)
     else error("Could not parse --novel-rate %s\n", str);
 }
 
-static int parse_format_flag(const char *str)
+static void list_annotations(FILE *fp)  // Print to fp the fixed list of optional INFO and FORMAT tags (PV4, GQ, GP)
+{
+    fprintf(fp,  // one call: the adjacent string literals below are concatenated into a single help text
+        "\n"
+        "Optional INFO annotations available with -m (\"INFO/\" prefix is optional):\n"
+        "  INFO/PV4   .. P-values for strand bias, baseQ bias, mapQ bias and tail distance bias (Number=4,Type=Float)\n"
+        "\n"
+        "Optional FORMAT annotations available with -m (\"FORMAT/\" prefix is optional):\n"
+        "  FORMAT/GQ  .. Phred-scaled genotype quality (Number=1,Type=Integer)\n"
+        "  FORMAT/GP  .. Phred-scaled genotype posterior probabilities (Number=G,Type=Float)\n"
+        "\n");
+}
+
+static int parse_output_tags(const char *str)
 {
     int flag = 0;
     const char *ss = str;
@@ -778,12 +797,13 @@ static int parse_format_flag(const char *str)
     {
         const char *se = ss;
         while ( *se && *se!=',' ) se++;
-        if ( !strncasecmp(ss,"GQ",se-ss) ) flag |= CALL_FMT_GQ;
-        else if ( !strncasecmp(ss,"GP",se-ss) ) flag |= CALL_FMT_GP;
+        if ( !strncasecmp(ss,"GQ",se-ss) || !strncasecmp(ss,"FORMAT/GQ",se-ss) || !strncasecmp(ss,"FMT/GQ",se-ss)  ) flag |= CALL_FMT_GQ;
+        else if ( !strncasecmp(ss,"GP",se-ss) || !strncasecmp(ss,"FORMAT/GP",se-ss) || !strncasecmp(ss,"FMT/GP",se-ss) ) flag |= CALL_FMT_GP;
+        else if ( !strncasecmp(ss,"PV4",se-ss) || !strncasecmp(ss,"INFO/PV4",se-ss) ) flag |= CALL_FMT_PV4;
         else
         {
             fprintf(bcftools_stderr,"Could not parse \"%s\"\n", str);
-            exit(1);
+            bcftools_exit(1);
         }
         if ( !*se ) break;
         ss = se + 1;
@@ -837,12 +857,12 @@ ploidy_t *init_ploidy(char *alias)
         fprintf(bcftools_stderr,"Run as --ploidy <alias> (e.g. --ploidy GRCh37).\n");
         fprintf(bcftools_stderr,"To see the detailed ploidy definition, append a question mark (e.g. --ploidy GRCh37?).\n");
         fprintf(bcftools_stderr,"\n");
-        exit(-1);
+        bcftools_exit(-1);
     }
     else if ( detailed )
     {
         fprintf(bcftools_stderr,"%s", pld->ploidy);
-        exit(-1);
+        bcftools_exit(-1);
     }
     return ploidy_init_string(pld->ploidy,2);
 }
@@ -858,41 +878,46 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "Usage:   bcftools call [options] <in.vcf.gz>\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "File format options:\n");
-    fprintf(bcftools_stderr, "       --no-version                do not append version and command line to the header\n");
-    fprintf(bcftools_stderr, "   -o, --output <file>             write output to a file [standard output]\n");
-    fprintf(bcftools_stderr, "   -O, --output-type <b|u|z|v>     output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
-    fprintf(bcftools_stderr, "       --ploidy <assembly>[?]      predefined ploidy, 'list' to print available settings, append '?' for details\n");
-    fprintf(bcftools_stderr, "       --ploidy-file <file>        space/tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY\n");
-    fprintf(bcftools_stderr, "   -r, --regions <region>          restrict to comma-separated list of regions\n");
-    fprintf(bcftools_stderr, "   -R, --regions-file <file>       restrict to regions listed in a file\n");
-    fprintf(bcftools_stderr, "   -s, --samples <list>            list of samples to include [all samples]\n");
-    fprintf(bcftools_stderr, "   -S, --samples-file <file>       PED file or a file with an optional column with sex (see man page for details) [all samples]\n");
-    fprintf(bcftools_stderr, "   -t, --targets <region>          similar to -r but streams rather than index-jumps\n");
-    fprintf(bcftools_stderr, "   -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n");
-    fprintf(bcftools_stderr, "       --threads <int>             use multithreading with <int> worker threads [0]\n");
+    fprintf(bcftools_stderr, "       --no-version              Do not append version and command line to the header\n");
+    fprintf(bcftools_stderr, "   -o, --output FILE             Write output to a file [standard output]\n");
+    fprintf(bcftools_stderr, "   -O, --output-type b|u|z|v     Output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+    fprintf(bcftools_stderr, "       --ploidy ASSEMBLY[?]      Predefined ploidy, 'list' to print available settings, append '?' for details [2]\n");
+    fprintf(bcftools_stderr, "       --ploidy-file FILE        Space/tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY\n");
+    fprintf(bcftools_stderr, "   -r, --regions REGION          Restrict to comma-separated list of regions\n");
+    fprintf(bcftools_stderr, "   -R, --regions-file FILE       Restrict to regions listed in a file\n");
+    fprintf(bcftools_stderr, "   -s, --samples LIST            List of samples to include [all samples]\n");
+    fprintf(bcftools_stderr, "   -S, --samples-file FILE       PED file or a file with an optional column with sex (see man page for details) [all samples]\n");
+    fprintf(bcftools_stderr, "   -t, --targets REGION          Similar to -r but streams rather than index-jumps\n");
+    fprintf(bcftools_stderr, "   -T, --targets-file FILE       Similar to -R but streams rather than index-jumps\n");
+    fprintf(bcftools_stderr, "       --threads INT             Use multithreading with INT worker threads [0]\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Input/output options:\n");
-    fprintf(bcftools_stderr, "   -A, --keep-alts                 keep all possible alternate alleles at variant sites\n");
-    fprintf(bcftools_stderr, "   -f, --format-fields <list>      output format fields: GQ,GP (lowercase allowed) []\n");
-    fprintf(bcftools_stderr, "   -F, --prior-freqs <AN,AC>       use prior allele frequencies\n");
-    fprintf(bcftools_stderr, "   -G, --group-samples <file|->    group samples by population (file with \"sample\\tgroup\") or \"-\" for single-sample calling\n");
-    fprintf(bcftools_stderr, "   -g, --gvcf <int>,[...]          group non-variant sites into gVCF blocks by minimum per-sample DP\n");
-    fprintf(bcftools_stderr, "   -i, --insert-missed             output also sites missed by mpileup but present in -T\n");
-    fprintf(bcftools_stderr, "   -M, --keep-masked-ref           keep sites with masked reference allele (REF=N)\n");
-    fprintf(bcftools_stderr, "   -V, --skip-variants <type>      skip indels/snps\n");
-    fprintf(bcftools_stderr, "   -v, --variants-only             output variant sites only\n");
+    fprintf(bcftools_stderr, "   -A, --keep-alts               Keep all possible alternate alleles at variant sites\n");
+    fprintf(bcftools_stderr, "   -a, --annotate LIST           Optional tags to output (lowercase allowed); '?' to list available tags\n");
+//todo?    
+//    fprintf(bcftools_stderr, "   -a, --annots LIST             Add annotations: GQ,GP,PV4 (lowercase allowed). Prefixed with ^ indicates a request for\n");
+//    fprintf(bcftools_stderr, "                                 tag removal [^I16,^QS,^FMT/QS]\n");
+    fprintf(bcftools_stderr, "   -F, --prior-freqs AN,AC       Use prior allele frequencies, determined from these pre-filled tags\n");
+    fprintf(bcftools_stderr, "   -G, --group-samples FILE|-    Group samples by population (file with \"sample\\tgroup\") or \"-\" for single-sample calling.\n");
+    fprintf(bcftools_stderr, "                                 This requires FORMAT/QS or other Number=R,Type=Integer tag such as FORMAT/AD\n"); 
+    fprintf(bcftools_stderr, "       --group-samples-tag TAG   The tag to use with -G, by default FORMAT/QS and FORMAT/AD are checked automatically\n");
+    fprintf(bcftools_stderr, "   -g, --gvcf INT,[...]          Group non-variant sites into gVCF blocks by minimum per-sample DP\n");
+    fprintf(bcftools_stderr, "   -i, --insert-missed           Output also sites missed by mpileup but present in -T\n");
+    fprintf(bcftools_stderr, "   -M, --keep-masked-ref         Keep sites with masked reference allele (REF=N)\n");
+    fprintf(bcftools_stderr, "   -V, --skip-variants TYPE      Skip indels/snps\n");
+    fprintf(bcftools_stderr, "   -v, --variants-only           Output variant sites only\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Consensus/variant calling options:\n");
-    fprintf(bcftools_stderr, "   -c, --consensus-caller          the original calling method (conflicts with -m)\n");
-    fprintf(bcftools_stderr, "   -C, --constrain <str>           one of: alleles, trio (see manual)\n");
-    fprintf(bcftools_stderr, "   -m, --multiallelic-caller       alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
-    fprintf(bcftools_stderr, "   -n, --novel-rate <float>,[...]  likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
-    fprintf(bcftools_stderr, "   -p, --pval-threshold <float>    variant if P(ref|D)<FLOAT with -c [0.5]\n");
-    fprintf(bcftools_stderr, "   -P, --prior <float>             mutation rate (use bigger for greater sensitivity), use with -m [1.1e-3]\n");
+    fprintf(bcftools_stderr, "   -c, --consensus-caller        The original calling method (conflicts with -m)\n");
+    fprintf(bcftools_stderr, "   -C, --constrain STR           One of: alleles, trio (see manual)\n");
+    fprintf(bcftools_stderr, "   -m, --multiallelic-caller     Alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
+    fprintf(bcftools_stderr, "   -n, --novel-rate FLOAT,[...]  Likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
+    fprintf(bcftools_stderr, "   -p, --pval-threshold FLOAT    Variant if P(ref|D)<FLOAT with -c [0.5]\n");
+    fprintf(bcftools_stderr, "   -P, --prior FLOAT             Mutation rate (use bigger for greater sensitivity), use with -m [1.1e-3]\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Example:\n");
     fprintf(bcftools_stderr, "   # See also http://samtools.github.io/bcftools/howtos/variant-calling.html\n");
-    fprintf(bcftools_stderr, "   bcftools mpileup -f reference.fa alignments.bam | bcftools call -mv -Ob -o calls.bcf\n");
+    fprintf(bcftools_stderr, "   bcftools mpileup -Ou -f reference.fa alignments.bam | bcftools call -mv -Ob -o calls.bcf\n");
 
     // todo (and more)
     // fprintf(bcftools_stderr, "\nContrast calling and association test options:\n");
@@ -901,7 +926,7 @@ static void usage(args_t *args)
     // fprintf(bcftools_stderr, "       -U INT    number of permutations for association testing (effective with -1) [0]\n");
     // fprintf(bcftools_stderr, "       -X FLOAT  only perform permutations for P(chi^2)<FLOAT [%g]\n", args->aux.min_perm_p);
     fprintf(bcftools_stderr, "\n");
-    exit(-1);
+    bcftools_exit(-1);
 }
 
 int main_vcfcall(int argc, char *argv[])
@@ -929,9 +954,11 @@ int main_vcfcall(int argc, char *argv[])
     {
         {"help",no_argument,NULL,'h'},
         {"format-fields",required_argument,NULL,'f'},
+        {"annotate",required_argument,NULL,'a'},
         {"prior-freqs",required_argument,NULL,'F'},
         {"gvcf",required_argument,NULL,'g'},
         {"group-samples",required_argument,NULL,'G'},
+        {"group-samples-tag",required_argument,NULL,3},
         {"output",required_argument,NULL,'o'},
         {"output-type",required_argument,NULL,'O'},
         {"regions",required_argument,NULL,'r'},
@@ -962,7 +989,7 @@ int main_vcfcall(int argc, char *argv[])
     };
 
     char *tmp = NULL;
-    while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:ANMV:vcmp:C:n:P:f:ig:XYF:G:", loptions, NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "h?o:O:r:R:s:S:t:T:ANMV:vcmp:C:n:P:f:a:ig:XYF:G:", loptions, NULL)) >= 0)
     {
         switch (c)
         {
@@ -971,7 +998,12 @@ int main_vcfcall(int argc, char *argv[])
             case 'X': ploidy = "X"; fprintf(bcftools_stderr,"Warning: -X will be deprecated, please use --ploidy instead.\n"); break;
             case 'Y': ploidy = "Y"; fprintf(bcftools_stderr,"Warning: -Y will be deprecated, please use --ploidy instead.\n"); break;
             case 'G': args.aux.sample_groups = optarg; break;
-            case 'f': args.aux.output_tags |= parse_format_flag(optarg); break;
+            case  3 : args.aux.sample_groups_tag = optarg; break;
+            case 'f': fprintf(bcftools_stderr,"Warning: -f, --format-fields will be deprecated, please use -a, --annotate instead.\n");  // fall through - after the warning, -f is handled exactly like -a
+            case 'a':
+                      if (optarg[0]=='?') { list_annotations(bcftools_stderr); return 1; }  // argument starting with '?': print the available tags and return 1
+                      args.aux.output_tags |= parse_output_tags(optarg);  // OR-ed in, so tags from repeated -a/-f options accumulate
+                      break;
             case 'M': args.flag &= ~CF_ACGT_ONLY; break;     // keep sites where REF is N
             case 'N': args.flag |= CF_ACGT_ONLY; break;      // omit sites where first base in REF is N (the new default)
             case 'A': args.aux.flag |= CALL_KEEPALT; break;
diff --git a/bcftools/vcfcnv.c b/bcftools/vcfcnv.c
index 2d8a94c..02d610d 100644
--- a/bcftools/vcfcnv.c
+++ b/bcftools/vcfcnv.c
@@ -32,6 +32,7 @@
 
 #include <stdio.h>
 #include <unistd.h>
+#include <assert.h>
 #include <getopt.h>
 #include <math.h>
 #include <inttypes.h>
diff --git a/bcftools/vcfcnv.c.pysam.c b/bcftools/vcfcnv.c.pysam.c
index 21b9e9d..d74486d 100644
--- a/bcftools/vcfcnv.c.pysam.c
+++ b/bcftools/vcfcnv.c.pysam.c
@@ -34,6 +34,7 @@
 
 #include <stdio.h>
 #include <unistd.h>
+#include <assert.h>
 #include <getopt.h>
 #include <math.h>
 #include <inttypes.h>
@@ -1236,7 +1237,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "    -P, --same-prob <float>            prior probability of -s/-c being the same [0.5]\n");
     fprintf(bcftools_stderr, "    -x, --xy-prob <float>              P(x|y) transition probability [1e-9]\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfcnv(int argc, char *argv[])
diff --git a/bcftools/vcfconcat.c b/bcftools/vcfconcat.c
index dce17f9..0781a60 100644
--- a/bcftools/vcfconcat.c
+++ b/bcftools/vcfconcat.c
@@ -1,6 +1,6 @@
 /*  vcfconcat.c -- Concatenate or combine VCF/BCF files.
 
-    Copyright (C) 2013-2019 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -26,6 +26,7 @@ THE SOFTWARE.  */
 #include <unistd.h>
 #include <getopt.h>
 #include <string.h>
+#include <assert.h>
 #include <errno.h>
 #include <math.h>
 #include <inttypes.h>
@@ -115,7 +116,7 @@ static void init_data(args_t *args)
         bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PS,Number=1,Type=Integer,Description=\"Phase Set\">");
     }
     if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
-    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
+    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->allow_overlaps || args->phased_concat )
     {
@@ -154,6 +155,7 @@ static void init_data(args_t *args)
             else if ( !strcmp(args->remove_dups,"any") ) args->files->collapse |= COLLAPSE_ANY;
             else if ( !strcmp(args->remove_dups,"all") ) args->files->collapse |= COLLAPSE_ANY;
             else if ( !strcmp(args->remove_dups,"none") ) args->files->collapse = COLLAPSE_NONE;
+            else if ( !strcmp(args->remove_dups,"exact") ) args->files->collapse = COLLAPSE_NONE;
             else error("The -D string \"%s\" not recognised.\n", args->remove_dups);
         }
         for (i=0; i<args->nfnames; i++)
@@ -233,6 +235,7 @@ static void phase_update(args_t *args, bcf_hdr_t *hdr, bcf1_t *rec)
         if ( !args->swap_phase[i] ) continue;
         int *gt = &args->GTa[i*2];
         if ( bcf_gt_is_missing(gt[0]) || gt[1]==bcf_int32_vector_end ) continue;
+        if ( !bcf_gt_is_phased(gt[1]) ) continue;
         SWAP(int, gt[0], gt[1]);
         gt[1] |= 1;
     }
@@ -845,8 +848,8 @@ static void usage(args_t *args)
     fprintf(stderr, "Options:\n");
     fprintf(stderr, "   -a, --allow-overlaps           First coordinate of the next file can precede last record of the current file.\n");
     fprintf(stderr, "   -c, --compact-PS               Do not output PS tag at each site, only at the start of a new phase set block.\n");
-    fprintf(stderr, "   -d, --rm-dups <string>         Output duplicate records present in multiple files only once: <snps|indels|both|all|none>\n");
-    fprintf(stderr, "   -D, --remove-duplicates        Alias for -d none\n");
+    fprintf(stderr, "   -d, --rm-dups <string>         Output duplicate records present in multiple files only once: <snps|indels|both|all|exact>\n");
+    fprintf(stderr, "   -D, --remove-duplicates        Alias for -d exact\n");
     fprintf(stderr, "   -f, --file-list <file>         Read the list of files from a file.\n");
     fprintf(stderr, "   -l, --ligate                   Ligate phased VCFs by matching phase at overlapping haplotypes\n");
     fprintf(stderr, "       --no-version               Do not append version and command line to the header\n");
@@ -903,7 +906,7 @@ int main_vcfconcat(int argc, char *argv[])
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; args->regions_is_file = 1; break;
             case 'd': args->remove_dups = optarg; break;
-            case 'D': args->remove_dups = "none"; break;
+            case 'D': args->remove_dups = "exact"; break;
             case 'q': 
                 args->min_PQ = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --min-PQ %s\n", optarg);
diff --git a/bcftools/vcfconcat.c.pysam.c b/bcftools/vcfconcat.c.pysam.c
index 0004a55..0cd061e 100644
--- a/bcftools/vcfconcat.c.pysam.c
+++ b/bcftools/vcfconcat.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfconcat.c -- Concatenate or combine VCF/BCF files.
 
-    Copyright (C) 2013-2019 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -28,6 +28,7 @@ THE SOFTWARE.  */
 #include <unistd.h>
 #include <getopt.h>
 #include <string.h>
+#include <assert.h>
 #include <errno.h>
 #include <math.h>
 #include <inttypes.h>
@@ -117,7 +118,7 @@ static void init_data(args_t *args)
         bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PS,Number=1,Type=Integer,Description=\"Phase Set\">");
     }
     if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
-    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
+    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->allow_overlaps || args->phased_concat )
     {
@@ -156,6 +157,7 @@ static void init_data(args_t *args)
             else if ( !strcmp(args->remove_dups,"any") ) args->files->collapse |= COLLAPSE_ANY;
             else if ( !strcmp(args->remove_dups,"all") ) args->files->collapse |= COLLAPSE_ANY;
             else if ( !strcmp(args->remove_dups,"none") ) args->files->collapse = COLLAPSE_NONE;
+            else if ( !strcmp(args->remove_dups,"exact") ) args->files->collapse = COLLAPSE_NONE;
             else error("The -D string \"%s\" not recognised.\n", args->remove_dups);
         }
         for (i=0; i<args->nfnames; i++)
@@ -235,6 +237,7 @@ static void phase_update(args_t *args, bcf_hdr_t *hdr, bcf1_t *rec)
         if ( !args->swap_phase[i] ) continue;
         int *gt = &args->GTa[i*2];
         if ( bcf_gt_is_missing(gt[0]) || gt[1]==bcf_int32_vector_end ) continue;
+        if ( !bcf_gt_is_phased(gt[1]) ) continue;
         SWAP(int, gt[0], gt[1]);
         gt[1] |= 1;
     }
@@ -847,8 +850,8 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "Options:\n");
     fprintf(bcftools_stderr, "   -a, --allow-overlaps           First coordinate of the next file can precede last record of the current file.\n");
     fprintf(bcftools_stderr, "   -c, --compact-PS               Do not output PS tag at each site, only at the start of a new phase set block.\n");
-    fprintf(bcftools_stderr, "   -d, --rm-dups <string>         Output duplicate records present in multiple files only once: <snps|indels|both|all|none>\n");
-    fprintf(bcftools_stderr, "   -D, --remove-duplicates        Alias for -d none\n");
+    fprintf(bcftools_stderr, "   -d, --rm-dups <string>         Output duplicate records present in multiple files only once: <snps|indels|both|all|exact>\n");
+    fprintf(bcftools_stderr, "   -D, --remove-duplicates        Alias for -d exact\n");
     fprintf(bcftools_stderr, "   -f, --file-list <file>         Read the list of files from a file.\n");
     fprintf(bcftools_stderr, "   -l, --ligate                   Ligate phased VCFs by matching phase at overlapping haplotypes\n");
     fprintf(bcftools_stderr, "       --no-version               Do not append version and command line to the header\n");
@@ -862,7 +865,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "       --threads <int>            Use multithreading with <int> worker threads [0]\n");
     fprintf(bcftools_stderr, "   -v, --verbose <0|1>            Set verbosity level [1]\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfconcat(int argc, char *argv[])
@@ -905,7 +908,7 @@ int main_vcfconcat(int argc, char *argv[])
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; args->regions_is_file = 1; break;
             case 'd': args->remove_dups = optarg; break;
-            case 'D': args->remove_dups = "none"; break;
+            case 'D': args->remove_dups = "exact"; break;
             case 'q': 
                 args->min_PQ = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --min-PQ %s\n", optarg);
diff --git a/bcftools/vcfconvert.c b/bcftools/vcfconvert.c
index 445a894..a48e85c 100644
--- a/bcftools/vcfconvert.c
+++ b/bcftools/vcfconvert.c
@@ -1,6 +1,6 @@
 /*  vcfconvert.c -- convert between VCF/BCF and related formats.
 
-    Copyright (C) 2013-2017 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -68,7 +68,7 @@ struct _args_t
     int nsamples, *samples, sample_is_file, targets_is_file, regions_is_file, output_type;
     char **argv, *sample_list, *targets_list, *regions_list, *tag, *columns;
     char *outfname, *infname, *ref_fname, *sex_fname;
-    int argc, n_threads, record_cmd_line;
+    int argc, n_threads, record_cmd_line, keep_duplicates;
 };
 
 static void destroy_data(args_t *args)
@@ -153,6 +153,15 @@ static int tsv_setter_chrom_pos_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr)
     if ( ss==se+1 ) error("Could not parse POS in CHROM:POS_REF_ALT: %s\n", tsv->ss);
     rec->pos--;
 
+    // ID
+    if ( args->output_vcf_ids )
+    {
+        char tmp = *tsv->se;
+        *tsv->se = 0;
+        bcf_update_id(args->header, rec, tsv->ss);
+        *tsv->se = tmp;
+    }
+
     // REF,ALT
     args->str.l = 0;
     se = ++ss;
@@ -385,7 +394,7 @@ static void gensample_to_vcf(args_t *args)
     for (i=0; i<nsamples; i++) free(samples[i]);
     free(samples);
 
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
     if ( bcf_hdr_write(out_fh,args->header)!=0 ) error("[%s] Error: cannot write the header to %s\n", __func__,args->outfname);
@@ -513,7 +522,7 @@ static void haplegendsample_to_vcf(args_t *args)
     for (i=0; i<nrows; i++) free(samples[i]);
     free(samples);
 
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
     if ( bcf_hdr_write(out_fh,args->header)!=0 ) error("[%s] Error: cannot write the header to %s\n", __func__,args->outfname);
@@ -627,7 +636,7 @@ static void hapsample_to_vcf(args_t *args)
     for (i=0; i<nsamples; i++) free(samples[i]);
     free(samples);
 
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
     if ( bcf_hdr_write(out_fh,args->header)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->outfname);
@@ -802,7 +811,7 @@ static void vcf_to_gensample(args_t *args)
         }
 
         // skip duplicate lines, or otherwise shapeit complains
-        if ( prev_rid==line->rid && prev_pos==line->pos ) { ndup++; continue; }
+        if ( !args->keep_duplicates && prev_rid==line->rid && prev_pos==line->pos ) { ndup++; continue; }
         prev_rid = line->rid;
         prev_pos = line->pos;
 
@@ -977,7 +986,7 @@ static void vcf_to_hapsample(args_t *args)
     if ( args->output_vcf_ids )
         kputs("%CHROM %ID %POS %REF %FIRST_ALT ", &str);
     else
-        kputs("%CHROM %CHROM:%POS\\_%REF\\_%FIRST_ALT %POS %REF %FIRST_ALT ", &str);
+        kputs("%CHROM:%POS\\_%REF\\_%FIRST_ALT %CHROM:%POS\\_%REF\\_%FIRST_ALT %POS %REF %FIRST_ALT ", &str);
     
     if ( args->hap2dip )
         kputs("%_GT_TO_HAP2\n", &str);
@@ -994,7 +1003,7 @@ static void vcf_to_hapsample(args_t *args)
     if ( n_files==1 )
     {
         int l = str.l;
-        kputs(".sample",&str);
+        kputs(".samples",&str);
         sample_fname = strdup(str.s);
         str.l = l;
         kputs(".hap.gz",&str);
@@ -1215,7 +1224,7 @@ static void tsv_to_vcf(args_t *args)
     bcf_hdr_add_sample(args->header, NULL);
     args->gts = (int32_t *) malloc(sizeof(int32_t)*n*2);
 
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
     if ( bcf_hdr_write(out_fh,args->header)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->outfname);
@@ -1267,7 +1276,7 @@ static void tsv_to_vcf(args_t *args)
 static void vcf_to_vcf(args_t *args)
 {
     open_vcf(args,NULL);
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
 
@@ -1296,7 +1305,7 @@ static void gvcf_to_vcf(args_t *args)
     if ( !args->ref ) error("Could not load the fai index for reference %s\n", args->ref_fname);
 
     open_vcf(args,NULL);
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
 
@@ -1395,6 +1404,7 @@ static void usage(void)
     fprintf(stderr, "   -g, --gensample <...>       <prefix>|<gen-file>,<sample-file>\n");
     fprintf(stderr, "       --tag <string>          tag to take values for .gen file: GT,PL,GL,GP [GT]\n");
     fprintf(stderr, "       --chrom                 output chromosome in first column instead of CHROM:POS_REF_ALT\n");
+    fprintf(stderr, "       --keep-duplicates       keep duplicate positions\n");
     fprintf(stderr, "       --sex <file>            output sex column in the sample-file, input format is: Sample\\t[MF]\n");
     fprintf(stderr, "       --vcf-ids               output VCF IDs in second column instead of CHROM:POS_REF_ALT\n");
     fprintf(stderr, "\n");
@@ -1473,12 +1483,17 @@ int main_vcfconvert(int argc, char *argv[])
         {"columns",required_argument,NULL,'c'},
         {"fasta-ref",required_argument,NULL,'f'},
         {"no-version",no_argument,NULL,10},
+        {"keep-duplicates",no_argument,NULL,12},
         {NULL,0,NULL,0}
     };
     while ((c = getopt_long(argc, argv, "?h:r:R:s:S:t:T:i:e:g:G:o:O:c:f:H:",loptions,NULL)) >= 0) {
         switch (c) {
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; args->regions_is_file = 1; break;
             case 't': args->targets_list = optarg; break;
@@ -1512,6 +1527,7 @@ int main_vcfconvert(int argc, char *argv[])
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case 10 : args->record_cmd_line = 0; break;
             case 11 : args->sex_fname = optarg; break;
+            case 12 : args->keep_duplicates = 1; break;
             case '?': usage(); break;
             default: error("Unknown argument: %s\n", optarg);
         }
diff --git a/bcftools/vcfconvert.c.pysam.c b/bcftools/vcfconvert.c.pysam.c
index abdfbec..358e404 100644
--- a/bcftools/vcfconvert.c.pysam.c
+++ b/bcftools/vcfconvert.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfconvert.c -- convert between VCF/BCF and related formats.
 
-    Copyright (C) 2013-2017 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -70,7 +70,7 @@ struct _args_t
     int nsamples, *samples, sample_is_file, targets_is_file, regions_is_file, output_type;
     char **argv, *sample_list, *targets_list, *regions_list, *tag, *columns;
     char *outfname, *infname, *ref_fname, *sex_fname;
-    int argc, n_threads, record_cmd_line;
+    int argc, n_threads, record_cmd_line, keep_duplicates;
 };
 
 static void destroy_data(args_t *args)
@@ -155,6 +155,15 @@ static int tsv_setter_chrom_pos_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr)
     if ( ss==se+1 ) error("Could not parse POS in CHROM:POS_REF_ALT: %s\n", tsv->ss);
     rec->pos--;
 
+    // ID
+    if ( args->output_vcf_ids )
+    {
+        char tmp = *tsv->se;
+        *tsv->se = 0;
+        bcf_update_id(args->header, rec, tsv->ss);
+        *tsv->se = tmp;
+    }
+
     // REF,ALT
     args->str.l = 0;
     se = ++ss;
@@ -387,7 +396,7 @@ static void gensample_to_vcf(args_t *args)
     for (i=0; i<nsamples; i++) free(samples[i]);
     free(samples);
 
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
     if ( bcf_hdr_write(out_fh,args->header)!=0 ) error("[%s] Error: cannot write the header to %s\n", __func__,args->outfname);
@@ -515,7 +524,7 @@ static void haplegendsample_to_vcf(args_t *args)
     for (i=0; i<nrows; i++) free(samples[i]);
     free(samples);
 
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
     if ( bcf_hdr_write(out_fh,args->header)!=0 ) error("[%s] Error: cannot write the header to %s\n", __func__,args->outfname);
@@ -629,7 +638,7 @@ static void hapsample_to_vcf(args_t *args)
     for (i=0; i<nsamples; i++) free(samples[i]);
     free(samples);
 
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
     if ( bcf_hdr_write(out_fh,args->header)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->outfname);
@@ -804,7 +813,7 @@ static void vcf_to_gensample(args_t *args)
         }
 
         // skip duplicate lines, or otherwise shapeit complains
-        if ( prev_rid==line->rid && prev_pos==line->pos ) { ndup++; continue; }
+        if ( !args->keep_duplicates && prev_rid==line->rid && prev_pos==line->pos ) { ndup++; continue; }
         prev_rid = line->rid;
         prev_pos = line->pos;
 
@@ -979,7 +988,7 @@ static void vcf_to_hapsample(args_t *args)
     if ( args->output_vcf_ids )
         kputs("%CHROM %ID %POS %REF %FIRST_ALT ", &str);
     else
-        kputs("%CHROM %CHROM:%POS\\_%REF\\_%FIRST_ALT %POS %REF %FIRST_ALT ", &str);
+        kputs("%CHROM:%POS\\_%REF\\_%FIRST_ALT %CHROM:%POS\\_%REF\\_%FIRST_ALT %POS %REF %FIRST_ALT ", &str);
     
     if ( args->hap2dip )
         kputs("%_GT_TO_HAP2\n", &str);
@@ -996,7 +1005,7 @@ static void vcf_to_hapsample(args_t *args)
     if ( n_files==1 )
     {
         int l = str.l;
-        kputs(".sample",&str);
+        kputs(".samples",&str);
         sample_fname = strdup(str.s);
         str.l = l;
         kputs(".hap.gz",&str);
@@ -1217,7 +1226,7 @@ static void tsv_to_vcf(args_t *args)
     bcf_hdr_add_sample(args->header, NULL);
     args->gts = (int32_t *) malloc(sizeof(int32_t)*n*2);
 
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
     if ( bcf_hdr_write(out_fh,args->header)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->outfname);
@@ -1269,7 +1278,7 @@ static void tsv_to_vcf(args_t *args)
 static void vcf_to_vcf(args_t *args)
 {
     open_vcf(args,NULL);
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
 
@@ -1298,7 +1307,7 @@ static void gvcf_to_vcf(args_t *args)
     if ( !args->ref ) error("Could not load the fai index for reference %s\n", args->ref_fname);
 
     open_vcf(args,NULL);
-    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode(args->output_type));
+    htsFile *out_fh = hts_open(args->outfname,hts_bcf_wmode2(args->output_type,args->outfname));
     if ( out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->outfname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
 
@@ -1397,6 +1406,7 @@ static void usage(void)
     fprintf(bcftools_stderr, "   -g, --gensample <...>       <prefix>|<gen-file>,<sample-file>\n");
     fprintf(bcftools_stderr, "       --tag <string>          tag to take values for .gen file: GT,PL,GL,GP [GT]\n");
     fprintf(bcftools_stderr, "       --chrom                 output chromosome in first column instead of CHROM:POS_REF_ALT\n");
+    fprintf(bcftools_stderr, "       --keep-duplicates       keep duplicate positions\n");
     fprintf(bcftools_stderr, "       --sex <file>            output sex column in the sample-file, input format is: Sample\\t[MF]\n");
     fprintf(bcftools_stderr, "       --vcf-ids               output VCF IDs in second column instead of CHROM:POS_REF_ALT\n");
     fprintf(bcftools_stderr, "\n");
@@ -1433,7 +1443,7 @@ static void usage(void)
     // fprintf(bcftools_stderr, "PBWT options:\n");
     // fprintf(bcftools_stderr, "   -b, --pbwt          <prefix> or <pbwt>,<sites>,<sample>,<missing>\n");
     // fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfconvert(int argc, char *argv[])
@@ -1475,12 +1485,17 @@ int main_vcfconvert(int argc, char *argv[])
         {"columns",required_argument,NULL,'c'},
         {"fasta-ref",required_argument,NULL,'f'},
         {"no-version",no_argument,NULL,10},
+        {"keep-duplicates",no_argument,NULL,12},
         {NULL,0,NULL,0}
     };
     while ((c = getopt_long(argc, argv, "?h:r:R:s:S:t:T:i:e:g:G:o:O:c:f:H:",loptions,NULL)) >= 0) {
         switch (c) {
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; args->regions_is_file = 1; break;
             case 't': args->targets_list = optarg; break;
@@ -1514,6 +1529,7 @@ int main_vcfconvert(int argc, char *argv[])
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case 10 : args->record_cmd_line = 0; break;
             case 11 : args->sex_fname = optarg; break;
+            case 12 : args->keep_duplicates = 1; break;
             case '?': usage(); break;
             default: error("Unknown argument: %s\n", optarg);
         }
diff --git a/bcftools/vcffilter.c b/bcftools/vcffilter.c
index 257ee3f..723bcdf 100644
--- a/bcftools/vcffilter.c
+++ b/bcftools/vcffilter.c
@@ -1,6 +1,6 @@
 /*  vcffilter.c -- Apply fixed-threshold filters.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -25,8 +25,10 @@ THE SOFTWARE.  */
 #include <stdio.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -60,7 +62,8 @@ typedef struct _args_t
     char *soft_filter;  // drop failed sites or annotate FILTER column?
     int annot_mode;     // add to existing FILTER annotation or replace? Otherwise reset FILTER to PASS or leave as it is?
     int flt_fail, flt_pass;     // BCF ids of fail and pass filters
-    int snp_gap, indel_gap, IndelGap_id, SnpGap_id;
+    int snp_gap, snp_gap_type, indel_gap, IndelGap_id, SnpGap_id;
+    char *snp_gap_str;
     int32_t ntmpi, *tmpi, ntmp_ac, *tmp_ac;
     rbuf_t rbuf;
     bcf1_t **rbuf_lines;
@@ -77,7 +80,7 @@ args_t;
 
 static void init_data(args_t *args)
 {
-    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
+    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
 
@@ -138,7 +141,7 @@ static void init_data(args_t *args)
         args->rbuf_lines = (bcf1_t**) calloc(args->rbuf.m, sizeof(bcf1_t*));
         if ( args->snp_gap )
         {
-            bcf_hdr_printf(args->hdr, "##FILTER=<ID=SnpGap,Description=\"SNP within %d bp of an indel\">", args->snp_gap);
+            bcf_hdr_printf(args->hdr, "##FILTER=<ID=SnpGap,Description=\"SNP within %d bp of %s\">", args->snp_gap,args->snp_gap_str);
             args->SnpGap_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "SnpGap");
             assert( args->SnpGap_id>=0 );
         }
@@ -217,9 +220,9 @@ static void buffered_filters(args_t *args, bcf1_t *line)
      */
 
     // To avoid additional data structure, we abuse bcf1_t's var and var_type records.
-    const int SnpGap_set     = VCF_OTHER<<1;
-    const int IndelGap_set   = VCF_OTHER<<2;
-    const int IndelGap_flush = VCF_OTHER<<3;
+    const int SnpGap_set     = 1 << (8*sizeof(int)/2);
+    const int IndelGap_set   = 1 << (8*sizeof(int)/2-1);
+    const int IndelGap_flush = 1 << (8*sizeof(int)/2-2);
 
     int var_type = 0, i;
     if ( line )
@@ -245,15 +248,8 @@ static void buffered_filters(args_t *args, bcf1_t *line)
         // output REF=CAGAGAGAGA, ALT=CAGAGAGAGAGA where REF=C,ALT=CGA could be
         // used. This filter is therefore more strict and may remove some valid
         // SNPs.
-        int len = 1;
-        if ( var_type & VCF_INDEL )
-        {
-            for (i=1; i<line->n_allele; i++)
-                if ( len < 1-line->d.var[i].n ) len = 1-line->d.var[i].n;
-        }
-
         // Set the REF allele's length to max deletion length or to 1 if a SNP or an insertion.
-        line->d.var[0].n = len;
+        line->d.var[0].n = line->rlen;
     }
 
     int k_flush = 1;
@@ -328,13 +324,13 @@ static void buffered_filters(args_t *args, bcf1_t *line)
             int rec_to  = rec->pos + rec->d.var[0].n - 1;   // last position affected by the variant
             if ( rec_to + args->snp_gap < last_from )
                 j_flush++;
-            else if ( (var_type & VCF_INDEL) && (rec->d.var_type & VCF_SNP) && !(rec->d.var_type & SnpGap_set) )
+            else if ( (var_type & args->snp_gap_type) && (rec->d.var_type & VCF_SNP) && !(rec->d.var_type & SnpGap_set) )
             {
                 // this SNP has not been SnpGap-filtered yet
                 rec->d.var_type |= SnpGap_set;
                 bcf_add_filter(args->hdr, rec, args->SnpGap_id);
             }
-            else if ( (var_type & VCF_SNP) && (rec->d.var_type & VCF_INDEL) )
+            else if ( (var_type & VCF_SNP) && (rec->d.var_type & args->snp_gap_type) )
             {
                 // the line which we are adding is a SNP and needs to be filtered
                 line->d.var_type |= SnpGap_set;
@@ -413,7 +409,7 @@ static void usage(args_t *args)
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
     fprintf(stderr, "    -e, --exclude <expr>          exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(stderr, "    -g, --SnpGap <int>            filter SNPs within <int> base pairs of an indel\n");
+    fprintf(stderr, "    -g, --SnpGap <int>[:type]     filter SNPs within <int> base pairs of an indel (the default) or any combination of indel,mnp,bnd,other,overlap\n");
     fprintf(stderr, "    -G, --IndelGap <int>          filter clusters of indels separated by <int> or fewer base pairs allowing only one to pass\n");
     fprintf(stderr, "    -i, --include <expr>          include only sites for which the expression is true (see man page for details\n");
     fprintf(stderr, "    -m, --mode [+x]               \"+\": do not replace but add to existing FILTER; \"x\": reset filters at sites which pass\n");
@@ -465,9 +461,31 @@ int main_vcffilter(int argc, char *argv[])
     char *tmp;
     while ((c = getopt_long(argc, argv, "e:i:t:T:r:R:h?s:m:o:O:g:G:S:",loptions,NULL)) >= 0) {
         switch (c) {
-            case 'g': 
+            case 'g':
                 args->snp_gap = strtol(optarg,&tmp,10); 
-                if ( *tmp ) error("Could not parse argument: --SnpGap %s\n", optarg);
+                if ( *tmp && *tmp!=':' ) error("Could not parse argument: --SnpGap %s\n", optarg);
+                if ( *tmp==':' )
+                {
+                    args->snp_gap_str = tmp+1;
+                    int i,n;
+                    char **keys = hts_readlist(tmp+1,0,&n);
+                    for(i=0; i<n; i++)
+                    {
+                        if ( !strcasecmp(keys[i],"indel") ) args->snp_gap_type |= VCF_INDEL;
+                        else if ( !strcasecmp(keys[i],"mnp") ) args->snp_gap_type |= VCF_MNP;
+                        else if ( !strcasecmp(keys[i],"bnd") ) args->snp_gap_type |= VCF_BND;
+                        else if ( !strcasecmp(keys[i],"other") ) args->snp_gap_type |= VCF_OTHER;
+                        else if ( !strcasecmp(keys[i],"overlap") ) args->snp_gap_type |= VCF_OVERLAP;
+                        else error("Could not parse \"%s\" in \"--SnpGap %s\"\n", keys[i], optarg);
+                        free(keys[i]);
+                    }
+                    if ( n ) free(keys);
+                }
+                else
+                {
+                    args->snp_gap_type = VCF_INDEL;
+                    args->snp_gap_str = "indel";
+                }
                 break;
             case 'G':
                 args->indel_gap = strtol(optarg,&tmp,10);
@@ -492,8 +510,12 @@ int main_vcffilter(int argc, char *argv[])
             case 'T': args->targets_list = optarg; targets_is_file = 1; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'S':
                 if ( !strcmp(".",optarg) ) args->set_gts = SET_GTS_MISSING;
                 else if ( !strcmp("0",optarg) ) args->set_gts = SET_GTS_REF;
diff --git a/bcftools/vcffilter.c.pysam.c b/bcftools/vcffilter.c.pysam.c
index 908c3b4..5709182 100644
--- a/bcftools/vcffilter.c.pysam.c
+++ b/bcftools/vcffilter.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcffilter.c -- Apply fixed-threshold filters.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -27,8 +27,10 @@ THE SOFTWARE.  */
 #include <stdio.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -62,7 +64,8 @@ typedef struct _args_t
     char *soft_filter;  // drop failed sites or annotate FILTER column?
     int annot_mode;     // add to existing FILTER annotation or replace? Otherwise reset FILTER to PASS or leave as it is?
     int flt_fail, flt_pass;     // BCF ids of fail and pass filters
-    int snp_gap, indel_gap, IndelGap_id, SnpGap_id;
+    int snp_gap, snp_gap_type, indel_gap, IndelGap_id, SnpGap_id;
+    char *snp_gap_str;
     int32_t ntmpi, *tmpi, ntmp_ac, *tmp_ac;
     rbuf_t rbuf;
     bcf1_t **rbuf_lines;
@@ -79,7 +82,7 @@ args_t;
 
 static void init_data(args_t *args)
 {
-    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
+    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
 
@@ -140,7 +143,7 @@ static void init_data(args_t *args)
         args->rbuf_lines = (bcf1_t**) calloc(args->rbuf.m, sizeof(bcf1_t*));
         if ( args->snp_gap )
         {
-            bcf_hdr_printf(args->hdr, "##FILTER=<ID=SnpGap,Description=\"SNP within %d bp of an indel\">", args->snp_gap);
+            bcf_hdr_printf(args->hdr, "##FILTER=<ID=SnpGap,Description=\"SNP within %d bp of %s\">", args->snp_gap,args->snp_gap_str);
             args->SnpGap_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "SnpGap");
             assert( args->SnpGap_id>=0 );
         }
@@ -219,9 +222,9 @@ static void buffered_filters(args_t *args, bcf1_t *line)
      */
 
     // To avoid additional data structure, we abuse bcf1_t's var and var_type records.
-    const int SnpGap_set     = VCF_OTHER<<1;
-    const int IndelGap_set   = VCF_OTHER<<2;
-    const int IndelGap_flush = VCF_OTHER<<3;
+    const int SnpGap_set     = 1 << (8*sizeof(int)/2);
+    const int IndelGap_set   = 1 << (8*sizeof(int)/2-1);
+    const int IndelGap_flush = 1 << (8*sizeof(int)/2-2);
 
     int var_type = 0, i;
     if ( line )
@@ -247,15 +250,8 @@ static void buffered_filters(args_t *args, bcf1_t *line)
         // output REF=CAGAGAGAGA, ALT=CAGAGAGAGAGA where REF=C,ALT=CGA could be
         // used. This filter is therefore more strict and may remove some valid
         // SNPs.
-        int len = 1;
-        if ( var_type & VCF_INDEL )
-        {
-            for (i=1; i<line->n_allele; i++)
-                if ( len < 1-line->d.var[i].n ) len = 1-line->d.var[i].n;
-        }
-
         // Set the REF allele's length to max deletion length or to 1 if a SNP or an insertion.
-        line->d.var[0].n = len;
+        line->d.var[0].n = line->rlen;
     }
 
     int k_flush = 1;
@@ -330,13 +326,13 @@ static void buffered_filters(args_t *args, bcf1_t *line)
             int rec_to  = rec->pos + rec->d.var[0].n - 1;   // last position affected by the variant
             if ( rec_to + args->snp_gap < last_from )
                 j_flush++;
-            else if ( (var_type & VCF_INDEL) && (rec->d.var_type & VCF_SNP) && !(rec->d.var_type & SnpGap_set) )
+            else if ( (var_type & args->snp_gap_type) && (rec->d.var_type & VCF_SNP) && !(rec->d.var_type & SnpGap_set) )
             {
                 // this SNP has not been SnpGap-filtered yet
                 rec->d.var_type |= SnpGap_set;
                 bcf_add_filter(args->hdr, rec, args->SnpGap_id);
             }
-            else if ( (var_type & VCF_SNP) && (rec->d.var_type & VCF_INDEL) )
+            else if ( (var_type & VCF_SNP) && (rec->d.var_type & args->snp_gap_type) )
             {
                 // the line which we are adding is a SNP and needs to be filtered
                 line->d.var_type |= SnpGap_set;
@@ -415,7 +411,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Options:\n");
     fprintf(bcftools_stderr, "    -e, --exclude <expr>          exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(bcftools_stderr, "    -g, --SnpGap <int>            filter SNPs within <int> base pairs of an indel\n");
+    fprintf(bcftools_stderr, "    -g, --SnpGap <int>[:type]     filter SNPs within <int> base pairs of an indel (the default) or any combination of indel,mnp,bnd,other,overlap\n");
     fprintf(bcftools_stderr, "    -G, --IndelGap <int>          filter clusters of indels separated by <int> or fewer base pairs allowing only one to pass\n");
     fprintf(bcftools_stderr, "    -i, --include <expr>          include only sites for which the expression is true (see man page for details\n");
     fprintf(bcftools_stderr, "    -m, --mode [+x]               \"+\": do not replace but add to existing FILTER; \"x\": reset filters at sites which pass\n");
@@ -430,7 +426,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "    -T, --targets-file <file>     similar to -R but streams rather than index-jumps\n");
     fprintf(bcftools_stderr, "        --threads <int>           use multithreading with <int> worker threads [0]\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcffilter(int argc, char *argv[])
@@ -467,9 +463,31 @@ int main_vcffilter(int argc, char *argv[])
     char *tmp;
     while ((c = getopt_long(argc, argv, "e:i:t:T:r:R:h?s:m:o:O:g:G:S:",loptions,NULL)) >= 0) {
         switch (c) {
-            case 'g': 
+            case 'g':
                 args->snp_gap = strtol(optarg,&tmp,10); 
-                if ( *tmp ) error("Could not parse argument: --SnpGap %s\n", optarg);
+                if ( *tmp && *tmp!=':' ) error("Could not parse argument: --SnpGap %s\n", optarg);
+                if ( *tmp==':' )
+                {
+                    args->snp_gap_str = tmp+1;
+                    int i,n;
+                    char **keys = hts_readlist(tmp+1,0,&n);
+                    for(i=0; i<n; i++)
+                    {
+                        if ( !strcasecmp(keys[i],"indel") ) args->snp_gap_type |= VCF_INDEL;
+                        else if ( !strcasecmp(keys[i],"mnp") ) args->snp_gap_type |= VCF_MNP;
+                        else if ( !strcasecmp(keys[i],"bnd") ) args->snp_gap_type |= VCF_BND;
+                        else if ( !strcasecmp(keys[i],"other") ) args->snp_gap_type |= VCF_OTHER;
+                        else if ( !strcasecmp(keys[i],"overlap") ) args->snp_gap_type |= VCF_OVERLAP;
+                        else error("Could not parse \"%s\" in \"--SnpGap %s\"\n", keys[i], optarg);
+                        free(keys[i]);
+                    }
+                    if ( n ) free(keys);
+                }
+                else
+                {
+                    args->snp_gap_type = VCF_INDEL;
+                    args->snp_gap_str = "indel";
+                }
                 break;
             case 'G':
                 args->indel_gap = strtol(optarg,&tmp,10);
@@ -494,8 +512,12 @@ int main_vcffilter(int argc, char *argv[])
             case 'T': args->targets_list = optarg; targets_is_file = 1; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'S':
                 if ( !strcmp(".",optarg) ) args->set_gts = SET_GTS_MISSING;
                 else if ( !strcmp("0",optarg) ) args->set_gts = SET_GTS_REF;
diff --git a/bcftools/vcfgtcheck.c b/bcftools/vcfgtcheck.c
index 8bf3223..8a96e3e 100644
--- a/bcftools/vcfgtcheck.c
+++ b/bcftools/vcfgtcheck.c
@@ -1,6 +1,6 @@
 /*  vcfgtcheck.c -- Check sample identity.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -26,8 +26,10 @@ THE SOFTWARE.  */
 #include <stdarg.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -35,240 +37,46 @@ THE SOFTWARE.  */
 #include <htslib/vcf.h>
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/vcfutils.h>
+#include <htslib/kbitset.h>
+#include <htslib/hts_os.h>
 #include <inttypes.h>
+#include <sys/time.h>
 #include "bcftools.h"
-#include "hclust.h"
+#include "extsort.h"
+//#include "hclust.h"
 
 typedef struct
 {
-    bcf_srs_t *files;           // first reader is the query VCF - single sample normally or multi-sample for cross-check
-    bcf_hdr_t *gt_hdr, *sm_hdr; // VCF with genotypes to compare against and the query VCF
-    int ntmp_arr, npl_arr;
-    int32_t *tmp_arr, *pl_arr;
-    double *lks, *sites, min_inter_err, max_intra_err;
-    int *cnts, *dps, hom_only, cross_check, all_sites;
-    char *cwd, **argv, *gt_fname, *plot, *query_sample, *target_sample;
-    int argc, no_PLs, narr, nsmpl;
-}
-args_t;
-
-FILE *open_file(char **fname, const char *mode, const char *fmt, ...);
-char *msprintf(const char *fmt, ...);
-void mkdir_p(const char *fmt, ...);
-
-void py_plot(char *script)
-{
-    mkdir_p(script);
-    int len = strlen(script);
-    char *cmd = !strcmp(".py",script+len-3) ? msprintf("python %s", script) : msprintf("python %s.py", script);
-    int ret = system(cmd);
-    if ( ret ) fprintf(stderr, "The command returned non-zero status %d: %s\n", ret, cmd);
-    free(cmd);
-}
-
-static void plot_check(args_t *args, char *target_sample, char *query_sample)
-{
-    char *fname;
-    FILE *fp = open_file(&fname, "w", "%s.py", args->plot);
-    fprintf(fp,
-            "import matplotlib as mpl\n"
-            "mpl.use('Agg')\n"
-            "import matplotlib.pyplot as plt\n"
-            "import matplotlib.gridspec as gridspec\n"
-            "import csv\n"
-            "csv.register_dialect('tab', delimiter='\\t', quoting=csv.QUOTE_NONE)\n"
-            "\n"
-            "sample_ids = False\n"
-            "\n"
-            "dat = []\n"
-            "with open('%s.tab', 'r') as f:\n"
-            "    reader = csv.reader(f, 'tab')\n"
-            "    for row in reader:\n"
-            "        if row[0][0]=='#': continue\n"
-            "        if row[0]!='CN': continue\n"
-            "        tgt = 0\n"
-            "        if row[4]=='%s': tgt = 1\n"
-            "        dat.append([float(row[1]), float(row[2]), float(row[3]), tgt, row[4]])\n"
-            "\n"
-            "dat = sorted(dat)\n"
-            "\n"
-            "iq = -1; dp = 0\n"
-            "for i in range(len(dat)):\n"
-            "    if iq==-1 and dat[i][3]==1: iq = i\n"
-            "    dp += dat[i][2]\n"
-            "dp /= len(dat)\n"
-            "\n"
-            "fig,ax1 = plt.subplots(figsize=(8,5))\n"
-            "ax2 = ax1.twinx()\n"
-            "plots  = ax1.plot([x[0] for x in dat],'o-', ms=3, color='g', mec='g', label='Discordance (total)')\n"
-            "plots += ax1.plot([x[1] for x in dat], '^', ms=3, color='r', mec='r', label='Discordance (avg per site)')\n"
-            "plots += ax2.plot([x[2] for x in dat],'v', ms=3, color='k', label='Number of sites')\n"
-            "if iq!=-1:\n"
-            "   ax1.plot([iq],[dat[iq][0]],'o',color='orange', ms=9)\n"
-            "   ax1.annotate('%s',xy=(iq,dat[iq][0]), xytext=(5,5), textcoords='offset points',fontsize='xx-small',rotation=45,va='bottom',ha='left')\n"
-            "   ax1.plot([iq],[dat[iq][1]],'^',color='red', ms=5)\n"
-            "for tl in ax1.get_yticklabels(): tl.set_color('g')\n"
-            "for tl in ax2.get_yticklabels(): tl.set_color('k'); tl.set_fontsize(9)\n"
-            "min_dp = min([x[2] for x in dat])\n"
-            "max_dp = max([x[2] for x in dat])\n"
-            "ax2.set_ylim(min_dp-1,max_dp+1)\n"
-            "ax1.set_title('Discordance with %s')\n"
-            "ax1.set_xlim(-0.05*len(dat),1.05*(len(dat)-1))\n"
-            "ax1.set_xlabel('Sample ID')\n"
-            "plt.subplots_adjust(left=0.1,right=0.9,bottom=0.1,top=0.9)\n"
-            "if sample_ids:\n"
-            "   ax1.set_xticks(range(len(dat)))\n"
-            "   ax1.set_xticklabels([x[4] for x in dat],**{'rotation':45, 'ha':'right', 'fontsize':8})\n"
-            "   plt.subplots_adjust(bottom=0.2)\n"
-            "ax1.set_ylabel('Discordance',color='g')\n"
-            "ax2.set_ylabel('Number of sites',color='k')\n"
-            "ax2.ticklabel_format(style='sci', scilimits=(-3,2), axis='y')\n"
-            "ax1.ticklabel_format(style='sci', scilimits=(-3,2), axis='y')\n"
-            "labels = [l.get_label() for l in plots]\n"
-            "plt.legend(plots,labels,numpoints=1,markerscale=1,loc='best',prop={'size':10},frameon=False)\n"
-            "plt.savefig('%s.png')\n"
-            "plt.close()\n"
-            "\n", args->plot, target_sample, target_sample, query_sample, args->plot
-           );
-    fclose(fp);
-    py_plot(fname);
-    free(fname);
-}
-
-#if 0
-static void plot_cross_check(args_t *args)
-{
-    char *fname;
-    FILE *fp = open_file(&fname, "w", "%s.py", args->plot);
-    fprintf(fp,
-            "import matplotlib as mpl\n"
-            "mpl.use('Agg')\n"
-            "import matplotlib.pyplot as plt\n"
-            "import matplotlib.gridspec as gridspec\n"
-            "import csv\n"
-            "csv.register_dialect('tab', delimiter='\\t', quoting=csv.QUOTE_NONE)\n"
-            "avg   = []\n"
-            "dp    = []\n"
-            "sm2id = {}\n"
-            "dat   = None\n"
-            "min   = None\n"
-            "max   = None\n"
-            "with open('%s.tab', 'r') as f:\n"
-            "   reader = csv.reader(f, 'tab')\n"
-            "   i = 0\n"
-            "   for row in reader:\n"
-            "       if row[0]=='SM':\n"
-            "           sm2id[row[4]] = i\n"
-            "           avg.append([i,float(row[1])])\n"
-            "           dp.append([i,float(row[2])])\n"
-            "           i += 1\n"
-            "       elif row[0]=='CN':\n"
-            "           val = 0\n"
-            "           if int(row[2])!=0: val = float(row[1])/int(row[2])\n"
-            "           if not dat:\n"
-            "               dat = [[0]*len(sm2id) for x in xrange(len(sm2id))]\n"
-            "               min = val\n"
-            "               max = val\n"
-            "           id_i = sm2id[row[4]]\n"
-            "           id_j = sm2id[row[5]]\n"
-            "           dat[id_i][id_j] = val\n"
-            "           dat[id_j][id_i] = val\n"
-            "           if min > val: min = val\n"
-            "           if max < val: max = val\n"
-            "\n"
-            "if len(sm2id)<=1: exit(1)\n"
-            "if min==max: exit(1)\n"
-            "\n"
-            "fig = plt.figure(figsize=(6,7))\n"
-            "gs  = gridspec.GridSpec(2, 1, height_ratios=[1, 1.5])\n"
-            "ax1 = plt.subplot(gs[0])\n"
-            "ax2 = plt.subplot(gs[1])\n"
-            "\n"
-            "ax1.plot([x[0] for x in avg],[x[1] for x in avg],'^-', ms=3, color='k')\n"
-            "ax3 = ax1.twinx()\n"
-            "ax3.plot([x[0] for x in dp],[x[1] for x in dp],'^-', ms=3, color='r',mec='r')\n"
-            "for tl in ax3.get_yticklabels():\n"
-            "   tl.set_color('r')\n"
-            "   tl.set_fontsize(9)\n"
-            "\n"
-            "im = ax2.imshow(dat,clim=(min),interpolation='nearest',origin='lower')\n"
-            "cb1  = plt.colorbar(im,ax=ax2)\n"
-            "cb1.set_label('Pairwise discordance')\n"
-            "for t in cb1.ax.get_yticklabels(): t.set_fontsize(9)\n"
-            "\n"
-            "ax1.tick_params(axis='both', which='major', labelsize=9)\n"
-            "ax1.tick_params(axis='both', which='minor', labelsize=9)\n"
-            "ax2.tick_params(axis='both', which='major', labelsize=9)\n"
-            "ax2.tick_params(axis='both', which='minor', labelsize=9)\n"
-            "\n"
-            "ax1.set_title('Sample Discordance Score')\n"
-            "ax2.set_ylabel('Sample ID')\n"
-            "ax2.set_xlabel('Sample ID')\n"
-            "ax3.set_ylabel('Average Depth',color='r')\n"
-            "ax1.set_xlabel('Sample ID')\n"
-            "ax1.set_ylabel('Average discordance')\n"
-            "\n"
-            "plt.subplots_adjust(left=0.15,right=0.87,bottom=0.08,top=0.93,hspace=0.25)\n"
-            "plt.savefig('%s.png')\n"
-            "plt.close()\n"
-            "\n", args->plot,args->plot
-           );
-    fclose(fp);
-    py_plot(fname);
-    free(fname);
-}
-#endif
-
-static void init_data(args_t *args)
-{
-    args->sm_hdr = args->files->readers[0].header;
-    if ( !bcf_hdr_nsamples(args->sm_hdr) ) error("No samples in %s?\n", args->files->readers[0].fname);
-
-    if ( !args->cross_check )
-    {
-        args->gt_hdr = args->files->readers[1].header;
-        int nsamples = bcf_hdr_nsamples(args->gt_hdr);
-        if ( !nsamples ) error("No samples in %s?\n", args->files->readers[1].fname);
-        args->lks   = (double*) calloc(nsamples,sizeof(double));
-        args->cnts  = (int*) calloc(nsamples,sizeof(int));
-        args->sites = (double*) calloc(nsamples,sizeof(double));
-        args->dps   = (int*) calloc(nsamples,sizeof(int));
-    }
+    int iqry, igt;
 }
+pair_t;
 
-static void destroy_data(args_t *args)
-{
-    free(args->lks); free(args->cnts); free(args->dps); free(args->cwd); free(args->sites);
-}
-
-static int allele_to_int(bcf1_t *line, char *allele)
+typedef struct
 {
-    int i;
-    for (i=0; i<line->n_allele; i++)
-        if ( !strcmp(allele,line->d.allele[i]) ) return i;
-    if ( strcmp(line->d.allele[i-1],"X") ) return -1;
-    return i-1;
-}
+    bcf_srs_t *files;           // first reader is the query VCF - single sample normally or multi-sample for cross-check
+    bcf_hdr_t *gt_hdr, *qry_hdr; // VCF with genotypes to compare against and the query VCF
+    char *cwd, **argv, *gt_samples, *qry_samples, *regions, *targets, *qry_fname, *gt_fname, *pair_samples;
+    int argc, gt_samples_is_file, qry_samples_is_file, regions_is_file, targets_is_file, pair_samples_is_file;
+    int qry_use_GT,gt_use_GT, nqry_smpl,ngt_smpl, *qry_smpl,*gt_smpl;
+    double *pdiff, *qry_prob, *gt_prob;
+    uint32_t *ndiff,*ncnt,ncmp, npairs;
+    int32_t *qry_arr,*gt_arr, nqry_arr,ngt_arr;
+    uint8_t *qry_dsg, *gt_dsg;
+    pair_t *pairs;
+    double *hwe_prob, dsg2prob[8][3], pl2prob[256];
+    double min_inter_err, max_intra_err;
+    int all_sites, hom_only, ntop, cross_check, calc_hwe_prob, sort_by_hwe, dry_run, use_PLs;
+    FILE *fp;
+    unsigned int nskip_no_match, nskip_not_ba, nskip_mono, nskip_no_data, nskip_dip_GT, nskip_dip_PL;
 
-static int init_gt2ipl(args_t *args, bcf1_t *gt_line, bcf1_t *sm_line, int *gt2ipl, int n_gt2ipl)
-{
-    int i, j;
-    for (i=0; i<n_gt2ipl; i++) gt2ipl[i] = -1;
-    for (i=0; i<gt_line->n_allele; i++)
-    {
-        // find which of the sm_alleles (k) corresponds to the gt_allele (i)
-        int k = allele_to_int(sm_line, gt_line->d.allele[i]);
-        if ( k<0 ) return 0;
-        for (j=0; j<=i; j++)
-        {
-            int l = allele_to_int(sm_line, gt_line->d.allele[j]);
-            if ( l<0 ) return 0;
-            gt2ipl[ bcf_ij2G(j,i) ] = k<=l ? bcf_ij2G(k,l) : bcf_ij2G(l,k);
-        }
-    }
-    //for (i=0; i<n_gt2ipl; i++) printf("%d .. %d\n", i,gt2ipl[i]);
-    return 1;
+    // for --distinctive-sites
+    double distinctive_sites;
+    kbitset_t *kbs_diff;
+    size_t diff_sites_size;
+    extsort_t *es;
+    char *es_tmp_prefix, *es_max_mem;
 }
+args_t;
 
 static void set_cwd(args_t *args)
 {
@@ -284,7 +92,6 @@ static void set_cwd(args_t *args)
     }
     assert(buf);
 }
-
 static void print_header(args_t *args, FILE *fp)
 {
     fprintf(fp, "# This file was produced by bcftools (%s+htslib-%s), the command line was:\n", bcftools_version(), hts_version());
@@ -296,413 +103,920 @@ static void print_header(args_t *args, FILE *fp)
     fprintf(fp, "# \t %s\n#\n", args->cwd);
 }
 
-static int fake_PLs(args_t *args, bcf_hdr_t *hdr, bcf1_t *line)
+static int cmp_int(const void *_a, const void *_b)
 {
-    // PLs not present, use GTs instead.
-    int fake_PL = args->no_PLs ? args->no_PLs : 99;    // with 1, discordance is the number of non-matching GTs
-    int nsm_gt, i;
-    if ( (nsm_gt=bcf_get_genotypes(hdr, line, &args->tmp_arr, &args->ntmp_arr)) <= 0 )
-        error("GT not present at %s:%"PRId64"?\n", hdr->id[BCF_DT_CTG][line->rid].key, (int64_t) line->pos+1);
-    nsm_gt /= bcf_hdr_nsamples(hdr);
-    int npl = line->n_allele*(line->n_allele+1)/2;
-    hts_expand(int,npl*bcf_hdr_nsamples(hdr),args->npl_arr,args->pl_arr);
-    for (i=0; i<bcf_hdr_nsamples(hdr); i++)
-    {
-        int *gt_ptr = args->tmp_arr + i*nsm_gt;
-        int j, *pl_ptr = args->pl_arr + i*npl;
-        if ( bcf_gt_is_missing(gt_ptr[0]) || bcf_gt_is_missing(gt_ptr[1]) ) // missing genotype
-        {
-            for (j=0; j<npl; j++) pl_ptr[j] = -1;
-        }
-        else
-        {
-            int a = bcf_gt_allele(gt_ptr[0]);
-            int b = bcf_gt_allele(gt_ptr[1]);
-            for (j=0; j<npl; j++) pl_ptr[j] = fake_PL;
-            int idx = bcf_alleles2gt(a,b);
-            pl_ptr[idx] = 0;
-        }
-    }
-    return npl;
+    int a = *((int*)_a);
+    int b = *((int*)_b);
+    if ( a < b ) return -1;
+    if ( a > b ) return 1;
+    return 0;
+}
+static int cmp_pair(const void *_a, const void *_b)
+{
+    pair_t *a = (pair_t*)_a;
+    pair_t *b = (pair_t*)_b;
+    if ( a->iqry < b->iqry ) return -1;
+    if ( a->iqry > b->iqry ) return 1;
+    if ( a->igt < b->igt ) return -1;
+    if ( a->igt > b->igt ) return 1;
+    return 0;
 }
 
-static int cmp_doubleptr(const void *_a, const void *_b)
+typedef struct
+{
+    uint32_t ndiff,rid,pos,rand; // rand is to shuffle sites with the same ndiff from across all chromosomes
+    unsigned long kbs_dat[1];
+}
+diff_sites_t;
+#if DBG
+static void diff_sites_debug_print(args_t *args, diff_sites_t *ds)
+{
+    int i;
+    memcpy(args->kbs_diff->b,ds->kbs_dat,args->kbs_diff->n*sizeof(unsigned long));
+    fprintf(stderr,"%s:%d\t%d\t",bcf_hdr_id2name(args->qry_hdr,ds->rid),ds->pos+1,ds->ndiff);
+    for (i=0; i<args->npairs; i++) fprintf(stderr,"%d",kbs_exists(args->kbs_diff,i)?1:0);
+    fprintf(stderr,"\n");
+}
+#endif
+static int diff_sites_cmp(const void *aptr, const void *bptr)
+{
+    diff_sites_t *a = *((diff_sites_t**)aptr);
+    diff_sites_t *b = *((diff_sites_t**)bptr);
+    if ( a->ndiff < b->ndiff ) return 1;        // descending order
+    if ( a->ndiff > b->ndiff ) return -1;
+    if ( a->rand < b->rand ) return -1;
+    if ( a->rand > b->rand ) return 1;
+    return 0;
+}
+static void diff_sites_init(args_t *args)
+{
+    int nsites = args->distinctive_sites<=1 ? args->npairs*args->distinctive_sites : args->distinctive_sites;
+    if ( nsites<=0 ) error("The value for --distinctive-sites was set too low: %d\n",nsites);
+    if ( nsites > args->npairs )
+    {
+        fprintf(stderr,"Warning: The value for --distinctive-sites is bigger than is the number of pairs, all discordant sites be printed.\n");
+        nsites = args->npairs;
+        args->distinctive_sites = args->npairs + 1;
+    }
+    else
+        args->distinctive_sites = nsites;
+    args->kbs_diff = kbs_init(args->npairs);
+    size_t n = (args->npairs + KBS_ELTBITS-1) / KBS_ELTBITS;
+    assert( n==args->kbs_diff->n );
+    args->diff_sites_size = sizeof(diff_sites_t) + (n-1)*sizeof(unsigned long);
+    args->es = extsort_alloc();
+    extsort_set_opt(args->es,size_t,DAT_SIZE,args->diff_sites_size);
+    extsort_set_opt(args->es,const char*,TMP_PREFIX,args->es_tmp_prefix);
+    extsort_set_opt(args->es,const char*,MAX_MEM,args->es_max_mem);
+    extsort_set_opt(args->es,extsort_cmp_f,FUNC_CMP,diff_sites_cmp);
+    extsort_init(args->es);
+}
+static void diff_sites_destroy(args_t *args)
 {
-    double *a = *((double**)_a);
-    double *b = *((double**)_b);
-    if ( *a < *b ) return -1;
-    else if ( *a == *b ) return 0;
+    kbs_destroy(args->kbs_diff);
+    extsort_destroy(args->es);
+}
+static inline void diff_sites_reset(args_t *args)
+{
+    kbs_clear(args->kbs_diff);
+}
+static inline void diff_sites_push(args_t *args, int ndiff, int rid, int pos)
+{
+    diff_sites_t *dat = (diff_sites_t*) malloc(args->diff_sites_size);
+    memset(dat,0,sizeof(*dat)); // for debugging: prevent warnings about uninitialized memory coming from struct padding (not needed after rand added)
+    dat->ndiff = ndiff;
+    dat->rid  = rid;
+    dat->pos  = pos;
+    dat->rand = hts_lrand48();
+    memcpy(dat->kbs_dat,args->kbs_diff->b,args->kbs_diff->n*sizeof(unsigned long));
+    extsort_push(args->es,dat);
+}
+static inline int diff_sites_shift(args_t *args, int *ndiff, int *rid, int *pos)
+{
+    diff_sites_t *dat = (diff_sites_t*) extsort_shift(args->es);
+    if ( !dat ) return 0;
+    *ndiff = dat->ndiff;
+    *rid   = dat->rid;
+    *pos   = dat->pos;
+    memcpy(args->kbs_diff->b,dat->kbs_dat,args->kbs_diff->n*sizeof(unsigned long));
     return 1;
 }
 
-static void check_gt(args_t *args)
+static void init_samples(char *list, int list_is_file, int **smpl, int *nsmpl, bcf_hdr_t *hdr, char *vcf_fname)
 {
-    int i,ret, *gt2ipl = NULL, m_gt2ipl = 0, *gt_arr = NULL, ngt_arr = 0;
-    int fake_pls = args->no_PLs;
+    int i;
+    if ( !strcmp(list,"-") )
+    {
+        *nsmpl = bcf_hdr_nsamples(hdr);
+        *smpl  = (int*) malloc(sizeof(**smpl)*(*nsmpl));
+        for (i=0; i<*nsmpl; i++) (*smpl)[i] = i;
+        return;
+    }
 
-    // Initialize things: check which tags are defined in the header, sample names etc.
-    if ( bcf_hdr_id2int(args->gt_hdr, BCF_DT_ID, "GT")<0 ) error("[E::%s] GT not present in the header of %s?\n", __func__, args->files->readers[1].fname);
-    if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "PL")<0 )
+    char **tmp = hts_readlist(list, list_is_file, nsmpl);
+    if ( !tmp || !*nsmpl ) error("Failed to parse %s\n", list);
+    *smpl = (int*) malloc(sizeof(**smpl)*(*nsmpl));
+    for (i=0; i<*nsmpl; i++)
     {
-        if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
-            error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
-        if ( !args->no_PLs )
-            fprintf(stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
-        fake_pls = 1;
+        int idx = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, tmp[i]);
+        if ( idx<0 ) error("No such sample in %s: [%s]\n",vcf_fname,tmp[i]);
+        (*smpl)[i] = idx;
+        free(tmp[i]);
     }
+    free(tmp);
+    qsort(*smpl,*nsmpl,sizeof(**smpl),cmp_int);
+    // check for duplicates
+    for (i=1; i<*nsmpl; i++)
+        if ( (*smpl)[i-1]==(*smpl)[i] )
+            error("Error: the sample \"%s\" is listed twice in %s\n", hdr->samples[(*smpl)[i]],list);
+}
 
-    FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : stdout;
-    print_header(args, fp);
+static void init_data(args_t *args)
+{
+    hts_srand48(0);
 
-    int tgt_isample = -1, query_isample = 0;
-    if ( args->target_sample )
+    args->files = bcf_sr_init();
+    if ( args->regions && bcf_sr_set_regions(args->files, args->regions, args->regions_is_file)<0 ) error("Failed to read the regions: %s\n", args->regions);
+    if ( args->targets && bcf_sr_set_targets(args->files, args->targets, args->targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n", args->targets);
+
+    if ( args->gt_fname ) bcf_sr_set_opt(args->files, BCF_SR_REQUIRE_IDX);
+    if ( !bcf_sr_add_reader(args->files,args->qry_fname) ) error("Failed to open %s: %s\n", args->qry_fname,bcf_sr_strerror(args->files->errnum));
+    if ( args->gt_fname && !bcf_sr_add_reader(args->files, args->gt_fname) )
+        error("Failed to read from %s: %s\n", !strcmp("-",args->gt_fname)?"standard input":args->gt_fname,bcf_sr_strerror(args->files->errnum));
+
+    args->qry_hdr = bcf_sr_get_header(args->files,0);
+    if ( !bcf_hdr_nsamples(args->qry_hdr) ) error("No samples in %s?\n", args->qry_fname);
+    if ( args->gt_fname )
     {
-        tgt_isample = bcf_hdr_id2int(args->gt_hdr, BCF_DT_SAMPLE, args->target_sample);
-        if ( tgt_isample<0 ) error("No such sample in %s: [%s]\n", args->files->readers[1].fname, args->target_sample);
+        args->gt_hdr = bcf_sr_get_header(args->files,1);
+        if ( !bcf_hdr_nsamples(args->gt_hdr) ) error("No samples in %s?\n", args->gt_fname);
     }
-    if ( args->all_sites )
+
+    // Determine whether GT or PL will be used
+    if ( args->qry_use_GT==-1 ) // not set by -u, qry uses PL by default
     {
-        if ( tgt_isample==-1 )
-        {
-            fprintf(stderr,"No target sample selected for comparison, using the first sample in %s: %s\n", args->gt_fname,args->gt_hdr->samples[0]);
-            tgt_isample = 0;
-        }
+        if ( bcf_hdr_id2int(args->qry_hdr,BCF_DT_ID,"PL")>=0 )
+            args->qry_use_GT = 0;
+        else if ( bcf_hdr_id2int(args->qry_hdr,BCF_DT_ID,"GT")>=0 )
+            args->qry_use_GT = 1;
+        else
+            error("[E::%s] Neither PL nor GT tag is present in the header of %s\n", __func__, args->qry_fname);
     }
-    if ( args->query_sample )
+    else if ( args->qry_use_GT==1 )
     {
-        query_isample = bcf_hdr_id2int(args->sm_hdr, BCF_DT_SAMPLE, args->query_sample);
-        if ( query_isample<0 ) error("No such sample in %s: [%s]\n", args->files->readers[0].fname, args->query_sample);
+        if ( bcf_hdr_id2int(args->qry_hdr,BCF_DT_ID,"GT")<0 )
+            error("[E::%s] The GT tag is not present in the header of %s\n", __func__, args->qry_fname);
     }
-    if ( args->all_sites )
-        fprintf(fp, "# [1]SC, Site by Site Comparison\t[2]Chromosome\t[3]Position\t[4]-g alleles\t[5]-g GT (%s)\t[6]match log LK\t[7]Query alleles\t[8-]Query PLs (%s)\n",
-                args->gt_hdr->samples[tgt_isample],args->sm_hdr->samples[query_isample]);
+    else if ( bcf_hdr_id2int(args->qry_hdr,BCF_DT_ID,"PL")<0 )
+        error("[E::%s] The PL tag is not present in the header of %s\n", __func__, args->qry_fname);
 
-    // Main loop
-    float prev_lk = 0;
-    while ( (ret=bcf_sr_next_line(args->files)) )
+    if ( args->gt_hdr )
     {
-        if ( ret!=2 ) continue;
-        bcf1_t *sm_line = args->files->readers[0].buffer[0];    // the query file
-        bcf1_t *gt_line = args->files->readers[1].buffer[0];    // the -g target file
-        bcf_unpack(sm_line, BCF_UN_FMT);
-        bcf_unpack(gt_line, BCF_UN_FMT);
-
-        // Init mapping from target genotype index to the sample's PL fields
-        int n_gt2ipl = gt_line->n_allele*(gt_line->n_allele + 1)/2;
-        if ( n_gt2ipl > m_gt2ipl )
+        if ( args->gt_use_GT==-1 ) // not set by -u, gt uses GT by default
+        {
+            if ( bcf_hdr_id2int(args->gt_hdr,BCF_DT_ID,"GT")>=0 )
+                args->gt_use_GT = 1;
+            else if ( bcf_hdr_id2int(args->gt_hdr,BCF_DT_ID,"PL")>=0 )
+                args->gt_use_GT = 0;
+            else
+                error("[E::%s] Neither PL nor GT tag is present in the header of %s\n", __func__, args->gt_fname);
+        }
+        else if ( args->gt_use_GT==1 )
         {
-            m_gt2ipl = n_gt2ipl;
-            gt2ipl   = (int*) realloc(gt2ipl, sizeof(int)*m_gt2ipl);
+            if ( bcf_hdr_id2int(args->gt_hdr,BCF_DT_ID,"GT")<0 )
+                error("[E::%s] The GT tag is not present in the header of %s\n", __func__, args->gt_fname);
         }
-        if ( !init_gt2ipl(args, gt_line, sm_line, gt2ipl, n_gt2ipl) ) continue;
-
-        // Target genotypes
-        int ngt, npl;
-        if ( (ngt=bcf_get_genotypes(args->gt_hdr, gt_line, &gt_arr, &ngt_arr)) <= 0 )
-            error("GT not present at %s:%"PRId64"?", args->gt_hdr->id[BCF_DT_CTG][gt_line->rid].key, (int64_t) gt_line->pos+1);
-        ngt /= bcf_hdr_nsamples(args->gt_hdr);
-        if ( ngt!=2 ) continue; // checking only diploid genotypes
+        else if ( bcf_hdr_id2int(args->gt_hdr,BCF_DT_ID,"PL")<0 )
+            error("[E::%s] The PL tag is not present in the header of %s\n", __func__, args->gt_fname);
+    }
+    else
+        args->gt_use_GT = args->qry_use_GT;
 
-        // Sample PLs
-        if ( !fake_pls )
+    // Prepare samples
+    int i,j;
+    args->nqry_smpl = bcf_hdr_nsamples(args->qry_hdr);
+    if ( args->qry_samples )
+    {
+        init_samples(args->qry_samples, args->qry_samples_is_file, &args->qry_smpl, &args->nqry_smpl, args->qry_hdr, args->qry_fname);
+    }
+    if ( args->gt_samples )
+    {   
+        init_samples(args->gt_samples, args->gt_samples_is_file, &args->gt_smpl, &args->ngt_smpl,
+            args->gt_hdr ? args->gt_hdr : args->qry_hdr,
+            args->gt_fname ? args->gt_fname : args->qry_fname);
+    }
+    else if ( args->pair_samples )
+    {
+        int npairs;
+        char **tmp = hts_readlist(args->pair_samples, args->pair_samples_is_file, &npairs);
+        if ( !tmp || !npairs ) error("Failed to parse %s\n", args->pair_samples);
+        if ( !args->pair_samples_is_file && npairs%2 ) error("Expected even number of comma-delimited samples with -p\n");
+        args->npairs = args->pair_samples_is_file ? npairs : npairs/2;
+        args->pairs  = (pair_t*) calloc(args->npairs,sizeof(*args->pairs));
+        if ( !args->pair_samples_is_file )
         {
-            if ( (npl=bcf_get_format_int32(args->sm_hdr, sm_line, "PL", &args->pl_arr, &args->npl_arr)) <= 0 )
+            for (i=0; i<args->npairs; i++)
             {
-                if ( sm_line->n_allele==1 )
-                {
-                    // PL values may not be present when ALT=. (mpileup/bcftools output), in that case 
-                    // switch automatically to GT at these sites
-                    npl = fake_PLs(args, args->sm_hdr, sm_line);
-                }
-                else
-                    error("PL not present at %s:%"PRId64"?\n", args->sm_hdr->id[BCF_DT_CTG][sm_line->rid].key, (int64_t) sm_line->pos+1);
+                args->pairs[i].iqry = bcf_hdr_id2int(args->qry_hdr, BCF_DT_SAMPLE, tmp[2*i]);
+                args->pairs[i].igt  = bcf_hdr_id2int(args->gt_hdr?args->gt_hdr:args->qry_hdr, BCF_DT_SAMPLE, tmp[2*i+1]);
+                if ( args->pairs[i].iqry < 0 ) error("No such sample in %s: [%s]\n",args->qry_fname,tmp[2*i]);
+                if ( args->pairs[i].igt  < 0 ) error("No such sample in %s: [%s]\n",args->gt_fname?args->gt_fname:args->qry_fname,tmp[2*i+1]);
+                free(tmp[2*i]);
+                free(tmp[2*i+1]);
             }
-            else
-                npl /= bcf_hdr_nsamples(args->sm_hdr);
         }
         else
-            npl = fake_PLs(args, args->sm_hdr, sm_line);
+        {
+            for (i=0; i<args->npairs; i++)
+            {
+                char *ptr = tmp[i];
+                while ( *ptr && !isspace(*ptr) ) ptr++;
+                if ( !*ptr ) error("Could not parse %s: %s\n",args->pair_samples,tmp[i]);
+                *ptr = 0;
+                args->pairs[i].iqry = bcf_hdr_id2int(args->qry_hdr, BCF_DT_SAMPLE, tmp[i]);
+                if ( args->pairs[i].iqry < 0 ) error("No such sample in %s: [%s]\n",args->qry_fname,tmp[i]);
+                ptr++;
+                while ( *ptr && isspace(*ptr) ) ptr++;
+                args->pairs[i].igt = bcf_hdr_id2int(args->gt_hdr?args->gt_hdr:args->qry_hdr, BCF_DT_SAMPLE, ptr);
+                if ( args->pairs[i].igt < 0 ) error("No such sample in %s: [%s]\n",args->gt_fname?args->gt_fname:args->qry_fname,ptr);
+                free(tmp[i]);
+            }
+        }
+        free(tmp);
+        qsort(args->pairs,args->npairs,sizeof(*args->pairs),cmp_pair);
+    }
+    else if ( args->gt_hdr )
+        args->ngt_smpl = bcf_hdr_nsamples(args->gt_hdr);
+    if ( !args->ngt_smpl )
+    {
+        args->ngt_smpl = args->nqry_smpl;
+        args->gt_smpl  = args->qry_smpl;
+        args->cross_check = 1;
+    }
+
+    // The data arrays
+    if ( !args->npairs ) args->npairs = args->cross_check ? args->nqry_smpl*(args->nqry_smpl+1)/2 : args->ngt_smpl*args->nqry_smpl;
+    if ( !args->pair_samples )
+    {
+        args->qry_dsg = (uint8_t*) malloc(args->nqry_smpl);
+        args->gt_dsg  = args->cross_check ? args->qry_dsg : (uint8_t*) malloc(args->ngt_smpl);
+    }
+    if ( args->use_PLs )
+    {
+        args->pdiff = (double*) calloc(args->npairs,sizeof(*args->pdiff));      // log probability of pair samples being the same
+        args->qry_prob = (double*) malloc(3*args->nqry_smpl*sizeof(*args->qry_prob));
+        args->gt_prob  = args->cross_check ? args->qry_prob : (double*) malloc(3*args->ngt_smpl*sizeof(*args->gt_prob));
+
+        // dsg2prob: the first index is a bitmask of 8 possible dsg combinations (only 1<<0,1<<1,1<<2 are set, accessing
+        // anything else indicates an error, this is just to reuse gt_to_dsg()); the second index selects the corresponding
+        // negative log-probabilities of the 0/0, 0/1, and 1/1 genotypes
+        for (i=0; i<8; i++)
+            for (j=0; j<3; j++)
+                args->dsg2prob[i][j] = HUGE_VAL;
+        args->dsg2prob[1][0] = -log(1-pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[1][1] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[1][2] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[2][0] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[2][1] = -log(1-pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[2][2] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[4][0] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[4][1] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[4][2] = -log(1-pow(10,-0.1*args->use_PLs));
 
-        // Calculate likelihoods for all samples, assuming diploid genotypes
+        // lookup table to avoid exponentiation
+        for (i=0; i<256; i++) args->pl2prob[i] = pow(10,-0.1*i);
+    }
+    else
+        args->ndiff = (uint32_t*) calloc(args->npairs,sizeof(*args->ndiff));    // number of differing genotypes for each pair of samples
+    args->ncnt  = (uint32_t*) calloc(args->npairs,sizeof(*args->ncnt));         // number of comparisons performed (non-missing data)
+    if ( !args->ncnt ) error("Error: failed to allocate %.1f Mb\n", args->npairs*sizeof(*args->ncnt)/1e6);
+    if ( args->calc_hwe_prob )
+    {
+        // prob of the observed sequence of matches given site AFs and HWE
+        args->hwe_prob = (double*) calloc(args->npairs,sizeof(*args->hwe_prob));
+        if ( !args->hwe_prob ) error("Error: failed to allocate %.1f Mb. Run with --no-HWE-prob to save some memory.\n", args->npairs*sizeof(*args->hwe_prob)/1e6);
+    }
+
+    if ( args->distinctive_sites ) diff_sites_init(args);
+
+    args->fp = stdout;
+    print_header(args, args->fp);
+}
+
+static void destroy_data(args_t *args)
+{
+    if ( args->gt_dsg!=args->qry_dsg ) free(args->gt_dsg);
+    free(args->qry_dsg);
+    if ( args->gt_prob!=args->qry_prob ) free(args->gt_prob);
+    free(args->qry_prob);
+    free(args->es_max_mem);
+    fclose(args->fp);
+    if ( args->distinctive_sites ) diff_sites_destroy(args);
+    free(args->hwe_prob);
+    free(args->cwd);
+    free(args->qry_arr);
+    if ( args->gt_hdr ) free(args->gt_arr);
+    free(args->pdiff);
+    free(args->ndiff);
+    free(args->ncnt);
+    free(args->qry_smpl);
+    if ( args->gt_smpl!=args->qry_smpl ) free(args->gt_smpl);
+    free(args->pairs);
+    bcf_sr_destroy(args->files);
+}
 
-        // For faster access to genotype likelihoods (PLs) of the query sample
-        int max_ipl, *pl_ptr = args->pl_arr + query_isample*npl;
-        double sum_pl = 0; // for converting PLs to probs
-        for (max_ipl=0; max_ipl<npl; max_ipl++)
+static inline uint8_t gt_to_dsg(int32_t *ptr)
+{
+    if ( bcf_gt_is_missing(ptr[0]) || bcf_gt_is_missing(ptr[1]) || ptr[1]==bcf_int32_vector_end ) return 0;
+    uint8_t dsg = (bcf_gt_allele(ptr[0])?1:0) + (bcf_gt_allele(ptr[1])?1:0);
+    return 1<<dsg;
+}
+static inline uint8_t pl_to_dsg(int32_t *ptr)      // three PL values -> bitmask of the dosage(s) with the smallest PL, or 0 when any value is absent
+{
+    if ( ptr[0]==bcf_int32_missing || ptr[1]==bcf_int32_missing || ptr[2]==bcf_int32_missing ) return 0;
+    if ( ptr[1]==bcf_int32_vector_end || ptr[2]==bcf_int32_vector_end ) return 0;       // fewer than three values stored
+    int min_pl = ptr[0]<ptr[1] ? (ptr[0]<ptr[2]?ptr[0]:ptr[2]) : (ptr[1]<ptr[2]?ptr[1]:ptr[2]);     // minimum of the three values
+    uint8_t dsg = 0;
+    if ( ptr[0]==min_pl ) dsg |= 1;     // ties set several bits, e.g. PL=0,0,0 yields 7
+    if ( ptr[1]==min_pl ) dsg |= 2;
+    if ( ptr[2]==min_pl ) dsg |= 4;
+    return dsg;
+}
+static inline uint8_t gt_to_prob(args_t *args, int32_t *ptr, double *prob)     // same return value as gt_to_dsg(); also fills prob[0..2] from the args->dsg2prob table
+{
+    uint8_t dsg = gt_to_dsg(ptr);
+    if ( dsg )      // prob[] is left untouched when the genotype is missing
+    {
+        prob[0] = args->dsg2prob[dsg][0];       // the table is indexed by the dosage mask, not by the dosage itself
+        prob[1] = args->dsg2prob[dsg][1];
+        prob[2] = args->dsg2prob[dsg][2];
+    }
+    return dsg;
+}
+static inline uint8_t pl_to_prob(args_t *args, int32_t *ptr, double *prob)     // same return value as pl_to_dsg(); also fills prob[0..2] with -log of the normalized PL-derived values
+{
+    uint8_t dsg = pl_to_dsg(ptr);
+    if ( dsg )      // prob[] is left untouched when the PLs are missing
+    {
+        prob[0] = (ptr[0]>=0 && ptr[0]<255) ? args->pl2prob[ptr[0]] : args->pl2prob[255];       // values outside of [0,255) all use the pl2prob[255] entry
+        prob[1] = (ptr[1]>=0 && ptr[1]<255) ? args->pl2prob[ptr[1]] : args->pl2prob[255];
+        prob[2] = (ptr[2]>=0 && ptr[2]<255) ? args->pl2prob[ptr[2]] : args->pl2prob[255];
+        double sum = prob[0] + prob[1] + prob[2];
+        prob[0] /= sum;     // normalize so that the three values sum to one
+        prob[1] /= sum;
+        prob[2] /= sum;
+        prob[0] = -log(prob[0]);    // store the negative natural log, process_line() adds these up
+        prob[1] = -log(prob[1]);
+        prob[2] = -log(prob[2]);
+    }
+    return dsg;
+}
+static int set_data(args_t *args, bcf_hdr_t *hdr, bcf1_t *rec, int32_t **arr, int32_t *narr, int *narr1, int *use_GT)
+{   // Read FORMAT/GT or FORMAT/PL of rec into *arr. Returns 0 with *narr1 set to the number of values per sample (2 for GT, 3 for PL), or -1 when the site is skipped
+    static int warn_dip_GT = 1;     // one-shot flags, each non-diploid message is printed only once
+    static int warn_dip_PL = 1;
+    int i;
+    for (i=0; i<2; i++)             // at most two attempts: the tag selected by *use_GT first, then the other one
+    {
+        if ( *use_GT )
         {
-            if ( pl_ptr[max_ipl]==bcf_int32_vector_end ) break;
-            if ( pl_ptr[max_ipl]==bcf_int32_missing ) continue;
-            sum_pl += pow(10, -0.1*pl_ptr[max_ipl]);
+            int ret = bcf_get_genotypes(hdr,rec,arr,narr);
+            if ( ret < 0 )
+            {
+                if ( !i ) { *use_GT = 0; continue; }        // GT could not be read on the first attempt, retry with PL
+                args->nskip_no_data++;                      // second attempt failed as well
+                return -1;
+            }
+            if ( ret != 2*bcf_hdr_nsamples(hdr) )           // anything but exactly two values per sample is rejected
+            {
+                if ( warn_dip_GT )
+                {
+                    fprintf(stderr,"INFO: skipping %s:%"PRIhts_pos", only diploid FORMAT/GT fields supported. (This is printed only once.)\n", bcf_seqname(hdr,rec),rec->pos+1);
+                    warn_dip_GT = 0;
+                }
+                args->nskip_dip_GT++;
+                return -1;
+            }
+            *narr1 = 2;
+            return 0;
         }
-        if ( sum_pl==0 ) continue; // no PLs present
-        if ( fake_pls && args->no_PLs==1 ) sum_pl = -1;
 
-        // The main stats: concordance of the query sample with the target -g samples
-        for (i=0; i<bcf_hdr_nsamples(args->gt_hdr); i++)
+        int ret = bcf_get_format_int32(hdr,rec,"PL",arr,narr);
+        if ( ret < 0 )
         {
-            int *gt_ptr = gt_arr + i*ngt;
-            if ( gt_ptr[1]==bcf_int32_vector_end ) continue;    // skip haploid genotypes
-            if ( bcf_gt_is_missing(gt_ptr[0]) || bcf_gt_is_missing(gt_ptr[1]) ) continue;
-            int a = bcf_gt_allele(gt_ptr[0]);
-            int b = bcf_gt_allele(gt_ptr[1]);
-            if ( args->hom_only && a!=b ) continue; // heterozygous genotype
-            int igt_tgt = igt_tgt = bcf_alleles2gt(a,b); // genotype index in the target file
-            int igt_qry = gt2ipl[igt_tgt];  // corresponding genotype in query file
-            if ( igt_qry>=max_ipl || pl_ptr[igt_qry]<0 ) continue;   // genotype not present in query sample: haploid or missing
-            args->lks[i] += sum_pl<0 ? -pl_ptr[igt_qry] : log(pow(10, -0.1*pl_ptr[igt_qry])/sum_pl);
-            args->sites[i]++;
+            if ( !i ) { *use_GT = 1; continue; }            // PL could not be read on the first attempt, retry with GT
+            args->nskip_no_data++;
+            return -1;
         }
-        if ( args->all_sites )
+        if ( ret != 3*bcf_hdr_nsamples(hdr) )               // anything but exactly three values per sample is rejected
         {
-            // Print LKs at all sites for debugging
-            int *gt_ptr = gt_arr + tgt_isample*ngt;
-            if ( gt_ptr[1]==bcf_int32_vector_end ) continue;    // skip haploid genotypes
-            int a = bcf_gt_allele(gt_ptr[0]);
-            int b = bcf_gt_allele(gt_ptr[1]);
-            if ( args->hom_only && a!=b ) continue; // heterozygous genotype
-            fprintf(fp, "SC\t%s\t%"PRId64, args->gt_hdr->id[BCF_DT_CTG][gt_line->rid].key, (int64_t) gt_line->pos+1);
-            for (i=0; i<gt_line->n_allele; i++) fprintf(fp, "%c%s", i==0?'\t':',', gt_line->d.allele[i]);
-            fprintf(fp, "\t%s/%s", a>=0 ? gt_line->d.allele[a] : ".", b>=0 ? gt_line->d.allele[b] : ".");
-            fprintf(fp, "\t%f", args->lks[query_isample]-prev_lk);
-            prev_lk = args->lks[query_isample];
-
-            int igt, *pl_ptr = args->pl_arr + query_isample*npl; // PLs of the query sample
-            for (i=0; i<sm_line->n_allele; i++) fprintf(fp, "%c%s", i==0?'\t':',', sm_line->d.allele[i]);
-            for (igt=0; igt<npl; igt++)
-                if ( pl_ptr[igt]==bcf_int32_vector_end ) break;
-                else if ( pl_ptr[igt]==bcf_int32_missing ) fprintf(fp, ".");
-                else fprintf(fp, "\t%d", pl_ptr[igt]);
-            fprintf(fp, "\n");
+            if ( warn_dip_PL )
+            {
+                fprintf(stderr,"INFO: skipping %s:%"PRIhts_pos", only diploid FORMAT/PL fields supported. (This is printed only once.)\n", bcf_seqname(hdr,rec),rec->pos+1);
+                warn_dip_PL = 0;
+            }
+            args->nskip_dip_PL++;
+            return -1;
         }
+        *narr1 = 3;
+        return 0;
     }
-    free(gt2ipl);
-    free(gt_arr);
-    free(args->pl_arr);
-    free(args->tmp_arr);
+    return -1;  // should never reach
+}
+static void process_line(args_t *args)     // compare the genotypes at the current site and update the per-pair counters (ndiff or pdiff, hwe_prob, ncnt)
+{
+    int i,j,k, nqry1, ngt1, ret;
+
+    bcf1_t *gt_rec = NULL, *qry_rec = bcf_sr_get_line(args->files,0);   // the query file
+    int qry_use_GT = args->qry_use_GT;      // local copies: set_data() may switch the tag, the switch then holds for this record only
+    int gt_use_GT  = args->gt_use_GT;
+
+    ret = set_data(args, args->qry_hdr, qry_rec, &args->qry_arr, &args->nqry_arr, &nqry1, &qry_use_GT);
+    if ( ret<0 ) return;                    // site skipped, set_data() has updated the skip counters
 
-    // To be able to plot total discordance (=number of mismatching GTs with -G1) in the same
-    // plot as discordance per site, the latter must be scaled to the same range
-    int nsamples = bcf_hdr_nsamples(args->gt_hdr);
-    double extreme_lk = 0, extreme_lk_per_site = 0;
-    for (i=0; i<nsamples; i++)
+    if ( args->gt_hdr )
     {
-        if ( args->lks[i] < extreme_lk ) extreme_lk = args->lks[i];
-        if ( args->sites[i] && args->lks[i]/args->sites[i] < extreme_lk_per_site ) extreme_lk_per_site = args->lks[i]/args->sites[i];
+        gt_rec = bcf_sr_get_line(args->files,1);
+        ret = set_data(args, args->gt_hdr, gt_rec, &args->gt_arr, &args->ngt_arr, &ngt1, &gt_use_GT);
+        if ( ret<0 ) return;
+    }
+    else
+    {
+        ngt1 = nqry1;       // NOTE(review): gt_use_GT is not synchronized with qry_use_GT here although set_data() may have flipped the latter - confirm
+        args->gt_arr = args->qry_arr;       // no second file: both sides of the comparison read the query data
     }
 
-    // Sorted output
-    double **p = (double**) malloc(sizeof(double*)*nsamples);
-    for (i=0; i<nsamples; i++) p[i] = &args->lks[i];
-    qsort(p, nsamples, sizeof(int*), cmp_doubleptr);
+    args->ncmp++;
 
-    fprintf(fp, "# [1]CN\t[2]Discordance with %s (total)\t[3]Discordance (avg score per site)\t[4]Number of sites compared\t[5]Sample\t[6]Sample ID\n", args->sm_hdr->samples[query_isample]);
-    for (i=0; i<nsamples; i++)
+    double af,hwe_dsg[8];
+    if ( args->calc_hwe_prob )
     {
-        int idx = p[i] - args->lks;
-        double per_site = 0;
-        if ( args->sites[idx] )
+        int ac[2];
+        if ( args->gt_hdr )
         {
-            if ( args->sites[idx] && extreme_lk_per_site )
+            if ( bcf_calc_ac(args->gt_hdr, gt_rec, ac, BCF_UN_INFO|BCF_UN_FMT)!=1 ) error("todo: bcf_calc_ac() failed\n");
+        }
+        else if ( bcf_calc_ac(args->qry_hdr, qry_rec, ac, BCF_UN_INFO|BCF_UN_FMT)!=1 ) error("todo: bcf_calc_ac() failed\n");
+
+        // The hwe_dsg indexes correspond to the bitmask of eight dsg combinations to account for PL uncertainty,
+        // because in the extreme case we can have uninformative PL=0,0,0. So the values are the minima of e.g.
+        //      hwe_dsg[1,2,4] ..  dsg=0,1,2
+        //      hwe_dsg[3]     ..  dsg=0 or 1
+        //      hwe_dsg[6]     ..  dsg=1 or 2
+
+        double hwe[3];
+        const double min_af = 1e-5;             // cap the AF in case we get unrealistic values
+        af = (double)ac[1]/(ac[0]+ac[1]);
+        hwe[0] = af>min_af ? -log(af*af) : -log(min_af*min_af);     // NOTE(review): af is the ac[1] frequency, yet index 0 (dsg=0) gets af*af and index 2 gets (1-af)^2 - confirm the order is intended
+        hwe[1] = af>min_af && af<1-min_af ? -log(2*af*(1-af)) : -log(2*min_af*(1-min_af));
+        hwe[2] = af<(1-min_af) ? -log((1-af)*(1-af)) : -log(min_af*min_af);
+        hwe_dsg[0] = 0;                         // empty intersection of dosage masks contributes nothing
+        for (i=1; i<8; i++)
+        {
+            hwe_dsg[i] = HUGE_VAL;
+            for (k=0; k<3; k++)
             {
-                per_site = args->lks[idx]/args->sites[idx];
-                per_site *= extreme_lk / extreme_lk_per_site;
+                if ( ((1<<k)&i) && hwe_dsg[i] > hwe[k] ) hwe_dsg[i] = hwe[k];   // minimum of hwe[] over the dosages present in mask i
             }
-            else
-                per_site = 0;
         }
-        fprintf(fp, "CN\t%e\t%e\t%.0f\t%s\t%d\n", fabs(args->lks[idx]), fabs(per_site), args->sites[idx], args->gt_hdr->samples[idx], i);
     }
 
-    if ( args->plot )
+    // The sample pairs were given explicitly via -p/-P options
+    if ( args->pairs )
     {
-        if ( fclose(fp)!=0 ) error("[%s] Error: close failed\n", __func__);
-        plot_check(args, args->target_sample ? args->target_sample : "", args->sm_hdr->samples[query_isample]);
-    }
-}
+        if ( !args->use_PLs )
+        {
+            int ndiff = 0;      // number of pairs that mismatch at this site
+            if ( args->kbs_diff ) diff_sites_reset(args);
 
-// static inline int is_hom_most_likely(int nals, int *pls)
-// {
-//     int ia, ib, idx = 1, min_is_hom = 1, min_pl = pls[0];
-//     for (ia=1; ia<nals; ia++)
-//     {
-//         for (ib=0; ib<ia; ib++)
-//         {
-//             if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 0; }
-//             idx++;
-//         }
-//         if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 1; }
-//         idx++;
-//     }
-//     return min_is_hom;
-// }
-
-int process_GT(args_t *args, bcf1_t *line, uint32_t *ntot, uint32_t *ndif)
-{
-    int ngt = bcf_get_genotypes(args->sm_hdr, line, &args->tmp_arr, &args->ntmp_arr);
+            for (i=0; i<args->npairs; i++)
+            {
+                int32_t *ptr;
+                uint8_t qry_dsg, gt_dsg;
 
-    if ( ngt<=0 ) return 1;                 // GT not present
-    if ( ngt!=args->nsmpl*2 ) return 2;     // not diploid
-    ngt /= args->nsmpl;
-    
-    int i,j, idx = 0;
-    for (i=1; i<args->nsmpl; i++)
-    {
-        int32_t *a = args->tmp_arr + i*ngt;
-        if ( bcf_gt_is_missing(a[0]) || bcf_gt_is_missing(a[1]) || a[1]==bcf_int32_vector_end ) { idx+=i; continue; }
-        int agt = 1<<bcf_gt_allele(a[0]) | 1<<bcf_gt_allele(a[1]);
+                ptr = args->gt_arr + args->pairs[i].igt*ngt1;
+                gt_dsg = gt_use_GT ? gt_to_dsg(ptr) : pl_to_dsg(ptr);
+                if ( !gt_dsg ) continue;                        // missing value
+                if ( args->hom_only && !(gt_dsg&5) ) continue;  // not a hom
+
+                ptr = args->qry_arr + args->pairs[i].iqry*nqry1;
+                qry_dsg = qry_use_GT ? gt_to_dsg(ptr) : pl_to_dsg(ptr);
+                if ( !qry_dsg ) continue;                       // missing value
+
+                int match = qry_dsg & gt_dsg;                   // non-zero when the two dosage masks share a dosage
+                if ( !match )
+                {
+                    args->ndiff[i]++;
+                    if ( args->kbs_diff ) { ndiff++; kbs_insert(args->kbs_diff, i); }
+                }
+                else if ( args->calc_hwe_prob ) args->hwe_prob[i] += hwe_dsg[match];
+                args->ncnt[i]++;
+            }
 
-        for (j=0; j<i; j++)
+            if ( ndiff ) diff_sites_push(args, ndiff, qry_rec->rid, qry_rec->pos);
+        }
+        else    // use_PLs set
         {
-            int32_t *b = args->tmp_arr + j*ngt;
-            if ( bcf_gt_is_missing(b[0]) || bcf_gt_is_missing(b[1]) || b[1]==bcf_int32_vector_end ) { idx++; continue; }
-            int bgt = 1<<bcf_gt_allele(b[0]) | 1<<bcf_gt_allele(b[1]);
+            for (i=0; i<args->npairs; i++)
+            {
+                int32_t *ptr;
+                double qry_prob[3], gt_prob[3];
+                uint8_t qry_dsg, gt_dsg;
+
+                ptr = args->gt_arr + args->pairs[i].igt*ngt1;
+                gt_dsg = gt_use_GT ? gt_to_prob(args,ptr,gt_prob) : pl_to_prob(args,ptr,gt_prob);
+                if ( !gt_dsg ) continue;                        // missing value
+                if ( args->hom_only && !(gt_dsg&5) ) continue;  // not a hom
+               
+                ptr = args->qry_arr + args->pairs[i].iqry*nqry1;
+                qry_dsg = qry_use_GT ? gt_to_prob(args,ptr,qry_prob) : pl_to_prob(args,ptr,qry_prob);
+                if ( !qry_dsg ) continue;                       // missing value
 
-            ntot[idx]++;
-            if ( agt!=bgt ) ndif[idx]++;
-            idx++;
+                double min = qry_prob[0] + gt_prob[0];          // smallest of the three per-dosage sums qry_prob[k]+gt_prob[k]
+                qry_prob[1] += gt_prob[1];
+                if ( min > qry_prob[1] ) min = qry_prob[1];
+                qry_prob[2] += gt_prob[2];
+                if ( min > qry_prob[2] ) min = qry_prob[2];
+                args->pdiff[i] += min;
+
+                if ( args->calc_hwe_prob )
+                {
+                    int match = qry_dsg & gt_dsg;
+                    args->hwe_prob[i] += hwe_dsg[match];        // match can be 0 here, hwe_dsg[0] is 0
+                }
+                args->ncnt[i]++;
+            }
         }
+        return;
     }
-    return 0;
-}
-int process_PL(args_t *args, bcf1_t *line, uint32_t *ntot, uint32_t *ndif)
-{
-    int npl = bcf_get_format_int32(args->sm_hdr, line, "PL", &args->tmp_arr, &args->ntmp_arr);
 
-    if ( npl<=0 ) return 1;                 // PL not present
-    npl /= args->nsmpl;
-    
-    int i,j,k, idx = 0;
-    for (i=1; i<args->nsmpl; i++)
+    int idx=0;      // running index into the per-pair counters, advanced once per (i,j) combination below
+    if ( !args->use_PLs )
     {
-        int32_t *a = args->tmp_arr + i*npl;
-        int imin = -1;
-        for (k=0; k<npl; k++)
+        for (i=0; i<args->nqry_smpl; i++)
         {
-            if ( a[k]==bcf_int32_vector_end ) break;
-            if ( a[k]==bcf_int32_missing ) continue;
-            if ( imin==-1 || a[imin] > a[k] ) imin = k;
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;      // qry_smpl, when set, maps i to the sample index in the file
+            int32_t *ptr = args->qry_arr + nqry1*iqry;
+            args->qry_dsg[i] = qry_use_GT ? gt_to_dsg(ptr) : pl_to_dsg(ptr);
         }
-        if ( imin<0 ) { idx+=i; continue; }
-
-        for (j=0; j<i; j++)
+        if ( !args->cross_check )   // in this case gt_dsg points to qry_dsg
         {
-            int32_t *b = args->tmp_arr + j*npl;
-            int jmin = -1;
-            for (k=0; k<npl; k++)
+            for (i=0; i<args->ngt_smpl; i++)
             {
-                if ( b[k]==bcf_int32_vector_end ) break;
-                if ( b[k]==bcf_int32_missing ) continue;
-                if ( jmin==-1 || b[jmin] > b[k] ) jmin = k;
+                int igt = args->gt_smpl ? args->gt_smpl[i] : i;
+                int32_t *ptr = args->gt_arr + ngt1*igt;
+                args->gt_dsg[i] = gt_use_GT ? gt_to_dsg(ptr) : pl_to_dsg(ptr);
+                if ( args->hom_only && !(args->gt_dsg[i]&5) ) args->gt_dsg[i] = 0;      // not a hom, set to a missing value
+            }
+        }
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            int ngt = args->cross_check ? i : args->ngt_smpl;       // two files or a sub-diagonal cross-check mode?
+            if ( !args->qry_dsg[i] ) { idx += ngt; continue; }      // missing value
+            for (j=0; j<ngt; j++)
+            {
+                if ( !args->gt_dsg[j] ) { idx++; continue; }        // missing value
+                int match = args->qry_dsg[i] & args->gt_dsg[j];
+                if ( !match ) args->ndiff[idx]++;
+                else if ( args->calc_hwe_prob ) args->hwe_prob[idx] += hwe_dsg[match];
+                args->ncnt[idx]++;
+                idx++;
             }
-            if ( jmin<0 ) { idx++; continue; }
-
-            ntot[idx]++;
-            if ( imin!=jmin ) ndif[idx]++;
-            idx++;
         }
     }
-    return 0;
-}
+    else    // use_PLs set
+    {
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;
+            int32_t *ptr = args->qry_arr + nqry1*iqry;
+            args->qry_dsg[i] = qry_use_GT ? gt_to_prob(args,ptr,args->qry_prob+i*3) : pl_to_prob(args,ptr,args->qry_prob+i*3);
+        }
+        if ( !args->cross_check )   // in this case gt_dsg points to qry_dsg
+        {
+            for (i=0; i<args->ngt_smpl; i++)
+            {
+                int igt = args->gt_smpl ? args->gt_smpl[i] : i;
+                int32_t *ptr = args->gt_arr + ngt1*igt;
+                args->gt_dsg[i] = gt_use_GT ? gt_to_prob(args,ptr,args->gt_prob+i*3) : pl_to_prob(args,ptr,args->gt_prob+i*3);
+                if ( args->hom_only && !(args->gt_dsg[i]&5) ) args->gt_dsg[i] = 0;      // not a hom, set to a missing value
+            }
+        }
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            int ngt = args->cross_check ? i : args->ngt_smpl;       // two files or a sub-diagonal cross-check mode?
+            if ( !args->qry_dsg[i] ) { idx += ngt; continue; }      // missing value
+            for (j=0; j<ngt; j++)
+            {
+                if ( !args->gt_dsg[j] ) { idx++; continue; }        // missing value
 
-static void cross_check_gts(args_t *args)
-{
-    // Initialize things: check which tags are defined in the header, sample names etc.
-    if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "PL")<0 )
-    {
-        if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
-            error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
-        if ( !args->no_PLs ) {
-            fprintf(stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
-            args->no_PLs = 99;
+                double min = args->qry_prob[i*3] + args->gt_prob[j*3];      // smallest of the three per-dosage sums, as in the explicit-pairs branch
+                if ( min > args->qry_prob[i*3+1] + args->gt_prob[j*3+1] ) min = args->qry_prob[i*3+1] + args->gt_prob[j*3+1];
+                if ( min > args->qry_prob[i*3+2] + args->gt_prob[j*3+2] ) min = args->qry_prob[i*3+2] + args->gt_prob[j*3+2];
+                args->pdiff[idx] += min;
+
+                if ( args->calc_hwe_prob )
+                {
+                    int match = args->qry_dsg[i] & args->gt_dsg[j];
+                    args->hwe_prob[idx] += hwe_dsg[match];
+                }
+                args->ncnt[idx]++;
+                idx++;
+            }
         }
     }
+}
 
-    args->nsmpl = bcf_hdr_nsamples(args->sm_hdr);
-    args->narr  = (args->nsmpl-1)*args->nsmpl/2;
 
-    uint32_t *ndif = (uint32_t*) calloc(args->narr,4);
-    uint32_t *ntot = (uint32_t*) calloc(args->narr,4);
+typedef struct     // sortable record used by report() to rank candidate samples
+{
+    int ism, idx;   // ism: position of the candidate sample; idx: index of the pair in the ndiff/pdiff/hwe_prob/ncnt arrays
+    double val;     // sort key, smaller values sort first (see cmp_idbl)
+}
+idbl_t;
+static int cmp_idbl(const void *_a, const void *_b)    // qsort() comparator: ascending order by idbl_t.val
+{
+    idbl_t *a = (idbl_t*)_a;
+    idbl_t *b = (idbl_t*)_b;
+    if ( a->val < b->val ) return -1;
+    if ( a->val > b->val ) return 1;
+    return 0;       // equal values; also reached when either value is NaN because both comparisons are then false
+}
+static void report_distinctive_sites(args_t *args)     // print the DS section: sites grouped into blocks, each block distinguishing at least ndiff_min sample pairs
+{
+    extsort_sort(args->es);
+
+    fprintf(args->fp,"# DS, distinctive sites:\n");
+    fprintf(args->fp,"#     - chromosome\n");
+    fprintf(args->fp,"#     - position\n");
+    fprintf(args->fp,"#     - cumulative number of pairs distinguished by this block\n");
+    fprintf(args->fp,"#     - block id\n");
+    fprintf(args->fp,"#DS\t[2]Chromosome\t[3]Position\t[4]Cumulative number of distinct pairs\t[5]Block id\n");
 
-    while ( bcf_sr_next_line(args->files) )
+    kbitset_t *kbs_blk = kbs_init(args->npairs);       // pairs already distinguished within the current block
+    kbitset_iter_t itr;
+    int i,ndiff,rid,pos,ndiff_tot = 0, iblock = 0;
+    int ndiff_min = args->distinctive_sites <= args->npairs ? args->distinctive_sites : args->npairs;  // the requested number, capped at npairs
+    while ( diff_sites_shift(args,&ndiff,&rid,&pos) )
     {
-        bcf1_t *line = bcf_sr_get_line(args->files,0);
-
-        // use PLs unless no_PLs is set and GT exists
-        if ( args->no_PLs )
+        int ndiff_new = 0, ndiff_dbg = 0;   // pairs newly added to the block by this site; all pairs stored for this site
+        kbs_start(&itr);
+        while ( (i=kbs_next(args->kbs_diff, &itr))>=0 )
         {
-            if ( process_GT(args,line,ntot,ndif)==0 ) continue;
+            ndiff_dbg++;
+            if ( kbs_exists(kbs_blk,i) ) continue;   // already set
+            kbs_insert(kbs_blk,i);
+            ndiff_new++;
         }
-        process_PL(args,line,ntot,ndif);
+        if ( ndiff_dbg!=ndiff ) error("Corrupted data, fixme: %d vs %d\n",ndiff_dbg,ndiff);    // sanity check: the bitset must hold exactly ndiff pairs
+        if ( !ndiff_new ) continue;     // no new pair distinguished by this site
+        ndiff_tot += ndiff_new;
+        fprintf(args->fp,"DS\t%s\t%d\t%d\t%d\n",bcf_hdr_id2name(args->qry_hdr,rid),pos+1,ndiff_tot,iblock);
+        if ( ndiff_tot < ndiff_min ) continue;   // fewer than the requested number of pairs can be distinguished at this point
+        iblock++;           // block complete: start a new one with an empty set of pairs
+        ndiff_tot = 0;
+        kbs_clear(kbs_blk);
     }
-    
-    FILE *fp = stdout;
-    print_header(args, fp);
+    kbs_destroy(kbs_blk);
+}
+static void report(args_t *args)       // print the INFO counters and the DC (discordance) table to args->fp
+{
+    fprintf(args->fp,"INFO\tsites-compared\t%u\n",args->ncmp);
+    fprintf(args->fp,"INFO\tsites-skipped-no-match\t%u\n",args->nskip_no_match);
+    fprintf(args->fp,"INFO\tsites-skipped-multiallelic\t%u\n",args->nskip_not_ba);
+    fprintf(args->fp,"INFO\tsites-skipped-monoallelic\t%u\n",args->nskip_mono);
+    fprintf(args->fp,"INFO\tsites-skipped-no-data\t%u\n",args->nskip_no_data);
+    fprintf(args->fp,"INFO\tsites-skipped-GT-not-diploid\t%u\n",args->nskip_dip_GT);
+    fprintf(args->fp,"INFO\tsites-skipped-PL-not-diploid\t%u\n",args->nskip_dip_PL);
+    fprintf(args->fp,"# DC, discordance:\n");
+    fprintf(args->fp,"#     - query sample\n");
+    fprintf(args->fp,"#     - genotyped sample\n");
+    fprintf(args->fp,"#     - discordance (number of mismatches; smaller is better)\n");
+    fprintf(args->fp,"#     - negative log of HWE probability at matching sites (rare genotypes mataches are more informative, bigger is better)\n");
+    fprintf(args->fp,"#     - number of sites compared (bigger is better)\n");
+    fprintf(args->fp,"#DC\t[2]Query Sample\t[3]Genotyped Sample\t[4]Discordance\t[5]-log P(HWE)\t[6]Number of sites compared\n");
 
-    float *tmp = (float*)malloc(sizeof(float)*args->nsmpl*(args->nsmpl-1)/2);
+    int trim = args->ntop;      // non-zero: print only the ntop best candidates for each query sample
+    if ( !args->pairs )
+    {
+        if ( !args->ngt_smpl && args->nqry_smpl <= args->ntop ) trim = 0;       // not more samples than ntop, print everything
+        if ( args->ngt_smpl && args->ngt_smpl <= args->ntop  ) trim = 0;
+    }
 
-    // Output pairwise distances
-    fprintf(fp, "# ERR, error rate\t[2]Pairwise error rate\t[3]Number of sites compared\t[4]Sample i\t[5]Sample j\n");
-    int i,j, idx = 0;
-    for (i=0; i<args->nsmpl; i++)
+    if ( args->pairs )          // explicit list of pairs: one line per pair, in the given order
     {
-        for (j=0; j<i; j++)
+        int i;
+        for (i=0; i<args->npairs; i++)
         {
-            float err = ntot[idx] ? (float)ndif[idx]/ntot[idx] : 1e-10;
-            fprintf(fp, "ERR\t%f\t%"PRId32"\t%s\t%s\n", err, ntot[idx],args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
-            PDIST(tmp,i,j) = err;
-            idx++;
+            int iqry = args->pairs[i].iqry;
+            int igt  = args->pairs[i].igt;
+            if ( args->ndiff )      // mismatch counts are printed when ndiff is allocated, the summed pdiff scores otherwise
+            {
+                fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n",
+                        args->qry_hdr->samples[iqry],
+                        args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                        args->ndiff[i],
+                        args->calc_hwe_prob ? args->hwe_prob[i] : 0,
+                        args->ncnt[i]);
+            }
+            else
+            {
+                fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n",
+                        args->qry_hdr->samples[iqry],
+                        args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                        args->pdiff[i],
+                        args->calc_hwe_prob ? args->hwe_prob[i] : 0,
+                        args->ncnt[i]);
+            }
         }
     }
-
-    // Cluster samples
-    int nlist;
-    float clust_max_err = args->max_intra_err;
-    hclust_t *clust = hclust_init(args->nsmpl,tmp);
-    cluster_t *list = hclust_create_list(clust,args->min_inter_err,&clust_max_err,&nlist);
-    fprintf(fp, "# CLUSTER\t[2]Maximum inter-cluster ERR\t[3-]List of samples\n");
-    for (i=0; i<nlist; i++)
-    {
-        fprintf(fp,"CLUSTER\t%f", list[i].dist);
-        for (j=0; j<list[i].nmemb; j++)
-            fprintf(fp,"\t%s",args->sm_hdr->samples[list[i].memb[j]]);
-        fprintf(fp,"\n");
-    }
-    hclust_destroy_list(list,nlist);
-    // Debugging output: the cluster graph and data used for deciding
-    char **dbg = hclust_explain(clust,&nlist);
-    for (i=0; i<nlist; i++)
-        fprintf(fp,"DBG\t%s\n", dbg[i]);
-    fprintf(fp, "# TH, clustering threshold\t[2]Value\nTH\t%f\n",clust_max_err);
-    fprintf(fp, "# DOT\t[2]Cluster graph, visualize e.g. as \"this-output.txt | grep ^DOT | cut -f2- | dot -Tsvg -o graph.svg\"\n");
-    fprintf(fp, "DOT\t%s\n", hclust_create_dot(clust,args->sm_hdr->samples,clust_max_err));
-    hclust_destroy(clust);
-    free(tmp);
-
-
-    // Deprecated output for temporary backward compatibility
-    fprintf(fp, "# Warning: The CN block is deprecated and will be removed in future releases. Use ERR instead.\n");
-    fprintf(fp, "# [1]CN\t[2]Discordance\t[3]Number of sites\t[4]Average minimum depth\t[5]Sample i\t[6]Sample j\n");
-    idx = 0;
-    for (i=0; i<args->nsmpl; i++)
+    else if ( !trim )           // untrimmed output: all pairs in the same order in which process_line() filled the counters
     {
-        for (j=0; j<i; j++)
+        int i,j,idx=0;
+        for (i=0; i<args->nqry_smpl; i++)
         {
-            fprintf(fp, "CN\t%"PRId32"\t%"PRId32"\t0\t%s\t%s\n", ndif[idx], ntot[idx],args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
-            idx++;
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;
+            int ngt  = args->cross_check ? i : args->ngt_smpl;
+            for (j=0; j<ngt; j++)
+            {
+                int igt = args->gt_smpl ? args->gt_smpl[j] : j;
+                if ( args->ndiff )
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                            args->ndiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+                else
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                            args->pdiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+                idx++;
+            }
         }
     }
-
-    free(ndif);
-    free(ntot);
-    free(args->tmp_arr);
+    else if ( !args->cross_check )      // trimmed output, query vs genotype samples: sort each query's row and print its first ntop entries
+    {
+        idbl_t *arr = (idbl_t*)malloc(sizeof(*arr)*args->ngt_smpl);
+        int i,j;
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            int idx  = i*args->ngt_smpl;        // first counter of the i-th query sample, each has ngt_smpl of them
+            for (j=0; j<args->ngt_smpl; j++)
+            {
+                if ( args->sort_by_hwe )
+                    arr[j].val = -args->hwe_prob[idx];      // negated so that the biggest HWE score sorts first
+                else if ( args->ndiff )
+                    arr[j].val = args->ncnt[idx] ? (double)args->ndiff[idx]/args->ncnt[idx] : 0;    // mismatches per compared site, 0 when nothing was compared
+                else
+                    arr[j].val = args->ncnt[idx] ? args->pdiff[idx]/args->ncnt[idx] : 0;
+                arr[j].ism = j;
+                arr[j].idx = idx;
+                idx++;
+            }
+            qsort(arr, args->ngt_smpl, sizeof(*arr), cmp_idbl);
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;
+            for (j=0; j<args->ntop; j++)        // trim is set only when ntop is smaller than ngt_smpl, so j stays within arr
+            {
+                int idx = arr[j].idx;
+                int igt = args->gt_smpl ? args->gt_smpl[arr[j].ism] : arr[j].ism;
+                if ( args->ndiff )
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                            args->ndiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+                else
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                            args->pdiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+            }
+        }
+        free(arr);
+    }
+    else        // trimmed output in the cross-check mode, the counters form a lower triangle without the diagonal
+    {
+        int narr = args->nqry_smpl-1;       // every sample has nqry_smpl-1 partners
+        idbl_t *arr = (idbl_t*)malloc(sizeof(*arr)*narr);
+        int i,j,k,idx;
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            k = 0, idx = i*(i-1)/2;         // start of the i-th row: the pairs (i,j) with j<i
+            for (j=0; j<i; j++)
+            {
+                if ( args->sort_by_hwe )
+                    arr[k].val = -args->hwe_prob[idx];
+                else if ( args->ndiff )
+                    arr[k].val = args->ncnt[idx] ? (double)args->ndiff[idx]/args->ncnt[idx] : 0;
+                else
+                    arr[k].val = args->ncnt[idx] ? args->pdiff[idx]/args->ncnt[idx] : 0;
+                arr[k].ism = j;
+                arr[k].idx = idx;
+                idx++;
+                k++;
+            }
+            for (; j<narr; j++)             // partners with a bigger index: the pair (j+1,i) is stored in row j+1, column i
+            {
+                idx = j*(j+1)/2 + i;
+                if ( args->sort_by_hwe )
+                    arr[k].val = -args->hwe_prob[idx];
+                else if ( args->ndiff )
+                    arr[k].val = args->ncnt[idx] ? (double)args->ndiff[idx]/args->ncnt[idx] : 0;
+                else
+                    arr[k].val = args->ncnt[idx] ? args->pdiff[idx]/args->ncnt[idx] : 0;
+                arr[k].ism = j + 1;
+                arr[k].idx = idx;
+                k++;
+            }
+            qsort(arr, narr, sizeof(*arr), cmp_idbl);
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;
+            for (j=0; j<args->ntop; j++)
+            {
+                if ( i <= arr[j].ism ) continue;    // only partners with an index smaller than i are printed, the others are skipped without a replacement
+                int idx = arr[j].idx;
+                int igt = args->qry_smpl ? args->qry_smpl[arr[j].ism] : arr[j].ism;
+                if ( args->ndiff )
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->qry_hdr->samples[igt],
+                            args->ndiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+                else
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->qry_hdr->samples[igt],
+                            args->pdiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+            }
+        }
+        free(arr);
+    }
 }
 
-static char *init_prefix(char *prefix)
+static int is_input_okay(args_t *args, int nmatch)     // returns 1 when the current site can be compared, 0 when it must be skipped; updates the skip counters
 {
-    int len = strlen(prefix);
-    if ( prefix[len-1] == '/' || prefix[len-1] == '\\' )
-        return msprintf("%sgtcheck", prefix);
-    return strdup(prefix);
+    int i;
+    const char *msg;
+    bcf_hdr_t *hdr;
+    bcf1_t *rec;
+    if ( args->gt_hdr && nmatch!=2 )        // two files are read but nmatch says that not both have this record
+    {
+        if ( args->nskip_no_match++ ) return 0;     // count every occurrence, print the message for the first one only
+        for (i=0; i<2; i++)                 // find the reader that has the record so that its position can be printed
+        {
+            rec = bcf_sr_get_line(args->files,i);
+            if ( rec ) break;
+        }
+        hdr = bcf_sr_get_header(args->files,i);
+        fprintf(stderr,"INFO: skipping %s:%"PRIhts_pos", no record with matching POS+ALT. (This is printed only once.)\n",
+                bcf_seqname(hdr,rec),rec->pos+1);
+        return 0;
+    }
+    for (i=0; i<2; i++)                     // check the query record and, if gt_hdr is set, also the second reader's record
+    {
+        hdr = bcf_sr_get_header(args->files,i);
+        rec = bcf_sr_get_line(args->files,i);
+        if ( rec->n_allele>2 )
+        {
+            if ( args->nskip_not_ba++ ) return 0;
+            msg = "not a biallelic site, run `bcftools norm -m -` first";
+            goto not_okay;
+        }
+        if ( bcf_get_variant_types(rec)==VCF_REF )
+        {
+            if ( args->nskip_mono++ ) return 0;
+            msg = "monoallelic site";
+            goto not_okay;
+        }
+        if ( !args->gt_hdr ) break;         // single-file mode, there is no second record to check
+    }
+    return 1;
+
+not_okay:       // reached only for the first skipped site of the given kind
+    fprintf(stderr,"INFO: skipping %s:%"PRIhts_pos", %s. (This is printed only once.)\n", 
+        bcf_seqname(hdr,rec),rec->pos+1,msg);
+    return 0;
 }
 
 static void usage(void)
@@ -712,18 +1026,41 @@ static void usage(void)
     fprintf(stderr, "Usage:   bcftools gtcheck [options] [-g <genotypes.vcf.gz>] <query.vcf.gz>\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
-    fprintf(stderr, "    -a, --all-sites                 output comparison for all sites\n");
-    fprintf(stderr, "    -c, --cluster <min,max>         min inter- and max intra-sample error [0.23,-0.3]\n");
-    fprintf(stderr, "    -g, --genotypes <file>          genotypes to compare against\n");
-    fprintf(stderr, "    -G, --GTs-only <int>            use GTs, ignore PLs, using <int> for unseen genotypes [99]\n");
-    fprintf(stderr, "    -H, --homs-only                 homozygous genotypes only (useful for low coverage data)\n");
-    fprintf(stderr, "    -p, --plot <prefix>             plot\n");
-    fprintf(stderr, "    -r, --regions <region>          restrict to comma-separated list of regions\n");
-    fprintf(stderr, "    -R, --regions-file <file>       restrict to regions listed in a file\n");
-    fprintf(stderr, "    -s, --query-sample <string>     query sample (by default the first sample is checked)\n");
-    fprintf(stderr, "    -S, --target-sample <string>    target sample in the -g file (used only for plotting)\n");
-    fprintf(stderr, "    -t, --targets <region>          similar to -r but streams rather than index-jumps\n");
-    fprintf(stderr, "    -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n");
+    //fprintf(stderr, "    -a, --all-sites                  Output comparison for all sites\n");
+    //fprintf(stderr, "    -c, --cluster MIN,MAX            Min inter- and max intra-sample error [0.23,-0.3]\n");
+    fprintf(stderr, "        --distinctive-sites            Find sites that can distinguish between at least NUM sample pairs.\n");
+    fprintf(stderr, "                  NUM[,MEM[,TMP]]          If the number is smaller or equal to 1, it is interpreted as the fraction of pairs.\n");
+    fprintf(stderr, "                                           The optional MEM string sets the maximum memory used for in-memory sorting [500M]\n");
+#ifdef _WIN32
+    fprintf(stderr, "                                           and TMP is a prefix of temporary files used by external sorting [/bcftools.XXXXXX]\n");
+#else
+    fprintf(stderr, "                                           and TMP is a prefix of temporary files used by external sorting [/tmp/bcftools.XXXXXX]\n");
+#endif
+    fprintf(stderr, "        --dry-run                      Stop after first record to estimate required time\n");
+    fprintf(stderr, "    -e, --error-probability INT        Phred-scaled probability of genotyping error, 0 for faster but less accurate results [40]\n");
+    fprintf(stderr, "    -g, --genotypes FILE               Genotypes to compare against\n");
+    fprintf(stderr, "    -H, --homs-only                    Homozygous genotypes only, useful with low coverage data (requires -g)\n");
+    fprintf(stderr, "        --n-matches INT                Print only top INT matches for each sample (sorted by average score), 0 for unlimited.\n");
+    fprintf(stderr, "                                           Use negative value to sort by HWE probability rather than by discordance [0]\n");
+    fprintf(stderr, "        --no-HWE-prob                  Disable calculation of HWE probability\n");
+    fprintf(stderr, "    -p, --pairs LIST                   Comma-separated sample pairs to compare (qry,gt[,qry,gt..] with -g or qry,qry[,qry,qry..] w/o)\n");
+    fprintf(stderr, "    -P, --pairs-file FILE              File with tab-delimited sample pairs to compare (qry,gt with -g or qry,qry w/o)\n");
+    fprintf(stderr, "    -r, --regions REGION               Restrict to comma-separated list of regions\n");
+    fprintf(stderr, "    -R, --regions-file FILE            Restrict to regions listed in a file\n");
+    fprintf(stderr, "    -s, --samples [qry|gt]:LIST        List of query or -g samples, \"-\" to select all samples (by default all samples are compared)\n");
+    fprintf(stderr, "    -S, --samples-file [qry|gt]:FILE   File with the query or -g samples to compare\n");
+    fprintf(stderr, "    -t, --targets REGION               Similar to -r but streams rather than index-jumps\n");
+    fprintf(stderr, "    -T, --targets-file FILE            Similar to -R but streams rather than index-jumps\n");
+    fprintf(stderr, "    -u, --use TAG1[,TAG2]              Which tag to use in the query file (TAG1) and the -g file (TAG2) [PL,GT]\n");
+    fprintf(stderr, "Examples:\n");   // worked examples printed at the end of the help text
+    fprintf(stderr, "   # Check discordance of all samples from B against all samples in A\n");
+    fprintf(stderr, "   bcftools gtcheck -g A.bcf B.bcf\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "   # Limit comparisons to the given list of samples\n");
+    fprintf(stderr, "   bcftools gtcheck -s gt:a1,a2,a3 -s qry:b1,b2 -g A.bcf B.bcf\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "   # Compare only two pairs a1,b1 and a1,b2\n");
+    fprintf(stderr, "   bcftools gtcheck -p a1,b1,a1,b2 -g A.bcf B.bcf\n");
     fprintf(stderr, "\n");
     exit(1);
 }
@@ -732,10 +1069,19 @@ int main_vcfgtcheck(int argc, char *argv[])
 {
     int c;
     args_t *args = (args_t*) calloc(1,sizeof(args_t));
-    args->files  = bcf_sr_init();
     args->argc   = argc; args->argv = argv; set_cwd(args);
-    char *regions = NULL, *targets = NULL;
-    int regions_is_file = 0, targets_is_file = 0;
+    args->qry_use_GT = -1;
+    args->gt_use_GT  = -1;
+    args->calc_hwe_prob = 1;
+    args->use_PLs = 40;
+
+    // external sort for --distinctive-sites
+#ifdef _WIN32
+    args->es_tmp_prefix = NULL;
+#else
+    args->es_tmp_prefix = "/tmp/bcftools-gtcheck";
+#endif
+    args->es_max_mem = strdup("500M");
 
     // In simulated sample swaps the minimum error was 0.3 and maximum intra-sample error was 0.23
     //    - min_inter: pairs with smaller err value will be considered identical 
@@ -746,6 +1092,8 @@ int main_vcfgtcheck(int argc, char *argv[])
 
     static struct option loptions[] =
     {
+        {"error-probability",1,0,'e'},
+        {"use",1,0,'u'},
         {"cluster",1,0,'c'},
         {"GTs-only",1,0,'G'},
         {"all-sites",0,0,'a'},
@@ -753,18 +1101,74 @@ int main_vcfgtcheck(int argc, char *argv[])
         {"help",0,0,'h'},
         {"genotypes",1,0,'g'},
         {"plot",1,0,'p'},
-        {"target-sample",1,0,'S'},
-        {"query-sample",1,0,'s'},
+        {"samples",1,0,'s'},
+        {"samples-file",1,0,'S'},
+        {"n-matches",1,0,2},
+        {"no-HWE-prob",0,0,3},
+        {"target-sample",1,0,4},
+        {"dry-run",0,0,5},
+        {"distinctive-sites",1,0,6},
         {"regions",1,0,'r'},
         {"regions-file",1,0,'R'},
         {"targets",1,0,'t'},
         {"targets-file",1,0,'T'},
+        {"pairs",1,0,'p'},
+        {"pairs-file",1,0,'P'},
         {0,0,0,0}
     };
     char *tmp;
-    while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:R:at:T:G:c:",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "hg:p:s:S:p:P:Hr:R:at:T:G:c:u:e:",loptions,NULL)) >= 0) {
         switch (c) {
+            case 'e':
+                args->use_PLs = strtol(optarg,&tmp,10);
+                if ( !tmp || *tmp ) error("Could not parse: --error-probability %s\n", optarg);
+                break;
+            case 'u':
+                {
+                    int i,nlist;
+                    char **list = hts_readlist(optarg, 0, &nlist);
+                    if ( !list || nlist<=0 || nlist>2 ) error("Failed to parse --use %s\n", optarg);
+                    if ( !strcasecmp("GT",list[0]) ) args->qry_use_GT = 1;
+                    else if ( !strcasecmp("PL",list[0]) ) args->qry_use_GT = 0;
+                    else error("Failed to parse --use %s; only GT and PL are supported\n", optarg);
+                    if ( nlist==2 )
+                    {
+                        if ( !strcasecmp("GT",list[1]) ) args->gt_use_GT = 1;
+                        else if ( !strcasecmp("PL",list[1]) ) args->gt_use_GT = 0;
+                        else error("Failed to parse --use %s; only GT and PL are supported\n", optarg);
+                    }
+                    else args->gt_use_GT = args->qry_use_GT;
+                    for (i=0; i<nlist; i++) free(list[i]);
+                    free(list);
+                }
+                break;
+            case 2 :
+                args->ntop = strtol(optarg,&tmp,10);
+                if ( !tmp || *tmp ) error("Could not parse: --n-matches %s\n", optarg);
+                if ( args->ntop < 0 )
+                {
+                    args->sort_by_hwe = 1;
+                    args->ntop *= -1;
+                }
+                break;
+            case 3 : args->calc_hwe_prob = 0; break;
+            case 4 : error("The option -S, --target-sample has been deprecated\n"); break;
+            case 5 : args->dry_run = 1; break;
+            case 6 :   // --distinctive-sites NUM[,MEM[,TMP]]
+                args->distinctive_sites = strtod(optarg,&tmp);
+                if ( *tmp )
+                {
+                    if ( *tmp!=',' ) error("Could not parse: --distinctive-sites %s\n", optarg);
+                    char *mem = ++tmp;   // start of the MEM field, right after the first comma
+                    while ( *tmp && *tmp!=',' ) tmp++;
+                    if ( *tmp ) { *tmp = 0; args->es_tmp_prefix = tmp+1; }   // terminate MEM at the second comma; TMP keeps pointing into optarg
+                    free(args->es_max_mem);
+                    args->es_max_mem = strdup(mem);   // copy only after the split, otherwise the copy would still contain ",TMP"
+                }
+                args->use_PLs = 0;
+                break;
             case 'c':
+                error("The -c option is to be implemented, please open an issue on github\n");
                 args->min_inter_err = strtod(optarg,&tmp);
                 if ( *tmp )
                 {
@@ -773,50 +1177,77 @@ int main_vcfgtcheck(int argc, char *argv[])
                     if ( *tmp ) error("Could not parse: -c %s\n", optarg);
                 }
                 break;
-            case 'G':
-                args->no_PLs = strtol(optarg,&tmp,10);
-                if ( *tmp ) error("Could not parse argument: --GTs-only %s\n", optarg);
-                break;
-            case 'a': args->all_sites = 1; break;
+            case 'G': error("The option -G, --GTs-only has been deprecated\n"); break;
+            case 'a': args->all_sites = 1; error("The -a option is to be implemented, please open an issue on github\n"); break;
             case 'H': args->hom_only = 1; break;
             case 'g': args->gt_fname = optarg; break;
-            case 'p': args->plot = optarg; break;
-            case 'S': args->target_sample = optarg; break;
-            case 's': args->query_sample = optarg; break;
-            case 'r': regions = optarg; break;
-            case 'R': regions = optarg; regions_is_file = 1; break;
-            case 't': targets = optarg; break;
-            case 'T': targets = optarg; targets_is_file = 1; break;
+//            case 'p': args->plot = optarg; break;
+            case 's':
+                if ( !strncasecmp("gt:",optarg,3) ) args->gt_samples = optarg+3;
+                else if ( !strncasecmp("qry:",optarg,4) ) args->qry_samples = optarg+4;
+                else error("Which one? Query samples (qry:%s) or genotype samples (gt:%s)?\n",optarg,optarg);
+                break;
+            case 'S': 
+                if ( !strncasecmp("gt:",optarg,3) ) args->gt_samples = optarg+3, args->gt_samples_is_file = 1;
+                else if ( !strncasecmp("qry:",optarg,4) ) args->qry_samples = optarg+4, args->qry_samples_is_file = 1;
+                else error("Which one? Query samples (qry:%s) or genotype samples (gt:%s)?\n",optarg,optarg);
+                break;
+            case 'p': args->pair_samples = optarg; break;
+            case 'P': args->pair_samples = optarg; args->pair_samples_is_file = 1; break;
+            case 'r': args->regions = optarg; break;
+            case 'R': args->regions = optarg; args->regions_is_file = 1; break;
+            case 't': args->targets = optarg; break;
+            case 'T': args->targets = optarg; args->targets_is_file = 1; break;
             case 'h':
             case '?': usage(); break;
             default: error("Unknown argument: %s\n", optarg);
         }
     }
-    char *fname = NULL;
     if ( optind==argc )
     {
-        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        if ( !isatty(fileno((FILE *)stdin)) ) args->qry_fname = "-";  // reading from stdin
         else usage();   // no files given
     }
-    else fname = argv[optind];
-    if ( argc>optind+1 )  usage();  // too many files given
-    if ( !args->gt_fname ) args->cross_check = 1;   // no genotype file, run in cross-check mode
-    else args->files->require_index = 1;
-    if ( regions && bcf_sr_set_regions(args->files, regions, regions_is_file)<0 ) error("Failed to read the regions: %s\n", regions);
-    if ( targets && bcf_sr_set_targets(args->files, targets, targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n", targets);
-    if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
-    if ( args->gt_fname && !bcf_sr_add_reader(args->files, args->gt_fname) )
-        error("Failed to read from %s: %s\n", !strcmp("-",args->gt_fname)?"standard input":args->gt_fname,bcf_sr_strerror(args->files->errnum));
-    args->files->collapse = COLLAPSE_SNPS|COLLAPSE_INDELS;
-    if ( args->plot ) args->plot = init_prefix(args->plot);
+    else args->qry_fname = argv[optind];
+    if ( argc>optind+1 ) error("Error: too many files given, run with -h for help\n");  // too many files given
+    if ( args->pair_samples )
+    {
+        if ( args->gt_samples || args->qry_samples ) error("The -p/-P option cannot be combined with -s/-S\n");
+        if ( args->ntop ) error("The --n-matches option cannot be combined with -p/-P\n");
+    }
+    if ( args->distinctive_sites && !args->pair_samples ) error("The experimental option --distinctive-sites requires -p/-P\n");
+    if ( args->hom_only && !args->gt_fname ) error("The option --homs-only requires --genotypes\n");
+    if ( args->distinctive_sites && args->use_PLs ) error("The option --distinctive-sites cannot be combined with --error-probability\n");
+
     init_data(args);
-    if ( args->cross_check )
-        cross_check_gts(args);
-    else
-        check_gt(args);
+
+    int ret;
+    while ( (ret=bcf_sr_next_line(args->files)) )
+    {
+        if ( !is_input_okay(args,ret) ) continue;   // unusable site, already counted (and reported once) by is_input_okay
+
+        // time one record to give the user an estimate with very big files
+        struct timeval t0, t1; int is_first = !args->ncmp;   // t0 is taken only while no record has been compared yet
+        if ( is_first )  gettimeofday(&t0, NULL);
+
+        process_line(args);
+        // report only on the pass that moved ncmp from 0 to 1; on any other pass t0 was never set in this scope
+        if ( is_first && args->ncmp==1 )
+        {
+            gettimeofday(&t1, NULL);
+            double delta = (t1.tv_sec - t0.tv_sec) * 1e6 + (t1.tv_usec - t0.tv_usec);
+            fprintf(stderr,"INFO:\tTime required to process one record .. %f seconds\n",delta/1e6);
+            fprintf(args->fp,"INFO\tTime required to process one record .. %f seconds\n",delta/1e6);
+            if ( args->dry_run ) break;
+        }
+    }
+    if ( !args->dry_run )
+    {
+        report(args);
+        if ( args->distinctive_sites ) report_distinctive_sites(args);
+    }
+
     destroy_data(args);
-    bcf_sr_destroy(args->files);
-    if (args->plot) free(args->plot);
     free(args);
     return 0;
 }
diff --git a/bcftools/vcfgtcheck.c.pysam.c b/bcftools/vcfgtcheck.c.pysam.c
index ae8ba74..6ab27ed 100644
--- a/bcftools/vcfgtcheck.c.pysam.c
+++ b/bcftools/vcfgtcheck.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfgtcheck.c -- Check sample identity.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -28,8 +28,10 @@ THE SOFTWARE.  */
 #include <stdarg.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
+#include <strings.h>
 #include <errno.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -37,240 +39,46 @@ THE SOFTWARE.  */
 #include <htslib/vcf.h>
 #include <htslib/synced_bcf_reader.h>
 #include <htslib/vcfutils.h>
+#include <htslib/kbitset.h>
+#include <htslib/hts_os.h>
 #include <inttypes.h>
+#include <sys/time.h>
 #include "bcftools.h"
-#include "hclust.h"
+#include "extsort.h"
+//#include "hclust.h"
 
 typedef struct
 {
-    bcf_srs_t *files;           // first reader is the query VCF - single sample normally or multi-sample for cross-check
-    bcf_hdr_t *gt_hdr, *sm_hdr; // VCF with genotypes to compare against and the query VCF
-    int ntmp_arr, npl_arr;
-    int32_t *tmp_arr, *pl_arr;
-    double *lks, *sites, min_inter_err, max_intra_err;
-    int *cnts, *dps, hom_only, cross_check, all_sites;
-    char *cwd, **argv, *gt_fname, *plot, *query_sample, *target_sample;
-    int argc, no_PLs, narr, nsmpl;
-}
-args_t;
-
-FILE *open_file(char **fname, const char *mode, const char *fmt, ...);
-char *msprintf(const char *fmt, ...);
-void mkdir_p(const char *fmt, ...);
-
-void py_plot(char *script)
-{
-    mkdir_p(script);
-    int len = strlen(script);
-    char *cmd = !strcmp(".py",script+len-3) ? msprintf("python %s", script) : msprintf("python %s.py", script);
-    int ret = system(cmd);
-    if ( ret ) fprintf(bcftools_stderr, "The command returned non-zero status %d: %s\n", ret, cmd);
-    free(cmd);
-}
-
-static void plot_check(args_t *args, char *target_sample, char *query_sample)
-{
-    char *fname;
-    FILE *fp = open_file(&fname, "w", "%s.py", args->plot);
-    fprintf(fp,
-            "import matplotlib as mpl\n"
-            "mpl.use('Agg')\n"
-            "import matplotlib.pyplot as plt\n"
-            "import matplotlib.gridspec as gridspec\n"
-            "import csv\n"
-            "csv.register_dialect('tab', delimiter='\\t', quoting=csv.QUOTE_NONE)\n"
-            "\n"
-            "sample_ids = False\n"
-            "\n"
-            "dat = []\n"
-            "with open('%s.tab', 'r') as f:\n"
-            "    reader = csv.reader(f, 'tab')\n"
-            "    for row in reader:\n"
-            "        if row[0][0]=='#': continue\n"
-            "        if row[0]!='CN': continue\n"
-            "        tgt = 0\n"
-            "        if row[4]=='%s': tgt = 1\n"
-            "        dat.append([float(row[1]), float(row[2]), float(row[3]), tgt, row[4]])\n"
-            "\n"
-            "dat = sorted(dat)\n"
-            "\n"
-            "iq = -1; dp = 0\n"
-            "for i in range(len(dat)):\n"
-            "    if iq==-1 and dat[i][3]==1: iq = i\n"
-            "    dp += dat[i][2]\n"
-            "dp /= len(dat)\n"
-            "\n"
-            "fig,ax1 = plt.subplots(figsize=(8,5))\n"
-            "ax2 = ax1.twinx()\n"
-            "plots  = ax1.plot([x[0] for x in dat],'o-', ms=3, color='g', mec='g', label='Discordance (total)')\n"
-            "plots += ax1.plot([x[1] for x in dat], '^', ms=3, color='r', mec='r', label='Discordance (avg per site)')\n"
-            "plots += ax2.plot([x[2] for x in dat],'v', ms=3, color='k', label='Number of sites')\n"
-            "if iq!=-1:\n"
-            "   ax1.plot([iq],[dat[iq][0]],'o',color='orange', ms=9)\n"
-            "   ax1.annotate('%s',xy=(iq,dat[iq][0]), xytext=(5,5), textcoords='offset points',fontsize='xx-small',rotation=45,va='bottom',ha='left')\n"
-            "   ax1.plot([iq],[dat[iq][1]],'^',color='red', ms=5)\n"
-            "for tl in ax1.get_yticklabels(): tl.set_color('g')\n"
-            "for tl in ax2.get_yticklabels(): tl.set_color('k'); tl.set_fontsize(9)\n"
-            "min_dp = min([x[2] for x in dat])\n"
-            "max_dp = max([x[2] for x in dat])\n"
-            "ax2.set_ylim(min_dp-1,max_dp+1)\n"
-            "ax1.set_title('Discordance with %s')\n"
-            "ax1.set_xlim(-0.05*len(dat),1.05*(len(dat)-1))\n"
-            "ax1.set_xlabel('Sample ID')\n"
-            "plt.subplots_adjust(left=0.1,right=0.9,bottom=0.1,top=0.9)\n"
-            "if sample_ids:\n"
-            "   ax1.set_xticks(range(len(dat)))\n"
-            "   ax1.set_xticklabels([x[4] for x in dat],**{'rotation':45, 'ha':'right', 'fontsize':8})\n"
-            "   plt.subplots_adjust(bottom=0.2)\n"
-            "ax1.set_ylabel('Discordance',color='g')\n"
-            "ax2.set_ylabel('Number of sites',color='k')\n"
-            "ax2.ticklabel_format(style='sci', scilimits=(-3,2), axis='y')\n"
-            "ax1.ticklabel_format(style='sci', scilimits=(-3,2), axis='y')\n"
-            "labels = [l.get_label() for l in plots]\n"
-            "plt.legend(plots,labels,numpoints=1,markerscale=1,loc='best',prop={'size':10},frameon=False)\n"
-            "plt.savefig('%s.png')\n"
-            "plt.close()\n"
-            "\n", args->plot, target_sample, target_sample, query_sample, args->plot
-           );
-    fclose(fp);
-    py_plot(fname);
-    free(fname);
-}
-
-#if 0
-static void plot_cross_check(args_t *args)
-{
-    char *fname;
-    FILE *fp = open_file(&fname, "w", "%s.py", args->plot);
-    fprintf(fp,
-            "import matplotlib as mpl\n"
-            "mpl.use('Agg')\n"
-            "import matplotlib.pyplot as plt\n"
-            "import matplotlib.gridspec as gridspec\n"
-            "import csv\n"
-            "csv.register_dialect('tab', delimiter='\\t', quoting=csv.QUOTE_NONE)\n"
-            "avg   = []\n"
-            "dp    = []\n"
-            "sm2id = {}\n"
-            "dat   = None\n"
-            "min   = None\n"
-            "max   = None\n"
-            "with open('%s.tab', 'r') as f:\n"
-            "   reader = csv.reader(f, 'tab')\n"
-            "   i = 0\n"
-            "   for row in reader:\n"
-            "       if row[0]=='SM':\n"
-            "           sm2id[row[4]] = i\n"
-            "           avg.append([i,float(row[1])])\n"
-            "           dp.append([i,float(row[2])])\n"
-            "           i += 1\n"
-            "       elif row[0]=='CN':\n"
-            "           val = 0\n"
-            "           if int(row[2])!=0: val = float(row[1])/int(row[2])\n"
-            "           if not dat:\n"
-            "               dat = [[0]*len(sm2id) for x in xrange(len(sm2id))]\n"
-            "               min = val\n"
-            "               max = val\n"
-            "           id_i = sm2id[row[4]]\n"
-            "           id_j = sm2id[row[5]]\n"
-            "           dat[id_i][id_j] = val\n"
-            "           dat[id_j][id_i] = val\n"
-            "           if min > val: min = val\n"
-            "           if max < val: max = val\n"
-            "\n"
-            "if len(sm2id)<=1: exit(1)\n"
-            "if min==max: exit(1)\n"
-            "\n"
-            "fig = plt.figure(figsize=(6,7))\n"
-            "gs  = gridspec.GridSpec(2, 1, height_ratios=[1, 1.5])\n"
-            "ax1 = plt.subplot(gs[0])\n"
-            "ax2 = plt.subplot(gs[1])\n"
-            "\n"
-            "ax1.plot([x[0] for x in avg],[x[1] for x in avg],'^-', ms=3, color='k')\n"
-            "ax3 = ax1.twinx()\n"
-            "ax3.plot([x[0] for x in dp],[x[1] for x in dp],'^-', ms=3, color='r',mec='r')\n"
-            "for tl in ax3.get_yticklabels():\n"
-            "   tl.set_color('r')\n"
-            "   tl.set_fontsize(9)\n"
-            "\n"
-            "im = ax2.imshow(dat,clim=(min),interpolation='nearest',origin='lower')\n"
-            "cb1  = plt.colorbar(im,ax=ax2)\n"
-            "cb1.set_label('Pairwise discordance')\n"
-            "for t in cb1.ax.get_yticklabels(): t.set_fontsize(9)\n"
-            "\n"
-            "ax1.tick_params(axis='both', which='major', labelsize=9)\n"
-            "ax1.tick_params(axis='both', which='minor', labelsize=9)\n"
-            "ax2.tick_params(axis='both', which='major', labelsize=9)\n"
-            "ax2.tick_params(axis='both', which='minor', labelsize=9)\n"
-            "\n"
-            "ax1.set_title('Sample Discordance Score')\n"
-            "ax2.set_ylabel('Sample ID')\n"
-            "ax2.set_xlabel('Sample ID')\n"
-            "ax3.set_ylabel('Average Depth',color='r')\n"
-            "ax1.set_xlabel('Sample ID')\n"
-            "ax1.set_ylabel('Average discordance')\n"
-            "\n"
-            "plt.subplots_adjust(left=0.15,right=0.87,bottom=0.08,top=0.93,hspace=0.25)\n"
-            "plt.savefig('%s.png')\n"
-            "plt.close()\n"
-            "\n", args->plot,args->plot
-           );
-    fclose(fp);
-    py_plot(fname);
-    free(fname);
-}
-#endif
-
-static void init_data(args_t *args)
-{
-    args->sm_hdr = args->files->readers[0].header;
-    if ( !bcf_hdr_nsamples(args->sm_hdr) ) error("No samples in %s?\n", args->files->readers[0].fname);
-
-    if ( !args->cross_check )
-    {
-        args->gt_hdr = args->files->readers[1].header;
-        int nsamples = bcf_hdr_nsamples(args->gt_hdr);
-        if ( !nsamples ) error("No samples in %s?\n", args->files->readers[1].fname);
-        args->lks   = (double*) calloc(nsamples,sizeof(double));
-        args->cnts  = (int*) calloc(nsamples,sizeof(int));
-        args->sites = (double*) calloc(nsamples,sizeof(double));
-        args->dps   = (int*) calloc(nsamples,sizeof(int));
-    }
+    int iqry, igt;
 }
+pair_t;
 
-static void destroy_data(args_t *args)
-{
-    free(args->lks); free(args->cnts); free(args->dps); free(args->cwd); free(args->sites);
-}
-
-static int allele_to_int(bcf1_t *line, char *allele)
+typedef struct   // program state for gtcheck: readers, parsed options, work buffers and counters
 {
-    int i;
-    for (i=0; i<line->n_allele; i++)
-        if ( !strcmp(allele,line->d.allele[i]) ) return i;
-    if ( strcmp(line->d.allele[i-1],"X") ) return -1;
-    return i-1;
-}
+    bcf_srs_t *files;           // first reader is the query VCF - single sample normally or multi-sample for cross-check
+    bcf_hdr_t *gt_hdr, *qry_hdr; // VCF with genotypes to compare against and the query VCF
+    char *cwd, **argv, *gt_samples, *qry_samples, *regions, *targets, *qry_fname, *gt_fname, *pair_samples;
+    int argc, gt_samples_is_file, qry_samples_is_file, regions_is_file, targets_is_file, pair_samples_is_file;   // *_is_file: the matching string names a file rather than a list (presumably set by -S/-R/-T/-P, as in vcfgtcheck.c)
+    int qry_use_GT,gt_use_GT, nqry_smpl,ngt_smpl, *qry_smpl,*gt_smpl;   // *_use_GT: presumably 1 for GT, 0 for PL, -1 when --use was not given - confirm against the option parsing
+    double *pdiff, *qry_prob, *gt_prob;
+    uint32_t *ndiff,*ncnt,ncmp, npairs;
+    int32_t *qry_arr,*gt_arr, nqry_arr,ngt_arr;
+    uint8_t *qry_dsg, *gt_dsg;
+    pair_t *pairs;
+    double *hwe_prob, dsg2prob[8][3], pl2prob[256];
+    double min_inter_err, max_intra_err;
+    int all_sites, hom_only, ntop, cross_check, calc_hwe_prob, sort_by_hwe, dry_run, use_PLs;
+    FILE *fp;
+    unsigned int nskip_no_match, nskip_not_ba, nskip_mono, nskip_no_data, nskip_dip_GT, nskip_dip_PL;   // presumably per-reason counters of skipped sites - TODO confirm where each is incremented
 
-static int init_gt2ipl(args_t *args, bcf1_t *gt_line, bcf1_t *sm_line, int *gt2ipl, int n_gt2ipl)
-{
-    int i, j;
-    for (i=0; i<n_gt2ipl; i++) gt2ipl[i] = -1;
-    for (i=0; i<gt_line->n_allele; i++)
-    {
-        // find which of the sm_alleles (k) corresponds to the gt_allele (i)
-        int k = allele_to_int(sm_line, gt_line->d.allele[i]);
-        if ( k<0 ) return 0;
-        for (j=0; j<=i; j++)
-        {
-            int l = allele_to_int(sm_line, gt_line->d.allele[j]);
-            if ( l<0 ) return 0;
-            gt2ipl[ bcf_ij2G(j,i) ] = k<=l ? bcf_ij2G(k,l) : bcf_ij2G(l,k);
-        }
-    }
-    //for (i=0; i<n_gt2ipl; i++) fprintf(bcftools_stdout, "%d .. %d\n", i,gt2ipl[i]);
-    return 1;
+    // for --distinctive-sites
+    double distinctive_sites;
+    kbitset_t *kbs_diff;
+    size_t diff_sites_size;
+    extsort_t *es;
+    char *es_tmp_prefix, *es_max_mem;   // presumably the TMP prefix and MEM limit handed to the external sort (the `es` field) - confirm
 }
+args_t;
 
 static void set_cwd(args_t *args)
 {
@@ -286,7 +94,6 @@ static void set_cwd(args_t *args)
     }
     assert(buf);
 }
-
 static void print_header(args_t *args, FILE *fp)
 {
     fprintf(fp, "# This file was produced by bcftools (%s+htslib-%s), the command line was:\n", bcftools_version(), hts_version());
@@ -298,413 +105,920 @@ static void print_header(args_t *args, FILE *fp)
     fprintf(fp, "# \t %s\n#\n", args->cwd);
 }
 
-static int fake_PLs(args_t *args, bcf_hdr_t *hdr, bcf1_t *line)
+static int cmp_int(const void *_a, const void *_b)     // qsort() comparator: orders int values ascending
 {
-    // PLs not present, use GTs instead.
-    int fake_PL = args->no_PLs ? args->no_PLs : 99;    // with 1, discordance is the number of non-matching GTs
-    int nsm_gt, i;
-    if ( (nsm_gt=bcf_get_genotypes(hdr, line, &args->tmp_arr, &args->ntmp_arr)) <= 0 )
-        error("GT not present at %s:%"PRId64"?\n", hdr->id[BCF_DT_CTG][line->rid].key, (int64_t) line->pos+1);
-    nsm_gt /= bcf_hdr_nsamples(hdr);
-    int npl = line->n_allele*(line->n_allele+1)/2;
-    hts_expand(int,npl*bcf_hdr_nsamples(hdr),args->npl_arr,args->pl_arr);
-    for (i=0; i<bcf_hdr_nsamples(hdr); i++)
-    {
-        int *gt_ptr = args->tmp_arr + i*nsm_gt;
-        int j, *pl_ptr = args->pl_arr + i*npl;
-        if ( bcf_gt_is_missing(gt_ptr[0]) || bcf_gt_is_missing(gt_ptr[1]) ) // missing genotype
-        {
-            for (j=0; j<npl; j++) pl_ptr[j] = -1;
-        }
-        else
-        {
-            int a = bcf_gt_allele(gt_ptr[0]);
-            int b = bcf_gt_allele(gt_ptr[1]);
-            for (j=0; j<npl; j++) pl_ptr[j] = fake_PL;
-            int idx = bcf_alleles2gt(a,b);
-            pl_ptr[idx] = 0;
-        }
-    }
-    return npl;
+    int a = *((int*)_a);
+    int b = *((int*)_b);
+    if ( a < b ) return -1;     // explicit comparisons instead of a-b, so the result cannot overflow
+    if ( a > b ) return 1;
+    return 0;
+}
+static int cmp_pair(const void *_a, const void *_b)    // qsort() comparator for pair_t: by iqry, ties broken by igt, both ascending
+{
+    pair_t *a = (pair_t*)_a;
+    pair_t *b = (pair_t*)_b;
+    if ( a->iqry < b->iqry ) return -1;     // primary key: sample index on the query side
+    if ( a->iqry > b->iqry ) return 1;
+    if ( a->igt < b->igt ) return -1;       // secondary key: sample index on the genotype (-g) side
+    if ( a->igt > b->igt ) return 1;
+    return 0;
 }
 
-static int cmp_doubleptr(const void *_a, const void *_b)
+typedef struct  // one record per discordant site; records are ordered by diff_sites_cmp()
+{
+    uint32_t ndiff,rid,pos,rand; // rand is to shuffle sites with the same ndiff from across all chromosomes
+    unsigned long kbs_dat[1];    // copy of the per-pair bitset words; the record is allocated with diff_sites_size bytes so that kbs_diff->n words fit here
+}
+diff_sites_t;
+#if DBG
+static void diff_sites_debug_print(args_t *args, diff_sites_t *ds)     // debugging aid, compiled only when DBG is non-zero
+{
+    int i;
+    memcpy(args->kbs_diff->b,ds->kbs_dat,args->kbs_diff->n*sizeof(unsigned long));     // note: overwrites the working bitset args->kbs_diff with the record's bits
+    fprintf(bcftools_stderr,"%s:%d\t%d\t",bcf_hdr_id2name(args->qry_hdr,ds->rid),ds->pos+1,ds->ndiff);     // sequence name, pos+1 and ndiff
+    for (i=0; i<args->npairs; i++) fprintf(bcftools_stderr,"%d",kbs_exists(args->kbs_diff,i)?1:0);         // one 0/1 character per pair
+    fprintf(bcftools_stderr,"\n");
+}
+#endif
+static int diff_sites_cmp(const void *aptr, const void *bptr)  // comparator registered with extsort; each argument points to a diff_sites_t pointer
+{
+    diff_sites_t *a = *((diff_sites_t**)aptr);
+    diff_sites_t *b = *((diff_sites_t**)bptr);
+    if ( a->ndiff < b->ndiff ) return 1;        // descending order
+    if ( a->ndiff > b->ndiff ) return -1;
+    if ( a->rand < b->rand ) return -1;         // equal ndiff: ascending by the random tag assigned in diff_sites_push()
+    if ( a->rand > b->rand ) return 1;
+    return 0;
+}
+static void diff_sites_init(args_t *args)      // sets up the per-pair bitset and the external sorter used for --distinctive-sites
+{
+    int nsites = args->distinctive_sites<=1 ? args->npairs*args->distinctive_sites : args->distinctive_sites;  // values <=1 are taken as a fraction of npairs, larger values as an absolute count
+    if ( nsites<=0 ) error("The value for --distinctive-sites was set too low: %d\n",nsites);
+    if ( nsites > args->npairs )
+    {
+        fprintf(bcftools_stderr,"Warning: The value for --distinctive-sites is bigger than is the number of pairs, all discordant sites be printed.\n");
+        nsites = args->npairs;
+        args->distinctive_sites = args->npairs + 1;     // NOTE(review): stored as npairs+1, presumably so the limit is never reached by the consumer - confirm at the point of use
+    }
+    else
+        args->distinctive_sites = nsites;               // from here on the option holds an absolute count
+    args->kbs_diff = kbs_init(args->npairs);            // one bit per sample pair
+    size_t n = (args->npairs + KBS_ELTBITS-1) / KBS_ELTBITS;   // number of bitset words needed for npairs bits
+    assert( n==args->kbs_diff->n );
+    args->diff_sites_size = sizeof(diff_sites_t) + (n-1)*sizeof(unsigned long);    // n-1 because diff_sites_t already contains one word (kbs_dat[1])
+    args->es = extsort_alloc();
+    extsort_set_opt(args->es,size_t,DAT_SIZE,args->diff_sites_size);       // fixed record size
+    extsort_set_opt(args->es,const char*,TMP_PREFIX,args->es_tmp_prefix);
+    extsort_set_opt(args->es,const char*,MAX_MEM,args->es_max_mem);
+    extsort_set_opt(args->es,extsort_cmp_f,FUNC_CMP,diff_sites_cmp);
+    extsort_init(args->es);
+}
+static void diff_sites_destroy(args_t *args)   // releases what diff_sites_init() created: the bitset and the external sorter
 {
-    double *a = *((double**)_a);
-    double *b = *((double**)_b);
-    if ( *a < *b ) return -1;
-    else if ( *a == *b ) return 0;
+    kbs_destroy(args->kbs_diff);
+    extsort_destroy(args->es);
+}
+static inline void diff_sites_reset(args_t *args)      // clears all per-pair bits before a new site is evaluated
+{
+    kbs_clear(args->kbs_diff);
+}
+static inline void diff_sites_push(args_t *args, int ndiff, int rid, int pos)   // snapshots the current per-pair bitset plus (ndiff,rid,pos) into a new record and hands it to the external sorter
+{
+    diff_sites_t *dat = (diff_sites_t*) malloc(args->diff_sites_size);
+    if ( !dat ) error("Error: failed to allocate %zu bytes\n", args->diff_sites_size);  // a NULL return would otherwise be dereferenced by memset() below
+    memset(dat,0,sizeof(*dat)); // for debugging: prevent warnings about uninitialized memory coming from struct padding (not needed after rand added)
+    dat->ndiff = ndiff;
+    dat->rid  = rid; dat->pos = pos;
+    dat->rand = hts_lrand48();  // random tie-breaker consulted by diff_sites_cmp()
+    memcpy(dat->kbs_dat,args->kbs_diff->b,args->kbs_diff->n*sizeof(unsigned long));
+    extsort_push(args->es,dat); // the record is not freed here
+}
+static inline int diff_sites_shift(args_t *args, int *ndiff, int *rid, int *pos)   // fetches the next record from the sorter; returns 0 when extsort_shift() yields NULL, 1 otherwise
+{
+    diff_sites_t *dat = (diff_sites_t*) extsort_shift(args->es);
+    if ( !dat ) return 0;
+    *ndiff = dat->ndiff;
+    *rid   = dat->rid;
+    *pos   = dat->pos;
+    memcpy(args->kbs_diff->b,dat->kbs_dat,args->kbs_diff->n*sizeof(unsigned long));     // restore the site's per-pair bits into args->kbs_diff
     return 1;
 }
 
-static void check_gt(args_t *args)
+static void init_samples(char *list, int list_is_file, int **smpl, int *nsmpl, bcf_hdr_t *hdr, char *vcf_fname)    // resolves a sample list into sorted header indexes (*smpl, *nsmpl); a list equal to "-" selects every sample of hdr
 {
-    int i,ret, *gt2ipl = NULL, m_gt2ipl = 0, *gt_arr = NULL, ngt_arr = 0;
-    int fake_pls = args->no_PLs;
+    int i;
+    if ( !strcmp(list,"-") )
+    {
+        *nsmpl = bcf_hdr_nsamples(hdr);
+        *smpl  = (int*) malloc(sizeof(**smpl)*(*nsmpl));
+        for (i=0; i<*nsmpl; i++) (*smpl)[i] = i;       // identity mapping, already in ascending order
+        return;
+    }
 
-    // Initialize things: check which tags are defined in the header, sample names etc.
-    if ( bcf_hdr_id2int(args->gt_hdr, BCF_DT_ID, "GT")<0 ) error("[E::%s] GT not present in the header of %s?\n", __func__, args->files->readers[1].fname);
-    if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "PL")<0 )
+    char **tmp = hts_readlist(list, list_is_file, nsmpl);      // list_is_file selects between a file name and an inline list
+    if ( !tmp || !*nsmpl ) error("Failed to parse %s\n", list);
+    *smpl = (int*) malloc(sizeof(**smpl)*(*nsmpl));
+    for (i=0; i<*nsmpl; i++)
     {
-        if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
-            error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
-        if ( !args->no_PLs )
-            fprintf(bcftools_stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
-        fake_pls = 1;
+        int idx = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, tmp[i]);  // sample name -> index in hdr, negative if unknown
+        if ( idx<0 ) error("No such sample in %s: [%s]\n",vcf_fname,tmp[i]);
+        (*smpl)[i] = idx;
+        free(tmp[i]);
     }
+    free(tmp);
+    qsort(*smpl,*nsmpl,sizeof(**smpl),cmp_int);        // sorting makes duplicates adjacent for the check below
+    // check for duplicates
+    for (i=1; i<*nsmpl; i++)
+        if ( (*smpl)[i-1]==(*smpl)[i] )
+            error("Error: the sample \"%s\" is listed twice in %s\n", hdr->samples[(*smpl)[i]],list);
+}
 
-    FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : bcftools_stdout;
-    print_header(args, fp);
+static void init_data(args_t *args)    // opens the readers, decides GT vs PL per file, resolves samples/pairs, allocates the accumulators and prints the header
+{
+    hts_srand48(0);    // constant seed for the hts_lrand48() values drawn in diff_sites_push()
 
-    int tgt_isample = -1, query_isample = 0;
-    if ( args->target_sample )
+    args->files = bcf_sr_init();
+    if ( args->regions && bcf_sr_set_regions(args->files, args->regions, args->regions_is_file)<0 ) error("Failed to read the regions: %s\n", args->regions);
+    if ( args->targets && bcf_sr_set_targets(args->files, args->targets, args->targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n", args->targets);
+
+    if ( args->gt_fname ) bcf_sr_set_opt(args->files, BCF_SR_REQUIRE_IDX);     // an index is requested only when a second (-g) file is read alongside the query
+    if ( !bcf_sr_add_reader(args->files,args->qry_fname) ) error("Failed to open %s: %s\n", args->qry_fname,bcf_sr_strerror(args->files->errnum));
+    if ( args->gt_fname && !bcf_sr_add_reader(args->files, args->gt_fname) )
+        error("Failed to read from %s: %s\n", !strcmp("-",args->gt_fname)?"standard input":args->gt_fname,bcf_sr_strerror(args->files->errnum));
+
+    args->qry_hdr = bcf_sr_get_header(args->files,0);  // reader 0 is the query file
+    if ( !bcf_hdr_nsamples(args->qry_hdr) ) error("No samples in %s?\n", args->qry_fname);
+    if ( args->gt_fname )
     {
-        tgt_isample = bcf_hdr_id2int(args->gt_hdr, BCF_DT_SAMPLE, args->target_sample);
-        if ( tgt_isample<0 ) error("No such sample in %s: [%s]\n", args->files->readers[1].fname, args->target_sample);
+        args->gt_hdr = bcf_sr_get_header(args->files,1);       // reader 1 is the -g file; gt_hdr is left unset without it
+        if ( !bcf_hdr_nsamples(args->gt_hdr) ) error("No samples in %s?\n", args->gt_fname);
     }
-    if ( args->all_sites )
+
+    // Determine whether GT or PL will be used
+    if ( args->qry_use_GT==-1 ) // not set by -u, qry uses PL by default
     {
-        if ( tgt_isample==-1 )
-        {
-            fprintf(bcftools_stderr,"No target sample selected for comparison, using the first sample in %s: %s\n", args->gt_fname,args->gt_hdr->samples[0]);
-            tgt_isample = 0;
-        }
+        if ( bcf_hdr_id2int(args->qry_hdr,BCF_DT_ID,"PL")>=0 )
+            args->qry_use_GT = 0;
+        else if ( bcf_hdr_id2int(args->qry_hdr,BCF_DT_ID,"GT")>=0 )
+            args->qry_use_GT = 1;
+        else
+            error("[E::%s] Neither PL nor GT tag is present in the header of %s\n", __func__, args->qry_fname);
     }
-    if ( args->query_sample )
+    else if ( args->qry_use_GT==1 )
     {
-        query_isample = bcf_hdr_id2int(args->sm_hdr, BCF_DT_SAMPLE, args->query_sample);
-        if ( query_isample<0 ) error("No such sample in %s: [%s]\n", args->files->readers[0].fname, args->query_sample);
+        if ( bcf_hdr_id2int(args->qry_hdr,BCF_DT_ID,"GT")<0 )
+            error("[E::%s] The GT tag is not present in the header of %s\n", __func__, args->qry_fname);
     }
-    if ( args->all_sites )
-        fprintf(fp, "# [1]SC, Site by Site Comparison\t[2]Chromosome\t[3]Position\t[4]-g alleles\t[5]-g GT (%s)\t[6]match log LK\t[7]Query alleles\t[8-]Query PLs (%s)\n",
-                args->gt_hdr->samples[tgt_isample],args->sm_hdr->samples[query_isample]);
+    else if ( bcf_hdr_id2int(args->qry_hdr,BCF_DT_ID,"PL")<0 )
+        error("[E::%s] The PL tag is not present in the header of %s\n", __func__, args->qry_fname);
 
-    // Main loop
-    float prev_lk = 0;
-    while ( (ret=bcf_sr_next_line(args->files)) )
+    if ( args->gt_hdr )
     {
-        if ( ret!=2 ) continue;
-        bcf1_t *sm_line = args->files->readers[0].buffer[0];    // the query file
-        bcf1_t *gt_line = args->files->readers[1].buffer[0];    // the -g target file
-        bcf_unpack(sm_line, BCF_UN_FMT);
-        bcf_unpack(gt_line, BCF_UN_FMT);
-
-        // Init mapping from target genotype index to the sample's PL fields
-        int n_gt2ipl = gt_line->n_allele*(gt_line->n_allele + 1)/2;
-        if ( n_gt2ipl > m_gt2ipl )
+        if ( args->gt_use_GT==-1 ) // not set by -u, gt uses GT by default
+        {
+            if ( bcf_hdr_id2int(args->gt_hdr,BCF_DT_ID,"GT")>=0 )
+                args->gt_use_GT = 1;
+            else if ( bcf_hdr_id2int(args->gt_hdr,BCF_DT_ID,"PL")>=0 )
+                args->gt_use_GT = 0;
+            else
+                error("[E::%s] Neither PL nor GT tag is present in the header of %s\n", __func__, args->gt_fname);
+        }
+        else if ( args->gt_use_GT==1 )
         {
-            m_gt2ipl = n_gt2ipl;
-            gt2ipl   = (int*) realloc(gt2ipl, sizeof(int)*m_gt2ipl);
+            if ( bcf_hdr_id2int(args->gt_hdr,BCF_DT_ID,"GT")<0 )
+                error("[E::%s] The GT tag is not present in the header of %s\n", __func__, args->gt_fname);
         }
-        if ( !init_gt2ipl(args, gt_line, sm_line, gt2ipl, n_gt2ipl) ) continue;
-
-        // Target genotypes
-        int ngt, npl;
-        if ( (ngt=bcf_get_genotypes(args->gt_hdr, gt_line, &gt_arr, &ngt_arr)) <= 0 )
-            error("GT not present at %s:%"PRId64"?", args->gt_hdr->id[BCF_DT_CTG][gt_line->rid].key, (int64_t) gt_line->pos+1);
-        ngt /= bcf_hdr_nsamples(args->gt_hdr);
-        if ( ngt!=2 ) continue; // checking only diploid genotypes
+        else if ( bcf_hdr_id2int(args->gt_hdr,BCF_DT_ID,"PL")<0 )
+            error("[E::%s] The PL tag is not present in the header of %s\n", __func__, args->gt_fname);
+    }
+    else
+        args->gt_use_GT = args->qry_use_GT;    // single-file mode: both sides read the same tag
 
-        // Sample PLs
-        if ( !fake_pls )
+    // Prepare samples
+    int i,j;
+    args->nqry_smpl = bcf_hdr_nsamples(args->qry_hdr);
+    if ( args->qry_samples )
+    {
+        init_samples(args->qry_samples, args->qry_samples_is_file, &args->qry_smpl, &args->nqry_smpl, args->qry_hdr, args->qry_fname);
+    }
+    if ( args->gt_samples )
+    {
+        init_samples(args->gt_samples, args->gt_samples_is_file, &args->gt_smpl, &args->ngt_smpl,
+            args->gt_hdr ? args->gt_hdr : args->qry_hdr,
+            args->gt_fname ? args->gt_fname : args->qry_fname);
+    }
+    else if ( args->pair_samples )
+    {
+        int npairs;
+        char **tmp = hts_readlist(args->pair_samples, args->pair_samples_is_file, &npairs);
+        if ( !tmp || !npairs ) error("Failed to parse %s\n", args->pair_samples);
+        if ( !args->pair_samples_is_file && npairs%2 ) error("Expected even number of comma-delimited samples with -p\n");
+        args->npairs = args->pair_samples_is_file ? npairs : npairs/2;     // file: one pair per line; inline list: two names per pair
+        args->pairs  = (pair_t*) calloc(args->npairs,sizeof(*args->pairs));
+        if ( !args->pair_samples_is_file )
         {
-            if ( (npl=bcf_get_format_int32(args->sm_hdr, sm_line, "PL", &args->pl_arr, &args->npl_arr)) <= 0 )
+            for (i=0; i<args->npairs; i++)     // inline list: even items are query samples, odd items are their partners
             {
-                if ( sm_line->n_allele==1 )
-                {
-                    // PL values may not be present when ALT=. (mpileup/bcftools output), in that case 
-                    // switch automatically to GT at these sites
-                    npl = fake_PLs(args, args->sm_hdr, sm_line);
-                }
-                else
-                    error("PL not present at %s:%"PRId64"?\n", args->sm_hdr->id[BCF_DT_CTG][sm_line->rid].key, (int64_t) sm_line->pos+1);
+                args->pairs[i].iqry = bcf_hdr_id2int(args->qry_hdr, BCF_DT_SAMPLE, tmp[2*i]);
+                args->pairs[i].igt  = bcf_hdr_id2int(args->gt_hdr?args->gt_hdr:args->qry_hdr, BCF_DT_SAMPLE, tmp[2*i+1]);
+                if ( args->pairs[i].iqry < 0 ) error("No such sample in %s: [%s]\n",args->qry_fname,tmp[2*i]);
+                if ( args->pairs[i].igt  < 0 ) error("No such sample in %s: [%s]\n",args->gt_fname?args->gt_fname:args->qry_fname,tmp[2*i+1]);
+                free(tmp[2*i]);
+                free(tmp[2*i+1]);
             }
-            else
-                npl /= bcf_hdr_nsamples(args->sm_hdr);
         }
         else
-            npl = fake_PLs(args, args->sm_hdr, sm_line);
+        {
+            for (i=0; i<args->npairs; i++)     // file: each line holds two whitespace-separated sample names
+            {
+                char *ptr = tmp[i];
+                while ( *ptr && !isspace((unsigned char)*ptr) ) ptr++;  // cast: passing a negative char to isspace() is undefined behavior
+                if ( !*ptr ) error("Could not parse %s: %s\n",args->pair_samples,tmp[i]);
+                *ptr = 0;      // terminate the first name in place
+                args->pairs[i].iqry = bcf_hdr_id2int(args->qry_hdr, BCF_DT_SAMPLE, tmp[i]);
+                if ( args->pairs[i].iqry < 0 ) error("No such sample in %s: [%s]\n",args->qry_fname,tmp[i]);
+                ptr++;
+                while ( *ptr && isspace((unsigned char)*ptr) ) ptr++;   // skip the remaining separator characters (same cast as above)
+                args->pairs[i].igt = bcf_hdr_id2int(args->gt_hdr?args->gt_hdr:args->qry_hdr, BCF_DT_SAMPLE, ptr);
+                if ( args->pairs[i].igt < 0 ) error("No such sample in %s: [%s]\n",args->gt_fname?args->gt_fname:args->qry_fname,ptr);
+                free(tmp[i]);
+            }
+        }
+        free(tmp);
+        qsort(args->pairs,args->npairs,sizeof(*args->pairs),cmp_pair);
+    }
+    else if ( args->gt_hdr )
+        args->ngt_smpl = bcf_hdr_nsamples(args->gt_hdr);
+    if ( !args->ngt_smpl )     // no gt-side sample set was established above: reuse the query samples and flag cross_check
+    {
+        args->ngt_smpl = args->nqry_smpl;
+        args->gt_smpl  = args->qry_smpl;       // alias, not a copy; destroy_data() checks for this before freeing
+        args->cross_check = 1;
+    }
+
+    // The data arrays
+    if ( !args->npairs ) args->npairs = args->cross_check ? args->nqry_smpl*(args->nqry_smpl+1)/2 : args->ngt_smpl*args->nqry_smpl;    // cross-check: n*(n+1)/2 pairs, otherwise the full ngt x nqry matrix
+    if ( !args->pair_samples )
+    {
+        args->qry_dsg = (uint8_t*) malloc(args->nqry_smpl);
+        args->gt_dsg  = args->cross_check ? args->qry_dsg : (uint8_t*) malloc(args->ngt_smpl);
+    }
+    if ( args->use_PLs )
+    {
+        args->pdiff = (double*) calloc(args->npairs,sizeof(*args->pdiff));      // log probability of pair samples being the same
+        args->qry_prob = (double*) malloc(3*args->nqry_smpl*sizeof(*args->qry_prob));
+        args->gt_prob  = args->cross_check ? args->qry_prob : (double*) malloc(3*args->ngt_smpl*sizeof(*args->gt_prob));
+
+        // dsg2prob: the first index is bitmask of 8 possible dsg combinations (only 1<<0,1<<1,1<<2 are set, accessing
+        // anything else indicates an error, this is just to reuse gt_to_dsg()); the second index are the corresponding
+        // probabilities of 0/0, 0/1, and 1/1 genotypes
+        for (i=0; i<8; i++)
+            for (j=0; j<3; j++)
+                args->dsg2prob[i][j] = HUGE_VAL;
+        args->dsg2prob[1][0] = -log(1-pow(10,-0.1*args->use_PLs));     // use_PLs acts as a phred-scaled error e: the called genotype gets -log(1-e), each other genotype -log(e/2)
+        args->dsg2prob[1][1] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[1][2] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[2][0] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[2][1] = -log(1-pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[2][2] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[4][0] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[4][1] = -log(0.5*pow(10,-0.1*args->use_PLs));
+        args->dsg2prob[4][2] = -log(1-pow(10,-0.1*args->use_PLs));
 
-        // Calculate likelihoods for all samples, assuming diploid genotypes
+        // lookup table to avoid exponentiation
+        for (i=0; i<256; i++) args->pl2prob[i] = pow(10,-0.1*i);
+    }
+    else
+        args->ndiff = (uint32_t*) calloc(args->npairs,sizeof(*args->ndiff));    // number of differing genotypes for each pair of samples
+    args->ncnt  = (uint32_t*) calloc(args->npairs,sizeof(*args->ncnt));         // number of comparisons performed (non-missing data)
+    if ( !args->ncnt ) error("Error: failed to allocate %.1f Mb\n", args->npairs*sizeof(*args->ncnt)/1e6);
+    if ( args->calc_hwe_prob )
+    {
+        // prob of the observed sequence of matches given site AFs and HWE
+        args->hwe_prob = (double*) calloc(args->npairs,sizeof(*args->hwe_prob));
+        if ( !args->hwe_prob ) error("Error: failed to allocate %.1f Mb. Run with --no-HWE-prob to save some memory.\n", args->npairs*sizeof(*args->hwe_prob)/1e6);
+    }
+
+    if ( args->distinctive_sites ) diff_sites_init(args);
+
+    args->fp = bcftools_stdout;
+    print_header(args, args->fp);
+}
+
+static void destroy_data(args_t *args)         // releases everything set up by init_data() and the buffers filled while processing
+{
+    if ( args->gt_dsg!=args->qry_dsg ) free(args->gt_dsg);     // init_data() makes gt_dsg an alias of qry_dsg in cross-check mode
+    free(args->qry_dsg);
+    if ( args->gt_prob!=args->qry_prob ) free(args->gt_prob);  // same aliasing as above for the probability arrays
+    free(args->qry_prob);
+    free(args->es_max_mem);
+    fclose(args->fp);
+    if ( args->distinctive_sites ) diff_sites_destroy(args);
+    free(args->hwe_prob);
+    free(args->cwd);
+    free(args->qry_arr);
+    if ( args->gt_hdr ) free(args->gt_arr);    // without a -g file process_line() points gt_arr at qry_arr, which was freed just above
+    free(args->pdiff);
+    free(args->ndiff);
+    free(args->ncnt);
+    free(args->qry_smpl);
+    if ( args->gt_smpl!=args->qry_smpl ) free(args->gt_smpl);  // init_data() may set gt_smpl to qry_smpl
+    free(args->pairs);
+    bcf_sr_destroy(args->files);
+}
 
-        // For faster access to genotype likelihoods (PLs) of the query sample
-        int max_ipl, *pl_ptr = args->pl_arr + query_isample*npl;
-        double sum_pl = 0; // for converting PLs to probs
-        for (max_ipl=0; max_ipl<npl; max_ipl++)
+static inline uint8_t gt_to_dsg(int32_t *ptr)  // diploid GT -> one-hot dosage mask: 1, 2 or 4 for 0, 1 or 2 non-reference alleles; 0 if unusable
+{
+    if ( bcf_gt_is_missing(ptr[0]) || bcf_gt_is_missing(ptr[1]) || ptr[1]==bcf_int32_vector_end ) return 0;    // missing allele or no second allele
+    uint8_t dsg = (bcf_gt_allele(ptr[0])?1:0) + (bcf_gt_allele(ptr[1])?1:0);   // any non-zero allele index counts as one
+    return 1<<dsg;
+}
+static inline uint8_t pl_to_dsg(int32_t *ptr)  // three PL values -> bitmask of the positions holding the minimum PL (bit 0,1,2 for ptr[0],ptr[1],ptr[2]); 0 if unusable
+{
+    if ( ptr[0]==bcf_int32_missing || ptr[1]==bcf_int32_missing || ptr[2]==bcf_int32_missing ) return 0;
+    if ( ptr[1]==bcf_int32_vector_end || ptr[2]==bcf_int32_vector_end ) return 0;      // fewer than three values present
+    int min_pl = ptr[0]<ptr[1] ? (ptr[0]<ptr[2]?ptr[0]:ptr[2]) : (ptr[1]<ptr[2]?ptr[1]:ptr[2]);
+    uint8_t dsg = 0;
+    if ( ptr[0]==min_pl ) dsg |= 1;    // several bits are set when the minimum is tied
+    if ( ptr[1]==min_pl ) dsg |= 2;
+    if ( ptr[2]==min_pl ) dsg |= 4;
+    return dsg;
+}
+static inline uint8_t gt_to_prob(args_t *args, int32_t *ptr, double *prob)     // like gt_to_dsg(), and additionally fills prob[0..2] from the dsg2prob table
+{
+    uint8_t dsg = gt_to_dsg(ptr);
+    if ( dsg )     // prob[] is left untouched when the genotype is unusable (dsg==0)
+    {
+        prob[0] = args->dsg2prob[dsg][0];
+        prob[1] = args->dsg2prob[dsg][1];
+        prob[2] = args->dsg2prob[dsg][2];
+    }
+    return dsg;
+}
+static inline uint8_t pl_to_prob(args_t *args, int32_t *ptr, double *prob)     // like pl_to_dsg(), and additionally fills prob[0..2] with -log of the normalized PL-derived probabilities
+{
+    uint8_t dsg = pl_to_dsg(ptr);
+    if ( dsg )     // prob[] is left untouched when the PLs are unusable (dsg==0)
+    {
+        prob[0] = (ptr[0]>=0 && ptr[0]<255) ? args->pl2prob[ptr[0]] : args->pl2prob[255];      // values outside 0..254 use the last table entry
+        prob[1] = (ptr[1]>=0 && ptr[1]<255) ? args->pl2prob[ptr[1]] : args->pl2prob[255];
+        prob[2] = (ptr[2]>=0 && ptr[2]<255) ? args->pl2prob[ptr[2]] : args->pl2prob[255];
+        double sum = prob[0] + prob[1] + prob[2];
+        prob[0] /= sum;        // normalize so the three probabilities sum to one
+        prob[1] /= sum;
+        prob[2] /= sum;
+        prob[0] = -log(prob[0]);
+        prob[1] = -log(prob[1]);
+        prob[2] = -log(prob[2]);
+    }
+    return dsg;
+}
+static int set_data(args_t *args, bcf_hdr_t *hdr, bcf1_t *rec, int32_t **arr, int32_t *narr, int *narr1, int *use_GT)  // loads GT or PL of rec into *arr; returns 0 and sets *narr1 (values per sample: 2 for GT, 3 for PL), or -1 when the site is skipped
+{
+    static int warn_dip_GT = 1;        // the "only diploid" notices below are printed once per run
+    static int warn_dip_PL = 1;
+    int i;
+    for (i=0; i<2; i++)        // at most two attempts: the preferred tag first, then the other one
+    {
+        if ( *use_GT )
         {
-            if ( pl_ptr[max_ipl]==bcf_int32_vector_end ) break;
-            if ( pl_ptr[max_ipl]==bcf_int32_missing ) continue;
-            sum_pl += pow(10, -0.1*pl_ptr[max_ipl]);
+            int ret = bcf_get_genotypes(hdr,rec,arr,narr);
+            if ( ret < 0 )
+            {
+                if ( !i ) { *use_GT = 0; continue; }   // first attempt failed: flip the caller's flag and retry with PL
+                args->nskip_no_data++;
+                return -1;
+            }
+            if ( ret != 2*bcf_hdr_nsamples(hdr) )      // anything but two values per sample is rejected
+            {
+                if ( warn_dip_GT )
+                {
+                    fprintf(bcftools_stderr,"INFO: skipping %s:%"PRIhts_pos", only diploid FORMAT/GT fields supported. (This is printed only once.)\n", bcf_seqname(hdr,rec),rec->pos+1);
+                    warn_dip_GT = 0;
+                }
+                args->nskip_dip_GT++;
+                return -1;
+            }
+            *narr1 = 2;
+            return 0;
         }
-        if ( sum_pl==0 ) continue; // no PLs present
-        if ( fake_pls && args->no_PLs==1 ) sum_pl = -1;
 
-        // The main stats: concordance of the query sample with the target -g samples
-        for (i=0; i<bcf_hdr_nsamples(args->gt_hdr); i++)
+        int ret = bcf_get_format_int32(hdr,rec,"PL",arr,narr);
+        if ( ret < 0 )
         {
-            int *gt_ptr = gt_arr + i*ngt;
-            if ( gt_ptr[1]==bcf_int32_vector_end ) continue;    // skip haploid genotypes
-            if ( bcf_gt_is_missing(gt_ptr[0]) || bcf_gt_is_missing(gt_ptr[1]) ) continue;
-            int a = bcf_gt_allele(gt_ptr[0]);
-            int b = bcf_gt_allele(gt_ptr[1]);
-            if ( args->hom_only && a!=b ) continue; // heterozygous genotype
-            int igt_tgt = igt_tgt = bcf_alleles2gt(a,b); // genotype index in the target file
-            int igt_qry = gt2ipl[igt_tgt];  // corresponding genotype in query file
-            if ( igt_qry>=max_ipl || pl_ptr[igt_qry]<0 ) continue;   // genotype not present in query sample: haploid or missing
-            args->lks[i] += sum_pl<0 ? -pl_ptr[igt_qry] : log(pow(10, -0.1*pl_ptr[igt_qry])/sum_pl);
-            args->sites[i]++;
+            if ( !i ) { *use_GT = 1; continue; }       // first attempt failed: flip the caller's flag and retry with GT
+            args->nskip_no_data++;
+            return -1;
         }
-        if ( args->all_sites )
+        if ( ret != 3*bcf_hdr_nsamples(hdr) )          // anything but three values per sample is rejected
         {
-            // Print LKs at all sites for debugging
-            int *gt_ptr = gt_arr + tgt_isample*ngt;
-            if ( gt_ptr[1]==bcf_int32_vector_end ) continue;    // skip haploid genotypes
-            int a = bcf_gt_allele(gt_ptr[0]);
-            int b = bcf_gt_allele(gt_ptr[1]);
-            if ( args->hom_only && a!=b ) continue; // heterozygous genotype
-            fprintf(fp, "SC\t%s\t%"PRId64, args->gt_hdr->id[BCF_DT_CTG][gt_line->rid].key, (int64_t) gt_line->pos+1);
-            for (i=0; i<gt_line->n_allele; i++) fprintf(fp, "%c%s", i==0?'\t':',', gt_line->d.allele[i]);
-            fprintf(fp, "\t%s/%s", a>=0 ? gt_line->d.allele[a] : ".", b>=0 ? gt_line->d.allele[b] : ".");
-            fprintf(fp, "\t%f", args->lks[query_isample]-prev_lk);
-            prev_lk = args->lks[query_isample];
-
-            int igt, *pl_ptr = args->pl_arr + query_isample*npl; // PLs of the query sample
-            for (i=0; i<sm_line->n_allele; i++) fprintf(fp, "%c%s", i==0?'\t':',', sm_line->d.allele[i]);
-            for (igt=0; igt<npl; igt++)
-                if ( pl_ptr[igt]==bcf_int32_vector_end ) break;
-                else if ( pl_ptr[igt]==bcf_int32_missing ) fprintf(fp, ".");
-                else fprintf(fp, "\t%d", pl_ptr[igt]);
-            fprintf(fp, "\n");
+            if ( warn_dip_PL )
+            {
+                fprintf(bcftools_stderr,"INFO: skipping %s:%"PRIhts_pos", only diploid FORMAT/PL fields supported. (This is printed only once.)\n", bcf_seqname(hdr,rec),rec->pos+1);
+                warn_dip_PL = 0;
+            }
+            args->nskip_dip_PL++;
+            return -1;
         }
+        *narr1 = 3;
+        return 0;
     }
-    free(gt2ipl);
-    free(gt_arr);
-    free(args->pl_arr);
-    free(args->tmp_arr);
+    return -1;  // should never reach
+}
+static void process_line(args_t *args)
+{
+    int i,j,k, nqry1, ngt1, ret;
+
+    bcf1_t *gt_rec = NULL, *qry_rec = bcf_sr_get_line(args->files,0);   // the query file
+    int qry_use_GT = args->qry_use_GT;
+    int gt_use_GT  = args->gt_use_GT;
+
+    ret = set_data(args, args->qry_hdr, qry_rec, &args->qry_arr, &args->nqry_arr, &nqry1, &qry_use_GT);
+    if ( ret<0 ) return;
 
-    // To be able to plot total discordance (=number of mismatching GTs with -G1) in the same
-    // plot as discordance per site, the latter must be scaled to the same range
-    int nsamples = bcf_hdr_nsamples(args->gt_hdr);
-    double extreme_lk = 0, extreme_lk_per_site = 0;
-    for (i=0; i<nsamples; i++)
+    if ( args->gt_hdr )
     {
-        if ( args->lks[i] < extreme_lk ) extreme_lk = args->lks[i];
-        if ( args->sites[i] && args->lks[i]/args->sites[i] < extreme_lk_per_site ) extreme_lk_per_site = args->lks[i]/args->sites[i];
+        gt_rec = bcf_sr_get_line(args->files,1);
+        ret = set_data(args, args->gt_hdr, gt_rec, &args->gt_arr, &args->ngt_arr, &ngt1, &gt_use_GT);
+        if ( ret<0 ) return;
+    }
+    else
+    {
+        ngt1 = nqry1;
+        args->gt_arr = args->qry_arr;
     }
 
-    // Sorted output
-    double **p = (double**) malloc(sizeof(double*)*nsamples);
-    for (i=0; i<nsamples; i++) p[i] = &args->lks[i];
-    qsort(p, nsamples, sizeof(int*), cmp_doubleptr);
+    args->ncmp++;
 
-    fprintf(fp, "# [1]CN\t[2]Discordance with %s (total)\t[3]Discordance (avg score per site)\t[4]Number of sites compared\t[5]Sample\t[6]Sample ID\n", args->sm_hdr->samples[query_isample]);
-    for (i=0; i<nsamples; i++)
+    double af,hwe_dsg[8];
+    if ( args->calc_hwe_prob )
     {
-        int idx = p[i] - args->lks;
-        double per_site = 0;
-        if ( args->sites[idx] )
+        int ac[2];
+        if ( args->gt_hdr )
         {
-            if ( args->sites[idx] && extreme_lk_per_site )
+            if ( bcf_calc_ac(args->gt_hdr, gt_rec, ac, BCF_UN_INFO|BCF_UN_FMT)!=1 ) error("todo: bcf_calc_ac() failed\n");
+        }
+        else if ( bcf_calc_ac(args->qry_hdr, qry_rec, ac, BCF_UN_INFO|BCF_UN_FMT)!=1 ) error("todo: bcf_calc_ac() failed\n");
+
+        // hwe indexes correspond to the bitmask of eight dsg combinations to account for PL uncertainty
+        // for in the extreme case we can have uninformative PL=0,0,0. So the values are the minima of e.g.
+        //      hwe[1,2,4] ..  dsg=0,1,2
+        //      hwe[3]     ..  dsg=0 or 1
+        //      hwe[6]     ..  dsg=1 or 2
+
+        double hwe[3];
+        const double min_af = 1e-5;             // cap the AF in case we get unrealistic values
+        af = (double)ac[1]/(ac[0]+ac[1]);
+        hwe[0] = af>min_af ? -log(af*af) : -log(min_af*min_af);
+        hwe[1] = af>min_af && af<1-min_af ? -log(2*af*(1-af)) : -log(2*min_af*(1-min_af));
+        hwe[2] = af<(1-min_af) ? -log((1-af)*(1-af)) : -log(min_af*min_af);
+        hwe_dsg[0] = 0;
+        for (i=1; i<8; i++)
+        {
+            hwe_dsg[i] = HUGE_VAL;
+            for (k=0; k<3; k++)
             {
-                per_site = args->lks[idx]/args->sites[idx];
-                per_site *= extreme_lk / extreme_lk_per_site;
+                if ( ((1<<k)&i) && hwe_dsg[i] > hwe[k] ) hwe_dsg[i] = hwe[k];
             }
-            else
-                per_site = 0;
         }
-        fprintf(fp, "CN\t%e\t%e\t%.0f\t%s\t%d\n", fabs(args->lks[idx]), fabs(per_site), args->sites[idx], args->gt_hdr->samples[idx], i);
     }
 
-    if ( args->plot )
+    // The sample pairs were given explicitly via -p/-P options
+    if ( args->pairs )
     {
-        if ( fclose(fp)!=0 ) error("[%s] Error: close failed\n", __func__);
-        plot_check(args, args->target_sample ? args->target_sample : "", args->sm_hdr->samples[query_isample]);
-    }
-}
+        if ( !args->use_PLs )
+        {
+            int ndiff = 0;
+            if ( args->kbs_diff ) diff_sites_reset(args);
 
-// static inline int is_hom_most_likely(int nals, int *pls)
-// {
-//     int ia, ib, idx = 1, min_is_hom = 1, min_pl = pls[0];
-//     for (ia=1; ia<nals; ia++)
-//     {
-//         for (ib=0; ib<ia; ib++)
-//         {
-//             if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 0; }
-//             idx++;
-//         }
-//         if ( pls[idx] < min_pl ) { min_pl = pls[idx]; min_is_hom = 1; }
-//         idx++;
-//     }
-//     return min_is_hom;
-// }
-
-int process_GT(args_t *args, bcf1_t *line, uint32_t *ntot, uint32_t *ndif)
-{
-    int ngt = bcf_get_genotypes(args->sm_hdr, line, &args->tmp_arr, &args->ntmp_arr);
+            for (i=0; i<args->npairs; i++)
+            {
+                int32_t *ptr;
+                uint8_t qry_dsg, gt_dsg;
 
-    if ( ngt<=0 ) return 1;                 // GT not present
-    if ( ngt!=args->nsmpl*2 ) return 2;     // not diploid
-    ngt /= args->nsmpl;
-    
-    int i,j, idx = 0;
-    for (i=1; i<args->nsmpl; i++)
-    {
-        int32_t *a = args->tmp_arr + i*ngt;
-        if ( bcf_gt_is_missing(a[0]) || bcf_gt_is_missing(a[1]) || a[1]==bcf_int32_vector_end ) { idx+=i; continue; }
-        int agt = 1<<bcf_gt_allele(a[0]) | 1<<bcf_gt_allele(a[1]);
+                ptr = args->gt_arr + args->pairs[i].igt*ngt1;
+                gt_dsg = gt_use_GT ? gt_to_dsg(ptr) : pl_to_dsg(ptr);
+                if ( !gt_dsg ) continue;                        // missing value
+                if ( args->hom_only && !(gt_dsg&5) ) continue;  // not a hom
+
+                ptr = args->qry_arr + args->pairs[i].iqry*nqry1;
+                qry_dsg = qry_use_GT ? gt_to_dsg(ptr) : pl_to_dsg(ptr);
+                if ( !qry_dsg ) continue;                       // missing value
+
+                int match = qry_dsg & gt_dsg;
+                if ( !match )
+                {
+                    args->ndiff[i]++;
+                    if ( args->kbs_diff ) { ndiff++; kbs_insert(args->kbs_diff, i); }
+                }
+                else if ( args->calc_hwe_prob ) args->hwe_prob[i] += hwe_dsg[match];
+                args->ncnt[i]++;
+            }
 
-        for (j=0; j<i; j++)
+            if ( ndiff ) diff_sites_push(args, ndiff, qry_rec->rid, qry_rec->pos);
+        }
+        else    // use_PLs set
         {
-            int32_t *b = args->tmp_arr + j*ngt;
-            if ( bcf_gt_is_missing(b[0]) || bcf_gt_is_missing(b[1]) || b[1]==bcf_int32_vector_end ) { idx++; continue; }
-            int bgt = 1<<bcf_gt_allele(b[0]) | 1<<bcf_gt_allele(b[1]);
+            for (i=0; i<args->npairs; i++)
+            {
+                int32_t *ptr;
+                double qry_prob[3], gt_prob[3];
+                uint8_t qry_dsg, gt_dsg;
+
+                ptr = args->gt_arr + args->pairs[i].igt*ngt1;
+                gt_dsg = gt_use_GT ? gt_to_prob(args,ptr,gt_prob) : pl_to_prob(args,ptr,gt_prob);
+                if ( !gt_dsg ) continue;                        // missing value
+                if ( args->hom_only && !(gt_dsg&5) ) continue;  // not a hom
+               
+                ptr = args->qry_arr + args->pairs[i].iqry*nqry1;
+                qry_dsg = qry_use_GT ? gt_to_prob(args,ptr,qry_prob) : pl_to_prob(args,ptr,qry_prob);
+                if ( !qry_dsg ) continue;                       // missing value
 
-            ntot[idx]++;
-            if ( agt!=bgt ) ndif[idx]++;
-            idx++;
+                double min = qry_prob[0] + gt_prob[0];
+                qry_prob[1] += gt_prob[1];
+                if ( min > qry_prob[1] ) min = qry_prob[1];
+                qry_prob[2] += gt_prob[2];
+                if ( min > qry_prob[2] ) min = qry_prob[2];
+                args->pdiff[i] += min;
+
+                if ( args->calc_hwe_prob )
+                {
+                    int match = qry_dsg & gt_dsg;
+                    args->hwe_prob[i] += hwe_dsg[match];
+                }
+                args->ncnt[i]++;
+            }
         }
+        return;
     }
-    return 0;
-}
-int process_PL(args_t *args, bcf1_t *line, uint32_t *ntot, uint32_t *ndif)
-{
-    int npl = bcf_get_format_int32(args->sm_hdr, line, "PL", &args->tmp_arr, &args->ntmp_arr);
 
-    if ( npl<=0 ) return 1;                 // PL not present
-    npl /= args->nsmpl;
-    
-    int i,j,k, idx = 0;
-    for (i=1; i<args->nsmpl; i++)
+    int idx=0;
+    if ( !args->use_PLs )
     {
-        int32_t *a = args->tmp_arr + i*npl;
-        int imin = -1;
-        for (k=0; k<npl; k++)
+        for (i=0; i<args->nqry_smpl; i++)
         {
-            if ( a[k]==bcf_int32_vector_end ) break;
-            if ( a[k]==bcf_int32_missing ) continue;
-            if ( imin==-1 || a[imin] > a[k] ) imin = k;
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;
+            int32_t *ptr = args->qry_arr + nqry1*iqry;
+            args->qry_dsg[i] = qry_use_GT ? gt_to_dsg(ptr) : pl_to_dsg(ptr);
         }
-        if ( imin<0 ) { idx+=i; continue; }
-
-        for (j=0; j<i; j++)
+        if ( !args->cross_check )   // skipped in cross-check mode, where gt_dsg points to qry_dsg
         {
-            int32_t *b = args->tmp_arr + j*npl;
-            int jmin = -1;
-            for (k=0; k<npl; k++)
+            for (i=0; i<args->ngt_smpl; i++)
             {
-                if ( b[k]==bcf_int32_vector_end ) break;
-                if ( b[k]==bcf_int32_missing ) continue;
-                if ( jmin==-1 || b[jmin] > b[k] ) jmin = k;
+                int igt = args->gt_smpl ? args->gt_smpl[i] : i;
+                int32_t *ptr = args->gt_arr + ngt1*igt;
+                args->gt_dsg[i] = gt_use_GT ? gt_to_dsg(ptr) : pl_to_dsg(ptr);
+                if ( args->hom_only && !(args->gt_dsg[i]&5) ) args->gt_dsg[i] = 0;      // not a hom, set to a missing value
+            }
+        }
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            int ngt = args->cross_check ? i : args->ngt_smpl;       // two files or a sub-diagonal cross-check mode?
+            if ( !args->qry_dsg[i] ) { idx += ngt; continue; }      // missing value
+            for (j=0; j<ngt; j++)
+            {
+                if ( !args->gt_dsg[j] ) { idx++; continue; }        // missing value
+                int match = args->qry_dsg[i] & args->gt_dsg[j];
+                if ( !match ) args->ndiff[idx]++;
+                else if ( args->calc_hwe_prob ) args->hwe_prob[idx] += hwe_dsg[match];
+                args->ncnt[idx]++;
+                idx++;
             }
-            if ( jmin<0 ) { idx++; continue; }
-
-            ntot[idx]++;
-            if ( imin!=jmin ) ndif[idx]++;
-            idx++;
         }
     }
-    return 0;
-}
+    else    // use_PLs set
+    {
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;
+            int32_t *ptr = args->qry_arr + nqry1*iqry;
+            args->qry_dsg[i] = qry_use_GT ? gt_to_prob(args,ptr,args->qry_prob+i*3) : pl_to_prob(args,ptr,args->qry_prob+i*3);
+        }
+        if ( !args->cross_check )   // skipped in cross-check mode, where gt_dsg points to qry_dsg
+        {
+            for (i=0; i<args->ngt_smpl; i++)
+            {
+                int igt = args->gt_smpl ? args->gt_smpl[i] : i;
+                int32_t *ptr = args->gt_arr + ngt1*igt;
+                args->gt_dsg[i] = gt_use_GT ? gt_to_prob(args,ptr,args->gt_prob+i*3) : pl_to_prob(args,ptr,args->gt_prob+i*3);
+                if ( args->hom_only && !(args->gt_dsg[i]&5) ) args->gt_dsg[i] = 0;      // not a hom, set to a missing value
+            }
+        }
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            int ngt = args->cross_check ? i : args->ngt_smpl;       // two files or a sub-diagonal cross-check mode?
+            if ( !args->qry_dsg[i] ) { idx += ngt; continue; }      // missing value
+            for (j=0; j<ngt; j++)
+            {
+                if ( !args->gt_dsg[j] ) { idx++; continue; }        // missing value
 
-static void cross_check_gts(args_t *args)
-{
-    // Initialize things: check which tags are defined in the header, sample names etc.
-    if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "PL")<0 )
-    {
-        if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
-            error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
-        if ( !args->no_PLs ) {
-            fprintf(bcftools_stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
-            args->no_PLs = 99;
+                double min = args->qry_prob[i*3] + args->gt_prob[j*3];
+                if ( min > args->qry_prob[i*3+1] + args->gt_prob[j*3+1] ) min = args->qry_prob[i*3+1] + args->gt_prob[j*3+1];
+                if ( min > args->qry_prob[i*3+2] + args->gt_prob[j*3+2] ) min = args->qry_prob[i*3+2] + args->gt_prob[j*3+2];
+                args->pdiff[idx] += min;
+
+                if ( args->calc_hwe_prob )
+                {
+                    int match = args->qry_dsg[i] & args->gt_dsg[j];
+                    args->hwe_prob[idx] += hwe_dsg[match];
+                }
+                args->ncnt[idx]++;
+                idx++;
+            }
         }
     }
+}
 
-    args->nsmpl = bcf_hdr_nsamples(args->sm_hdr);
-    args->narr  = (args->nsmpl-1)*args->nsmpl/2;
 
-    uint32_t *ndif = (uint32_t*) calloc(args->narr,4);
-    uint32_t *ntot = (uint32_t*) calloc(args->narr,4);
+typedef struct
+{
+    int ism, idx;
+    double val;
+}
+idbl_t;
+static int cmp_idbl(const void *_a, const void *_b)
+{
+    idbl_t *a = (idbl_t*)_a;
+    idbl_t *b = (idbl_t*)_b;
+    if ( a->val < b->val ) return -1;
+    if ( a->val > b->val ) return 1;
+    return 0;
+}
+static void report_distinctive_sites(args_t *args)
+{
+    extsort_sort(args->es);
+
+    fprintf(args->fp,"# DS, distinctive sites:\n");
+    fprintf(args->fp,"#     - chromosome\n");
+    fprintf(args->fp,"#     - position\n");
+    fprintf(args->fp,"#     - cumulative number of pairs distinguished by this block\n");
+    fprintf(args->fp,"#     - block id\n");
+    fprintf(args->fp,"#DS\t[2]Chromosome\t[3]Position\t[4]Cumulative number of distinct pairs\t[5]Block id\n");
 
-    while ( bcf_sr_next_line(args->files) )
+    kbitset_t *kbs_blk = kbs_init(args->npairs);
+    kbitset_iter_t itr;
+    int i,ndiff,rid,pos,ndiff_tot = 0, iblock = 0;
+    int ndiff_min = args->distinctive_sites <= args->npairs ? args->distinctive_sites : args->npairs;
+    while ( diff_sites_shift(args,&ndiff,&rid,&pos) )
     {
-        bcf1_t *line = bcf_sr_get_line(args->files,0);
-
-        // use PLs unless no_PLs is set and GT exists
-        if ( args->no_PLs )
+        int ndiff_new = 0, ndiff_dbg = 0;
+        kbs_start(&itr);
+        while ( (i=kbs_next(args->kbs_diff, &itr))>=0 )
         {
-            if ( process_GT(args,line,ntot,ndif)==0 ) continue;
+            ndiff_dbg++;
+            if ( kbs_exists(kbs_blk,i) ) continue;   // already set
+            kbs_insert(kbs_blk,i);
+            ndiff_new++;
         }
-        process_PL(args,line,ntot,ndif);
+        if ( ndiff_dbg!=ndiff ) error("Corrupted data, fixme: %d vs %d\n",ndiff_dbg,ndiff);
+        if ( !ndiff_new ) continue;     // no new pair distinguished by this site
+        ndiff_tot += ndiff_new;
+        fprintf(args->fp,"DS\t%s\t%d\t%d\t%d\n",bcf_hdr_id2name(args->qry_hdr,rid),pos+1,ndiff_tot,iblock);
+        if ( ndiff_tot < ndiff_min ) continue;   // fewer than the requested number of pairs can be distinguished at this point
+        iblock++;
+        ndiff_tot = 0;
+        kbs_clear(kbs_blk);
     }
-    
-    FILE *fp = bcftools_stdout;
-    print_header(args, fp);
+    kbs_destroy(kbs_blk);
+}
+static void report(args_t *args)
+{
+    fprintf(args->fp,"INFO\tsites-compared\t%u\n",args->ncmp);
+    fprintf(args->fp,"INFO\tsites-skipped-no-match\t%u\n",args->nskip_no_match);
+    fprintf(args->fp,"INFO\tsites-skipped-multiallelic\t%u\n",args->nskip_not_ba);
+    fprintf(args->fp,"INFO\tsites-skipped-monoallelic\t%u\n",args->nskip_mono);
+    fprintf(args->fp,"INFO\tsites-skipped-no-data\t%u\n",args->nskip_no_data);
+    fprintf(args->fp,"INFO\tsites-skipped-GT-not-diploid\t%u\n",args->nskip_dip_GT);
+    fprintf(args->fp,"INFO\tsites-skipped-PL-not-diploid\t%u\n",args->nskip_dip_PL);
+    fprintf(args->fp,"# DC, discordance:\n");
+    fprintf(args->fp,"#     - query sample\n");
+    fprintf(args->fp,"#     - genotyped sample\n");
+    fprintf(args->fp,"#     - discordance (number of mismatches; smaller is better)\n");
+    fprintf(args->fp,"#     - negative log of HWE probability at matching sites (rare genotype matches are more informative, bigger is better)\n");
+    fprintf(args->fp,"#     - number of sites compared (bigger is better)\n");
+    fprintf(args->fp,"#DC\t[2]Query Sample\t[3]Genotyped Sample\t[4]Discordance\t[5]-log P(HWE)\t[6]Number of sites compared\n");
 
-    float *tmp = (float*)malloc(sizeof(float)*args->nsmpl*(args->nsmpl-1)/2);
+    int trim = args->ntop;
+    if ( !args->pairs )
+    {
+        if ( !args->ngt_smpl && args->nqry_smpl <= args->ntop ) trim = 0;
+        if ( args->ngt_smpl && args->ngt_smpl <= args->ntop  ) trim = 0;
+    }
 
-    // Output pairwise distances
-    fprintf(fp, "# ERR, error rate\t[2]Pairwise error rate\t[3]Number of sites compared\t[4]Sample i\t[5]Sample j\n");
-    int i,j, idx = 0;
-    for (i=0; i<args->nsmpl; i++)
+    if ( args->pairs )
     {
-        for (j=0; j<i; j++)
+        int i;
+        for (i=0; i<args->npairs; i++)
         {
-            float err = ntot[idx] ? (float)ndif[idx]/ntot[idx] : 1e-10;
-            fprintf(fp, "ERR\t%f\t%"PRId32"\t%s\t%s\n", err, ntot[idx],args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
-            PDIST(tmp,i,j) = err;
-            idx++;
+            int iqry = args->pairs[i].iqry;
+            int igt  = args->pairs[i].igt;
+            if ( args->ndiff )
+            {
+                fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n",
+                        args->qry_hdr->samples[iqry],
+                        args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                        args->ndiff[i],
+                        args->calc_hwe_prob ? args->hwe_prob[i] : 0,
+                        args->ncnt[i]);
+            }
+            else
+            {
+                fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n",
+                        args->qry_hdr->samples[iqry],
+                        args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                        args->pdiff[i],
+                        args->calc_hwe_prob ? args->hwe_prob[i] : 0,
+                        args->ncnt[i]);
+            }
         }
     }
-
-    // Cluster samples
-    int nlist;
-    float clust_max_err = args->max_intra_err;
-    hclust_t *clust = hclust_init(args->nsmpl,tmp);
-    cluster_t *list = hclust_create_list(clust,args->min_inter_err,&clust_max_err,&nlist);
-    fprintf(fp, "# CLUSTER\t[2]Maximum inter-cluster ERR\t[3-]List of samples\n");
-    for (i=0; i<nlist; i++)
-    {
-        fprintf(fp,"CLUSTER\t%f", list[i].dist);
-        for (j=0; j<list[i].nmemb; j++)
-            fprintf(fp,"\t%s",args->sm_hdr->samples[list[i].memb[j]]);
-        fprintf(fp,"\n");
-    }
-    hclust_destroy_list(list,nlist);
-    // Debugging output: the cluster graph and data used for deciding
-    char **dbg = hclust_explain(clust,&nlist);
-    for (i=0; i<nlist; i++)
-        fprintf(fp,"DBG\t%s\n", dbg[i]);
-    fprintf(fp, "# TH, clustering threshold\t[2]Value\nTH\t%f\n",clust_max_err);
-    fprintf(fp, "# DOT\t[2]Cluster graph, visualize e.g. as \"this-output.txt | grep ^DOT | cut -f2- | dot -Tsvg -o graph.svg\"\n");
-    fprintf(fp, "DOT\t%s\n", hclust_create_dot(clust,args->sm_hdr->samples,clust_max_err));
-    hclust_destroy(clust);
-    free(tmp);
-
-
-    // Deprecated output for temporary backward compatibility
-    fprintf(fp, "# Warning: The CN block is deprecated and will be removed in future releases. Use ERR instead.\n");
-    fprintf(fp, "# [1]CN\t[2]Discordance\t[3]Number of sites\t[4]Average minimum depth\t[5]Sample i\t[6]Sample j\n");
-    idx = 0;
-    for (i=0; i<args->nsmpl; i++)
+    else if ( !trim )
     {
-        for (j=0; j<i; j++)
+        int i,j,idx=0;
+        for (i=0; i<args->nqry_smpl; i++)
         {
-            fprintf(fp, "CN\t%"PRId32"\t%"PRId32"\t0\t%s\t%s\n", ndif[idx], ntot[idx],args->sm_hdr->samples[i],args->sm_hdr->samples[j]);
-            idx++;
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;
+            int ngt  = args->cross_check ? i : args->ngt_smpl;
+            for (j=0; j<ngt; j++)
+            {
+                int igt = args->gt_smpl ? args->gt_smpl[j] : j;
+                if ( args->ndiff )
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                            args->ndiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+                else
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                            args->pdiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+                idx++;
+            }
         }
     }
-
-    free(ndif);
-    free(ntot);
-    free(args->tmp_arr);
+    else if ( !args->cross_check )
+    {
+        idbl_t *arr = (idbl_t*)malloc(sizeof(*arr)*args->ngt_smpl);
+        int i,j;
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            int idx  = i*args->ngt_smpl;
+            for (j=0; j<args->ngt_smpl; j++)
+            {
+                if ( args->sort_by_hwe )
+                    arr[j].val = -args->hwe_prob[idx];
+                else if ( args->ndiff )
+                    arr[j].val = args->ncnt[idx] ? (double)args->ndiff[idx]/args->ncnt[idx] : 0;
+                else
+                    arr[j].val = args->ncnt[idx] ? args->pdiff[idx]/args->ncnt[idx] : 0;
+                arr[j].ism = j;
+                arr[j].idx = idx;
+                idx++;
+            }
+            qsort(arr, args->ngt_smpl, sizeof(*arr), cmp_idbl);
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;
+            for (j=0; j<args->ntop; j++)
+            {
+                int idx = arr[j].idx;
+                int igt = args->gt_smpl ? args->gt_smpl[arr[j].ism] : arr[j].ism;
+                if ( args->ndiff )
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                            args->ndiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+                else
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->gt_hdr?args->gt_hdr->samples[igt]:args->qry_hdr->samples[igt],
+                            args->pdiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+            }
+        }
+        free(arr);
+    }
+    else
+    {
+        int narr = args->nqry_smpl-1;
+        idbl_t *arr = (idbl_t*)malloc(sizeof(*arr)*narr);
+        int i,j,k,idx;
+        for (i=0; i<args->nqry_smpl; i++)
+        {
+            k = 0, idx = i*(i-1)/2;
+            for (j=0; j<i; j++)
+            {
+                if ( args->sort_by_hwe )
+                    arr[k].val = -args->hwe_prob[idx];
+                else if ( args->ndiff )
+                    arr[k].val = args->ncnt[idx] ? (double)args->ndiff[idx]/args->ncnt[idx] : 0;
+                else
+                    arr[k].val = args->ncnt[idx] ? args->pdiff[idx]/args->ncnt[idx] : 0;
+                arr[k].ism = j;
+                arr[k].idx = idx;
+                idx++;
+                k++;
+            }
+            for (; j<narr; j++)
+            {
+                idx = j*(j+1)/2 + i;
+                if ( args->sort_by_hwe )
+                    arr[k].val = -args->hwe_prob[idx];
+                else if ( args->ndiff )
+                    arr[k].val = args->ncnt[idx] ? (double)args->ndiff[idx]/args->ncnt[idx] : 0;
+                else
+                    arr[k].val = args->ncnt[idx] ? args->pdiff[idx]/args->ncnt[idx] : 0;
+                arr[k].ism = j + 1;
+                arr[k].idx = idx;
+                k++;
+            }
+            qsort(arr, narr, sizeof(*arr), cmp_idbl);
+            int iqry = args->qry_smpl ? args->qry_smpl[i] : i;
+            for (j=0; j<args->ntop; j++)
+            {
+                if ( i <= arr[j].ism ) continue;
+                int idx = arr[j].idx;
+                int igt = args->qry_smpl ? args->qry_smpl[arr[j].ism] : arr[j].ism;
+                if ( args->ndiff )
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%u\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->qry_hdr->samples[igt],
+                            args->ndiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+                else
+                {
+                    fprintf(args->fp,"DC\t%s\t%s\t%e\t%e\t%u\n",
+                            args->qry_hdr->samples[iqry],
+                            args->qry_hdr->samples[igt],
+                            args->pdiff[idx],
+                            args->calc_hwe_prob ? args->hwe_prob[idx] : 0,
+                            args->ncnt[idx]);
+                }
+            }
+        }
+        free(arr);
+    }
 }
 
-static char *init_prefix(char *prefix)
+static int is_input_okay(args_t *args, int nmatch)
 {
-    int len = strlen(prefix);
-    if ( prefix[len-1] == '/' || prefix[len-1] == '\\' )
-        return msprintf("%sgtcheck", prefix);
-    return strdup(prefix);
+    int i;
+    const char *msg;
+    bcf_hdr_t *hdr;
+    bcf1_t *rec;
+    if ( args->gt_hdr && nmatch!=2 )
+    {
+        if ( args->nskip_no_match++ ) return 0;
+        for (i=0; i<2; i++)
+        {
+            rec = bcf_sr_get_line(args->files,i);
+            if ( rec ) break;
+        }
+        hdr = bcf_sr_get_header(args->files,i);
+        fprintf(bcftools_stderr,"INFO: skipping %s:%"PRIhts_pos", no record with matching POS+ALT. (This is printed only once.)\n",
+                bcf_seqname(hdr,rec),rec->pos+1);
+        return 0;
+    }
+    for (i=0; i<2; i++)
+    {
+        hdr = bcf_sr_get_header(args->files,i);
+        rec = bcf_sr_get_line(args->files,i);
+        if ( rec->n_allele>2 )
+        {
+            if ( args->nskip_not_ba++ ) return 0;
+            msg = "not a biallelic site, run `bcftools norm -m -` first";
+            goto not_okay;
+        }
+        if ( bcf_get_variant_types(rec)==VCF_REF )
+        {
+            if ( args->nskip_mono++ ) return 0;
+            msg = "monoallelic site";
+            goto not_okay;
+        }
+        if ( !args->gt_hdr ) break;
+    }
+    return 1;
+
+not_okay:
+    fprintf(bcftools_stderr,"INFO: skipping %s:%"PRIhts_pos", %s. (This is printed only once.)\n", 
+        bcf_seqname(hdr,rec),rec->pos+1,msg);
+    return 0;
 }
 
 static void usage(void)
@@ -714,30 +1028,62 @@ static void usage(void)
     fprintf(bcftools_stderr, "Usage:   bcftools gtcheck [options] [-g <genotypes.vcf.gz>] <query.vcf.gz>\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Options:\n");
-    fprintf(bcftools_stderr, "    -a, --all-sites                 output comparison for all sites\n");
-    fprintf(bcftools_stderr, "    -c, --cluster <min,max>         min inter- and max intra-sample error [0.23,-0.3]\n");
-    fprintf(bcftools_stderr, "    -g, --genotypes <file>          genotypes to compare against\n");
-    fprintf(bcftools_stderr, "    -G, --GTs-only <int>            use GTs, ignore PLs, using <int> for unseen genotypes [99]\n");
-    fprintf(bcftools_stderr, "    -H, --homs-only                 homozygous genotypes only (useful for low coverage data)\n");
-    fprintf(bcftools_stderr, "    -p, --plot <prefix>             plot\n");
-    fprintf(bcftools_stderr, "    -r, --regions <region>          restrict to comma-separated list of regions\n");
-    fprintf(bcftools_stderr, "    -R, --regions-file <file>       restrict to regions listed in a file\n");
-    fprintf(bcftools_stderr, "    -s, --query-sample <string>     query sample (by default the first sample is checked)\n");
-    fprintf(bcftools_stderr, "    -S, --target-sample <string>    target sample in the -g file (used only for plotting)\n");
-    fprintf(bcftools_stderr, "    -t, --targets <region>          similar to -r but streams rather than index-jumps\n");
-    fprintf(bcftools_stderr, "    -T, --targets-file <file>       similar to -R but streams rather than index-jumps\n");
+    //fprintf(bcftools_stderr, "    -a, --all-sites                  Output comparison for all sites\n");
+    //fprintf(bcftools_stderr, "    -c, --cluster MIN,MAX            Min inter- and max intra-sample error [0.23,-0.3]\n");
+    fprintf(bcftools_stderr, "        --distinctive-sites            Find sites that can distinguish between at least NUM sample pairs.\n");
+    fprintf(bcftools_stderr, "                  NUM[,MEM[,TMP]]          If the number is smaller or equal to 1, it is interpreted as the fraction of pairs.\n");
+    fprintf(bcftools_stderr, "                                           The optional MEM string sets the maximum memory used for in-memory sorting [500M]\n");
+#ifdef _WIN32
+    fprintf(bcftools_stderr, "                                           and TMP is a prefix of temporary files used by external sorting [/bcftools.XXXXXX]\n");
+#else
+    fprintf(bcftools_stderr, "                                           and TMP is a prefix of temporary files used by external sorting [/tmp/bcftools.XXXXXX]\n");
+#endif
+    fprintf(bcftools_stderr, "        --dry-run                      Stop after first record to estimate required time\n");
+    fprintf(bcftools_stderr, "    -e, --error-probability INT        Phred-scaled probability of genotyping error, 0 for faster but less accurate results [40]\n");
+    fprintf(bcftools_stderr, "    -g, --genotypes FILE               Genotypes to compare against\n");
+    fprintf(bcftools_stderr, "    -H, --homs-only                    Homozygous genotypes only, useful with low coverage data (requires -g)\n");
+    fprintf(bcftools_stderr, "        --n-matches INT                Print only top INT matches for each sample (sorted by average score), 0 for unlimited.\n");
+    fprintf(bcftools_stderr, "                                           Use negative value to sort by HWE probability rather than by discordance [0]\n");
+    fprintf(bcftools_stderr, "        --no-HWE-prob                  Disable calculation of HWE probability\n");
+    fprintf(bcftools_stderr, "    -p, --pairs LIST                   Comma-separated sample pairs to compare (qry,gt[,qry,gt..] with -g or qry,qry[,qry,qry..] w/o)\n");
+    fprintf(bcftools_stderr, "    -P, --pairs-file FILE              File with tab-delimited sample pairs to compare (qry,gt with -g or qry,qry w/o)\n");
+    fprintf(bcftools_stderr, "    -r, --regions REGION               Restrict to comma-separated list of regions\n");
+    fprintf(bcftools_stderr, "    -R, --regions-file FILE            Restrict to regions listed in a file\n");
+    fprintf(bcftools_stderr, "    -s, --samples [qry|gt]:LIST        List of query or -g samples, \"-\" to select all samples (by default all samples are compared)\n");
+    fprintf(bcftools_stderr, "    -S, --samples-file [qry|gt]:FILE   File with the query or -g samples to compare\n");
+    fprintf(bcftools_stderr, "    -t, --targets REGION               Similar to -r but streams rather than index-jumps\n");
+    fprintf(bcftools_stderr, "    -T, --targets-file FILE            Similar to -R but streams rather than index-jumps\n");
+    fprintf(bcftools_stderr, "    -u, --use TAG1[,TAG2]              Which tag to use in the query file (TAG1) and the -g file (TAG2) [PL,GT]\n");
+    fprintf(bcftools_stderr, "Examples:\n");
+    fprintf(bcftools_stderr, "   # Check discordance of all samples from B against all samples in A\n");
+    fprintf(bcftools_stderr, "   bcftools gtcheck -g A.bcf B.bcf\n");
+    fprintf(bcftools_stderr, "\n");
+    fprintf(bcftools_stderr, "   # Limit comparisons to the given list of samples\n");
+    fprintf(bcftools_stderr, "   bcftools gtcheck -s gt:a1,a2,a3 -s qry:b1,b2 -g A.bcf B.bcf\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    fprintf(bcftools_stderr, "   # Compare only two pairs a1,b1 and a1,b2\n");
+    fprintf(bcftools_stderr, "   bcftools gtcheck -p a1,b1,a1,b2 -g A.bcf B.bcf\n");
+    fprintf(bcftools_stderr, "\n");
+    bcftools_exit(1);
 }
 
 int main_vcfgtcheck(int argc, char *argv[])
 {
     int c;
     args_t *args = (args_t*) calloc(1,sizeof(args_t));
-    args->files  = bcf_sr_init();
     args->argc   = argc; args->argv = argv; set_cwd(args);
-    char *regions = NULL, *targets = NULL;
-    int regions_is_file = 0, targets_is_file = 0;
+    args->qry_use_GT = -1;
+    args->gt_use_GT  = -1;
+    args->calc_hwe_prob = 1;
+    args->use_PLs = 40;
+
+    // external sort for --distinctive-sites
+#ifdef _WIN32
+    args->es_tmp_prefix = NULL;
+#else
+    args->es_tmp_prefix = "/tmp/bcftools-gtcheck";
+#endif
+    args->es_max_mem = strdup("500M");
 
     // In simulated sample swaps the minimum error was 0.3 and maximum intra-sample error was 0.23
     //    - min_inter: pairs with smaller err value will be considered identical 
@@ -748,6 +1094,8 @@ int main_vcfgtcheck(int argc, char *argv[])
 
     static struct option loptions[] =
     {
+        {"error-probability",1,0,'e'},
+        {"use",1,0,'u'},
         {"cluster",1,0,'c'},
         {"GTs-only",1,0,'G'},
         {"all-sites",0,0,'a'},
@@ -755,18 +1103,74 @@ int main_vcfgtcheck(int argc, char *argv[])
         {"help",0,0,'h'},
         {"genotypes",1,0,'g'},
         {"plot",1,0,'p'},
-        {"target-sample",1,0,'S'},
-        {"query-sample",1,0,'s'},
+        {"samples",1,0,'s'},
+        {"samples-file",1,0,'S'},
+        {"n-matches",1,0,2},
+        {"no-HWE-prob",0,0,3},
+        {"target-sample",1,0,4},
+        {"dry-run",0,0,5},
+        {"distinctive-sites",1,0,6},
         {"regions",1,0,'r'},
         {"regions-file",1,0,'R'},
         {"targets",1,0,'t'},
         {"targets-file",1,0,'T'},
+        {"pairs",1,0,'p'},
+        {"pairs-file",1,0,'P'},
         {0,0,0,0}
     };
     char *tmp;
-    while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:R:at:T:G:c:",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "hg:p:s:S:p:P:Hr:R:at:T:G:c:u:e:",loptions,NULL)) >= 0) {
         switch (c) {
+            case 'e':
+                args->use_PLs = strtol(optarg,&tmp,10);
+                if ( !tmp || *tmp ) error("Could not parse: --error-probability %s\n", optarg);
+                break;
+            case 'u':
+                {
+                    int i,nlist;
+                    char **list = hts_readlist(optarg, 0, &nlist);
+                    if ( !list || nlist<=0 || nlist>2 ) error("Failed to parse --use %s\n", optarg);
+                    if ( !strcasecmp("GT",list[0]) ) args->qry_use_GT = 1;
+                    else if ( !strcasecmp("PL",list[0]) ) args->qry_use_GT = 0;
+                    else error("Failed to parse --use %s; only GT and PL are supported\n", optarg);
+                    if ( nlist==2 )
+                    {
+                        if ( !strcasecmp("GT",list[1]) ) args->gt_use_GT = 1;
+                        else if ( !strcasecmp("PL",list[1]) ) args->gt_use_GT = 0;
+                        else error("Failed to parse --use %s; only GT and PL are supported\n", optarg);
+                    }
+                    else args->gt_use_GT = args->qry_use_GT;
+                    for (i=0; i<nlist; i++) free(list[i]);
+                    free(list);
+                }
+                break;
+            case 2 :
+                args->ntop = strtol(optarg,&tmp,10);
+                if ( !tmp || *tmp ) error("Could not parse: --n-matches %s\n", optarg);
+                if ( args->ntop < 0 )
+                {
+                    args->sort_by_hwe = 1;
+                    args->ntop *= -1;
+                }
+                break;
+            case 3 : args->calc_hwe_prob = 0; break;
+            case 4 : error("The option -S, --target-sample has been deprecated\n"); break;
+            case 5 : args->dry_run = 1; break;
+            case 6 : 
+                args->distinctive_sites = strtod(optarg,&tmp);
+                if ( *tmp )
+                {
+                    if ( *tmp!=',' ) error("Could not parse: --distinctive-sites %s\n", optarg);
+                    tmp++;
+                    free(args->es_max_mem);
+                    args->es_max_mem = strdup(tmp);
+                    while ( *tmp && *tmp!=',' ) tmp++;
+                    if ( *tmp ) { *tmp = 0; args->es_tmp_prefix = tmp+1; }
+                }
+                args->use_PLs = 0;
+                break;
             case 'c':
+                error("The -c option is to be implemented, please open an issue on github\n");
                 args->min_inter_err = strtod(optarg,&tmp);
                 if ( *tmp )
                 {
@@ -775,50 +1179,77 @@ int main_vcfgtcheck(int argc, char *argv[])
                     if ( *tmp ) error("Could not parse: -c %s\n", optarg);
                 }
                 break;
-            case 'G':
-                args->no_PLs = strtol(optarg,&tmp,10);
-                if ( *tmp ) error("Could not parse argument: --GTs-only %s\n", optarg);
-                break;
-            case 'a': args->all_sites = 1; break;
+            case 'G': error("The option -G, --GTs-only has been deprecated\n"); break;
+            case 'a': args->all_sites = 1; error("The -a option is to be implemented, please open an issue on github\n"); break;
             case 'H': args->hom_only = 1; break;
             case 'g': args->gt_fname = optarg; break;
-            case 'p': args->plot = optarg; break;
-            case 'S': args->target_sample = optarg; break;
-            case 's': args->query_sample = optarg; break;
-            case 'r': regions = optarg; break;
-            case 'R': regions = optarg; regions_is_file = 1; break;
-            case 't': targets = optarg; break;
-            case 'T': targets = optarg; targets_is_file = 1; break;
+//            case 'p': args->plot = optarg; break;
+            case 's':
+                if ( !strncasecmp("gt:",optarg,3) ) args->gt_samples = optarg+3;
+                else if ( !strncasecmp("qry:",optarg,4) ) args->qry_samples = optarg+4;
+                else error("Which one? Query samples (qry:%s) or genotype samples (gt:%s)?\n",optarg,optarg);
+                break;
+            case 'S': 
+                if ( !strncasecmp("gt:",optarg,3) ) args->gt_samples = optarg+3, args->gt_samples_is_file = 1;
+                else if ( !strncasecmp("qry:",optarg,4) ) args->qry_samples = optarg+4, args->qry_samples_is_file = 1;
+                else error("Which one? Query samples (qry:%s) or genotype samples (gt:%s)?\n",optarg,optarg);
+                break;
+            case 'p': args->pair_samples = optarg; break;
+            case 'P': args->pair_samples = optarg; args->pair_samples_is_file = 1; break;
+            case 'r': args->regions = optarg; break;
+            case 'R': args->regions = optarg; args->regions_is_file = 1; break;
+            case 't': args->targets = optarg; break;
+            case 'T': args->targets = optarg; args->targets_is_file = 1; break;
             case 'h':
             case '?': usage(); break;
             default: error("Unknown argument: %s\n", optarg);
         }
     }
-    char *fname = NULL;
     if ( optind==argc )
     {
-        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
+        if ( !isatty(fileno((FILE *)stdin)) ) args->qry_fname = "-";  // reading from stdin
         else usage();   // no files given
     }
-    else fname = argv[optind];
-    if ( argc>optind+1 )  usage();  // too many files given
-    if ( !args->gt_fname ) args->cross_check = 1;   // no genotype file, run in cross-check mode
-    else args->files->require_index = 1;
-    if ( regions && bcf_sr_set_regions(args->files, regions, regions_is_file)<0 ) error("Failed to read the regions: %s\n", regions);
-    if ( targets && bcf_sr_set_targets(args->files, targets, targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n", targets);
-    if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
-    if ( args->gt_fname && !bcf_sr_add_reader(args->files, args->gt_fname) )
-        error("Failed to read from %s: %s\n", !strcmp("-",args->gt_fname)?"standard input":args->gt_fname,bcf_sr_strerror(args->files->errnum));
-    args->files->collapse = COLLAPSE_SNPS|COLLAPSE_INDELS;
-    if ( args->plot ) args->plot = init_prefix(args->plot);
+    else args->qry_fname = argv[optind];
+    if ( argc>optind+1 ) error("Error: too many files given, run with -h for help\n");  // too many files given
+    if ( args->pair_samples )
+    {
+        if ( args->gt_samples || args->qry_samples ) error("The -p/-P option cannot be combined with -s/-S\n");
+        if ( args->ntop ) error("The --n-matches option cannot be combined with -p/-P\n");
+    }
+    if ( args->distinctive_sites && !args->pair_samples ) error("The experimental option --distinctive-sites requires -p/-P\n");
+    if ( args->hom_only && !args->gt_fname ) error("The option --homs-only requires --genotypes\n");
+    if ( args->distinctive_sites && args->use_PLs ) error("The option --distinctive-sites cannot be combined with --error-probability\n");
+
     init_data(args);
-    if ( args->cross_check )
-        cross_check_gts(args);
-    else
-        check_gt(args);
+
+    int ret;
+    while ( (ret=bcf_sr_next_line(args->files)) )
+    {
+        if ( !is_input_okay(args,ret) ) continue;
+
+        // time one record to give the user an estimate with very big files
+        struct timeval t0, t1;
+        if ( !args->ncmp )  gettimeofday(&t0, NULL);
+
+        process_line(args);
+
+        if ( args->ncmp==1 )
+        {
+            gettimeofday(&t1, NULL);
+            double delta = (t1.tv_sec - t0.tv_sec) * 1e6 + (t1.tv_usec - t0.tv_usec);
+            fprintf(bcftools_stderr,"INFO:\tTime required to process one record .. %f seconds\n",delta/1e6);
+            fprintf(args->fp,"INFO\tTime required to process one record .. %f seconds\n",delta/1e6);
+            if ( args->dry_run ) break;
+        }
+    }
+    if ( !args->dry_run )
+    {
+        report(args);
+        if ( args->distinctive_sites ) report_distinctive_sites(args);
+    }
+
     destroy_data(args);
-    bcf_sr_destroy(args->files);
-    if (args->plot) free(args->plot);
     free(args);
     return 0;
 }
diff --git a/bcftools/vcfindex.c b/bcftools/vcfindex.c
index 9f7de23..4a16d8a 100644
--- a/bcftools/vcfindex.c
+++ b/bcftools/vcfindex.c
@@ -1,6 +1,6 @@
 /*  vcfindex.c -- Index bgzip compressed VCF/BCF files for random access.
 
-    Copyright (C) 2014-2016 Genome Research Ltd.
+    Copyright (C) 2014-2021 Genome Research Ltd.
 
     Author: Shane McCarthy <sm15@sanger.ac.uk>
 
@@ -24,6 +24,7 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <htslib/vcf.h>
@@ -37,6 +38,11 @@ DEALINGS IN THE SOFTWARE.  */
 
 #define BCF_LIDX_SHIFT    14
 
+enum {
+    per_contig = 1,     // set by -s: report per-contig statistics
+    total = 2           // set by -n: report only the summed record count
+};
+
 static void usage(void)
 {
     fprintf(stderr, "\n");
@@ -47,7 +53,7 @@ static void usage(void)
     fprintf(stderr, "    -c, --csi                generate CSI-format index for VCF/BCF files [default]\n");
     fprintf(stderr, "    -f, --force              overwrite index if it already exists\n");
     fprintf(stderr, "    -m, --min-shift INT      set minimal interval size for CSI indices to 2^INT [14]\n");
-    fprintf(stderr, "    -o, --output-file FILE   optional output index file name\n");
+    fprintf(stderr, "    -o, --output FILE        optional output index file name\n");
     fprintf(stderr, "    -t, --tbi                generate TBI-format index for VCF files\n");
     fprintf(stderr, "        --threads INT        use multithreading with INT worker threads [0]\n");
     fprintf(stderr, "\n");
@@ -60,65 +66,137 @@ static void usage(void)
 
 int vcf_index_stats(char *fname, int stats)
 {
-    const char **seq;
-    int i, nseq;
+    const char **seq = NULL;
+    int tid, nseq = 0, ret = 0;
     tbx_t *tbx = NULL;
+    bcf_hdr_t *hdr = NULL;
     hts_idx_t *idx = NULL;
+    htsFile *fp = NULL;
+    uint64_t sum = 0;
+    char *fntemp = NULL, *fnidx = NULL;
 
-    htsFile *fp = hts_open(fname,"r");
-    if ( !fp ) { fprintf(stderr,"Could not read %s\n", fname); return 1; }
-    bcf_hdr_t *hdr = bcf_hdr_read(fp);
-    if ( !hdr ) { fprintf(stderr,"Could not read the header: %s\n", fname); return 1; }
-
-    if ( hts_get_format(fp)->format==vcf )
+    /*
+     * Prints per-contig record counts (stats&per_contig) or the total number
+     * of records (stats&total); returns 0 on success and 1 on failure.
+     * The index may be named explicitly (file HTS_IDX_DELIM index, or a bare
+     * .csi/.tbi path). Per-contig stats open the variant file as well, since
+     * the contig names can only be retrieved from its header; otherwise just
+     * the corresponding index file is used to count the records.
+     */
+    int len = strlen(fname);
+    if ( (fnidx = strstr(fname, HTS_IDX_DELIM)) != NULL ) {
+        fntemp = strdup(fname);
+        if ( !fntemp ) return 1;
+        fntemp[fnidx-fname] = 0;
+        fname = fntemp;
+        fnidx += strlen(HTS_IDX_DELIM);
+    }
+    else if ( len>4 && (!strcasecmp(".csi",fname+len-4) || !strcasecmp(".tbi",fname+len-4)) )
     {
-        tbx = tbx_index_load(fname);
-        if ( !tbx ) { fprintf(stderr,"Could not load index for VCF: %s\n", fname); return 1; }
+        fnidx  = fname;
+        if ( !(fntemp = strdup(fname)) ) return 1;  // nothing else is held yet, a plain return is safe
+        fname  = fntemp;
+        fname[len-4] = 0;
     }
-    else if ( hts_get_format(fp)->format==bcf )
+
+    if ( stats&per_contig )
     {
-        idx = bcf_index_load(fname);
-        if ( !idx ) { fprintf(stderr,"Could not load index for BCF file: %s\n", fname); return 1; }
+        fp = hts_open(fname,"r");
+        if ( !fp ) {
+            fprintf(stderr,"Could not read %s\n", fname);
+            ret = 1; goto cleanup;
+        }
+        hdr = bcf_hdr_read(fp);
+        if ( !hdr ) {
+            fprintf(stderr,"Could not read the header: %s\n", fname);
+            ret = 1; goto cleanup;
+        }
+
+        if ( hts_get_format(fp)->format==vcf )
+        {
+            tbx = tbx_index_load2(fname, fnidx);
+            if ( !tbx ) { fprintf(stderr,"Could not load index for VCF: %s\n", fname); ret = 1; goto cleanup; }  // release fp, hdr, fntemp
+        }
+        else if ( hts_get_format(fp)->format==bcf )
+        {
+            idx = bcf_index_load2(fname, fnidx);
+            if ( !idx ) { fprintf(stderr,"Could not load index for BCF file: %s\n", fname); ret = 1; goto cleanup; }  // release fp, hdr, fntemp
+        }
+        else
+        {
+            fprintf(stderr,"Could not detect the file type as VCF or BCF: %s\n", fname);
+            ret = 1; goto cleanup;  // release fp, hdr, fntemp
+        }
     }
-    else
+    else if ( fnidx )
     {
-        fprintf(stderr,"Could not detect the file type as VCF or BCF: %s\n", fname);
-        return 1;
+        char *ext = strrchr(fnidx, '.');
+        if ( ext && strcmp(ext, ".tbi") == 0 ) {
+            tbx = tbx_index_load2(fname, fnidx);
+        } else if ( ext && strcmp(ext, ".csi") == 0 ) {
+            idx = bcf_index_load2(fname, fnidx);
+        }
+        if ( !tbx && !idx ) {
+            fprintf(stderr,"Could not load index file '%s'\n", fnidx);
+            ret = 1; goto cleanup;
+        }
+    } else {
+        char *ext = strrchr(fname, '.');
+        if ( ext && strcmp(ext, ".bcf") == 0 ) {
+            idx = bcf_index_load(fname);
+        } else if ( ext && (ext-fname) > 4 && strcmp(ext-4, ".vcf.gz") == 0 ) {
+            tbx = tbx_index_load(fname);
+        }
     }
 
-    seq = tbx ? tbx_seqnames(tbx, &nseq) : bcf_index_seqnames(idx, hdr, &nseq);
-    uint64_t sum = 0;
-    for (i=0; i<nseq; i++)
+    if ( !tbx && !idx ) {
+        fprintf(stderr,"No index file could be found for '%s'. Use 'bcftools index' to create one\n", fname);
+        ret = 1; goto cleanup;
+    }
+
+    if ( tbx ) {
+        seq = tbx_seqnames(tbx, &nseq);
+    } else {
+        nseq = hts_idx_nseq(idx);
+    }
+
+    for (tid=0; tid<nseq; tid++)
     {
         uint64_t records, v;
-        hts_idx_get_stat(tbx ? tbx->idx : idx, i, &records, &v);
-        sum+=records;
-        if (stats&2 || !records) continue;
-        bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", seq[i], NULL);
-        int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1;
-        printf("%s\t%s\t%" PRIu64 "\n", seq[i], hkey<0?".":hrec->vals[hkey], records);
-    }
-    if (!sum)
+        hts_idx_get_stat(tbx ? tbx->idx : idx, tid, &records, &v);
+        sum += records;
+        if ( (stats&total) || !records ) continue;
+        const char *ctg_name = tbx ? seq[tid] : hdr ? bcf_hdr_id2name(hdr, tid) : NULL;
+        if ( ctg_name ) {
+            bcf_hrec_t *hrec = hdr ? bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", ctg_name, NULL) : NULL;
+            int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1;
+            printf("%s\t%s\t%" PRIu64 "\n", ctg_name, hkey<0?".":hrec->vals[hkey], records);
+        }
+    }
+    if ( !sum )
     {
         // No counts found.
         // Is this because index version has no stored count data, or no records?
         bcf1_t *rec = bcf_init1();
-        if (bcf_read1(fp, hdr, rec) >= 0)
-        {
+        if (fp && hdr && rec && bcf_read1(fp, hdr, rec) >= 0) {
             fprintf(stderr,"index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", fname);
-            return 1;
+            ret = 1;
         }
         bcf_destroy1(rec);
     }
-    if (stats&2) printf("%" PRIu64 "\n", sum);
+    if ( (stats&total) && !ret ) {
+        printf("%" PRIu64 "\n", sum);
+    }
+
+cleanup:
     free(seq);
-    if ( hts_close(fp)!=0 ) error("[%s] Error: close failed\n", __func__);
+    free(fntemp);
+    if ( fp && hts_close(fp)!=0 ) error("[%s] Error: close failed\n", __func__);
     bcf_hdr_destroy(hdr);
     if (tbx)
         tbx_destroy(tbx);
     if (idx)
         hts_idx_destroy(idx);
-    return 0;
+    return ret;
 }
 
 int main_vcfindex(int argc, char *argv[])
@@ -137,6 +215,7 @@ int main_vcfindex(int argc, char *argv[])
         {"nrecords",no_argument,NULL,'n'},
         {"threads",required_argument,NULL,9},
         {"output-file",required_argument,NULL,'o'},
+        {"output",required_argument,NULL,'o'},
         {NULL, 0, NULL, 0}
     };
 
@@ -152,8 +231,8 @@ int main_vcfindex(int argc, char *argv[])
                 min_shift = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --min-shift %s\n", optarg);
                 break;
-            case 's': stats |= 1; break;
-            case 'n': stats |= 2; break;
+            case 's': stats |= per_contig; break;
+            case 'n': stats |= total; break;
             case 9:
                 n_threads = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --threads %s\n", optarg);
@@ -162,7 +241,7 @@ int main_vcfindex(int argc, char *argv[])
             default: usage();
         }
     }
-    if (stats>2)
+    if (stats > total)
     {
         fprintf(stderr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__);
         return 1;
diff --git a/bcftools/vcfindex.c.pysam.c b/bcftools/vcfindex.c.pysam.c
index 0b7aeeb..acbae89 100644
--- a/bcftools/vcfindex.c.pysam.c
+++ b/bcftools/vcfindex.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfindex.c -- Index bgzip compressed VCF/BCF files for random access.
 
-    Copyright (C) 2014-2016 Genome Research Ltd.
+    Copyright (C) 2014-2021 Genome Research Ltd.
 
     Author: Shane McCarthy <sm15@sanger.ac.uk>
 
@@ -26,6 +26,7 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <htslib/vcf.h>
@@ -39,6 +40,11 @@ DEALINGS IN THE SOFTWARE.  */
 
 #define BCF_LIDX_SHIFT    14
 
+enum {
+    per_contig = 1,     // set by -s, --stats: print per contig stats
+    total = 2           // set by -n, --nrecords: print number of records
+};
+
 static void usage(void)
 {
     fprintf(bcftools_stderr, "\n");
@@ -49,7 +55,7 @@ static void usage(void)
     fprintf(bcftools_stderr, "    -c, --csi                generate CSI-format index for VCF/BCF files [default]\n");
     fprintf(bcftools_stderr, "    -f, --force              overwrite index if it already exists\n");
     fprintf(bcftools_stderr, "    -m, --min-shift INT      set minimal interval size for CSI indices to 2^INT [14]\n");
-    fprintf(bcftools_stderr, "    -o, --output-file FILE   optional output index file name\n");
+    fprintf(bcftools_stderr, "    -o, --output FILE        optional output index file name\n");
     fprintf(bcftools_stderr, "    -t, --tbi                generate TBI-format index for VCF files\n");
     fprintf(bcftools_stderr, "        --threads INT        use multithreading with INT worker threads [0]\n");
     fprintf(bcftools_stderr, "\n");
@@ -57,70 +63,142 @@ static void usage(void)
     fprintf(bcftools_stderr, "    -n, --nrecords       print number of records based on existing index file\n");
     fprintf(bcftools_stderr, "    -s, --stats          print per contig stats based on existing index file\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int vcf_index_stats(char *fname, int stats)
 {
-    const char **seq;
-    int i, nseq;
+    const char **seq = NULL;
+    int tid, nseq = 0, ret = 0;
     tbx_t *tbx = NULL;
+    bcf_hdr_t *hdr = NULL;
     hts_idx_t *idx = NULL;
+    htsFile *fp = NULL;
+    uint64_t sum = 0;
+    char *fntemp = NULL, *fnidx = NULL;
 
-    htsFile *fp = hts_open(fname,"r");
-    if ( !fp ) { fprintf(bcftools_stderr,"Could not read %s\n", fname); return 1; }
-    bcf_hdr_t *hdr = bcf_hdr_read(fp);
-    if ( !hdr ) { fprintf(bcftools_stderr,"Could not read the header: %s\n", fname); return 1; }
-
-    if ( hts_get_format(fp)->format==vcf )
+    /*
+     * Prints per-contig record counts (stats&per_contig) or the total number
+     * of records (stats&total); returns 0 on success and 1 on failure.
+     * The index may be named explicitly (file HTS_IDX_DELIM index, or a bare
+     * .csi/.tbi path). Per-contig stats open the variant file as well, since
+     * the contig names can only be retrieved from its header; otherwise just
+     * the corresponding index file is used to count the records.
+     */
+    int len = strlen(fname);
+    if ( (fnidx = strstr(fname, HTS_IDX_DELIM)) != NULL ) {
+        fntemp = strdup(fname);
+        if ( !fntemp ) return 1;
+        fntemp[fnidx-fname] = 0;
+        fname = fntemp;
+        fnidx += strlen(HTS_IDX_DELIM);
+    }
+    else if ( len>4 && (!strcasecmp(".csi",fname+len-4) || !strcasecmp(".tbi",fname+len-4)) )
     {
-        tbx = tbx_index_load(fname);
-        if ( !tbx ) { fprintf(bcftools_stderr,"Could not load index for VCF: %s\n", fname); return 1; }
+        fnidx  = fname;
+        if ( !(fntemp = strdup(fname)) ) return 1;  // nothing else is held yet, a plain return is safe
+        fname  = fntemp;
+        fname[len-4] = 0;
     }
-    else if ( hts_get_format(fp)->format==bcf )
+
+    if ( stats&per_contig )
     {
-        idx = bcf_index_load(fname);
-        if ( !idx ) { fprintf(bcftools_stderr,"Could not load index for BCF file: %s\n", fname); return 1; }
+        fp = hts_open(fname,"r");
+        if ( !fp ) {
+            fprintf(bcftools_stderr,"Could not read %s\n", fname);
+            ret = 1; goto cleanup;
+        }
+        hdr = bcf_hdr_read(fp);
+        if ( !hdr ) {
+            fprintf(bcftools_stderr,"Could not read the header: %s\n", fname);
+            ret = 1; goto cleanup;
+        }
+
+        if ( hts_get_format(fp)->format==vcf )
+        {
+            tbx = tbx_index_load2(fname, fnidx);
+            if ( !tbx ) { fprintf(bcftools_stderr,"Could not load index for VCF: %s\n", fname); ret = 1; goto cleanup; }  // release fp, hdr, fntemp
+        }
+        else if ( hts_get_format(fp)->format==bcf )
+        {
+            idx = bcf_index_load2(fname, fnidx);
+            if ( !idx ) { fprintf(bcftools_stderr,"Could not load index for BCF file: %s\n", fname); ret = 1; goto cleanup; }  // release fp, hdr, fntemp
+        }
+        else
+        {
+            fprintf(bcftools_stderr,"Could not detect the file type as VCF or BCF: %s\n", fname);
+            ret = 1; goto cleanup;  // release fp, hdr, fntemp
+        }
     }
-    else
+    else if ( fnidx )
     {
-        fprintf(bcftools_stderr,"Could not detect the file type as VCF or BCF: %s\n", fname);
-        return 1;
+        char *ext = strrchr(fnidx, '.');
+        if ( ext && strcmp(ext, ".tbi") == 0 ) {
+            tbx = tbx_index_load2(fname, fnidx);
+        } else if ( ext && strcmp(ext, ".csi") == 0 ) {
+            idx = bcf_index_load2(fname, fnidx);
+        }
+        if ( !tbx && !idx ) {
+            fprintf(bcftools_stderr,"Could not load index file '%s'\n", fnidx);
+            ret = 1; goto cleanup;
+        }
+    } else {
+        char *ext = strrchr(fname, '.');
+        if ( ext && strcmp(ext, ".bcf") == 0 ) {
+            idx = bcf_index_load(fname);
+        } else if ( ext && (ext-fname) > 4 && strcmp(ext-4, ".vcf.gz") == 0 ) {
+            tbx = tbx_index_load(fname);
+        }
     }
 
-    seq = tbx ? tbx_seqnames(tbx, &nseq) : bcf_index_seqnames(idx, hdr, &nseq);
-    uint64_t sum = 0;
-    for (i=0; i<nseq; i++)
+    if ( !tbx && !idx ) {
+        fprintf(bcftools_stderr,"No index file could be found for '%s'. Use 'bcftools index' to create one\n", fname);
+        ret = 1; goto cleanup;
+    }
+
+    if ( tbx ) {
+        seq = tbx_seqnames(tbx, &nseq);
+    } else {
+        nseq = hts_idx_nseq(idx);
+    }
+
+    for (tid=0; tid<nseq; tid++)
     {
         uint64_t records, v;
-        hts_idx_get_stat(tbx ? tbx->idx : idx, i, &records, &v);
-        sum+=records;
-        if (stats&2 || !records) continue;
-        bcf_hrec_t *hrec = bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", seq[i], NULL);
-        int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1;
-        fprintf(bcftools_stdout, "%s\t%s\t%" PRIu64 "\n", seq[i], hkey<0?".":hrec->vals[hkey], records);
-    }
-    if (!sum)
+        hts_idx_get_stat(tbx ? tbx->idx : idx, tid, &records, &v);
+        sum += records;
+        if ( (stats&total) || !records ) continue;
+        const char *ctg_name = tbx ? seq[tid] : hdr ? bcf_hdr_id2name(hdr, tid) : NULL;
+        if ( ctg_name ) {
+            bcf_hrec_t *hrec = hdr ? bcf_hdr_get_hrec(hdr, BCF_HL_CTG, "ID", ctg_name, NULL) : NULL;
+            int hkey = hrec ? bcf_hrec_find_key(hrec, "length") : -1;
+            fprintf(bcftools_stdout, "%s\t%s\t%" PRIu64 "\n", ctg_name, hkey<0?".":hrec->vals[hkey], records);
+        }
+    }
+    if ( !sum )
     {
         // No counts found.
         // Is this because index version has no stored count data, or no records?
         bcf1_t *rec = bcf_init1();
-        if (bcf_read1(fp, hdr, rec) >= 0)
-        {
+        if (fp && hdr && rec && bcf_read1(fp, hdr, rec) >= 0) {
             fprintf(bcftools_stderr,"index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", fname);
-            return 1;
+            ret = 1;
         }
         bcf_destroy1(rec);
     }
-    if (stats&2) fprintf(bcftools_stdout, "%" PRIu64 "\n", sum);
+    if ( (stats&total) && !ret ) {
+        fprintf(bcftools_stdout, "%" PRIu64 "\n", sum);
+    }
+
+cleanup:
     free(seq);
-    if ( hts_close(fp)!=0 ) error("[%s] Error: close failed\n", __func__);
+    free(fntemp);
+    if ( fp && hts_close(fp)!=0 ) error("[%s] Error: close failed\n", __func__);
     bcf_hdr_destroy(hdr);
     if (tbx)
         tbx_destroy(tbx);
     if (idx)
         hts_idx_destroy(idx);
-    return 0;
+    return ret;
 }
 
 int main_vcfindex(int argc, char *argv[])
@@ -139,6 +217,7 @@ int main_vcfindex(int argc, char *argv[])
         {"nrecords",no_argument,NULL,'n'},
         {"threads",required_argument,NULL,9},
         {"output-file",required_argument,NULL,'o'},
+        {"output",required_argument,NULL,'o'},
         {NULL, 0, NULL, 0}
     };
 
@@ -154,8 +233,8 @@ int main_vcfindex(int argc, char *argv[])
                 min_shift = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --min-shift %s\n", optarg);
                 break;
-            case 's': stats |= 1; break;
-            case 'n': stats |= 2; break;
+            case 's': stats |= per_contig; break;
+            case 'n': stats |= total; break;
             case 9:
                 n_threads = strtol(optarg,&tmp,10);
                 if ( *tmp ) error("Could not parse argument: --threads %s\n", optarg);
@@ -164,7 +243,7 @@ int main_vcfindex(int argc, char *argv[])
             default: usage();
         }
     }
-    if (stats>2)
+    if (stats > total)
     {
         fprintf(bcftools_stderr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__);
         return 1;
diff --git a/bcftools/vcfisec.c b/bcftools/vcfisec.c
index 261841c..1d2fab1 100644
--- a/bcftools/vcfisec.c
+++ b/bcftools/vcfisec.c
@@ -1,6 +1,6 @@
 /*  vcfisec.c -- Create intersections, unions and complements of VCF files.
 
-    Copyright (C) 2012-2019 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -141,7 +141,7 @@ void isec_vcf(args_t *args)
     if ( args->targets_list && files->nreaders==1 ) out_std = 1;
     if ( out_std )
     {
-        out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
+        out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode2(args->output_type,args->output_fname));
         if ( out_fh == NULL ) error("Can't write to %s: %s\n", args->output_fname? args->output_fname : "standard output", strerror(errno));
         if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
         if (args->record_cmd_line) bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
@@ -356,7 +356,7 @@ static void init_data(args_t *args)
 
             #define OPEN_FILE(i,j) { \
                 open_file(&args->fnames[i], NULL, "%s/%04d.%s", args->prefix, i, suffix); \
-                args->fh_out[i] = hts_open(args->fnames[i], hts_bcf_wmode(args->output_type));  \
+                args->fh_out[i] = hts_open(args->fnames[i], hts_bcf_wmode2(args->output_type,args->fnames[i]));  \
                 if ( !args->fh_out[i] ) error("Could not open %s\n", args->fnames[i]); \
                 if ( args->n_threads ) hts_set_threads(args->fh_out[i], args->n_threads); \
                 if (args->record_cmd_line) bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
@@ -397,10 +397,9 @@ static void init_data(args_t *args)
                 fprintf(args->fh_log,"%s\tfor stripped\t%s\n", args->fnames[i], args->files->readers[i].fname);
             }
             #undef OPEN_FILE
-
-            args->fh_sites = open_file(NULL, "w", "%s/sites.txt", args->prefix);
-            if ( !args->fh_sites ) error("%s/sites.txt: %s\n", args->prefix, strerror(errno));
         }
+        args->fh_sites = open_file(NULL, "w", "%s/sites.txt", args->prefix);
+        if ( !args->fh_sites ) error("%s/sites.txt: %s\n", args->prefix, strerror(errno));
     }
     else {
         if (args->output_fname) {
diff --git a/bcftools/vcfisec.c.pysam.c b/bcftools/vcfisec.c.pysam.c
index 2ef8853..d59d7df 100644
--- a/bcftools/vcfisec.c.pysam.c
+++ b/bcftools/vcfisec.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfisec.c -- Create intersections, unions and complements of VCF files.
 
-    Copyright (C) 2012-2019 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -143,7 +143,7 @@ void isec_vcf(args_t *args)
     if ( args->targets_list && files->nreaders==1 ) out_std = 1;
     if ( out_std )
     {
-        out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
+        out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode2(args->output_type,args->output_fname));
         if ( out_fh == NULL ) error("Can't write to %s: %s\n", args->output_fname? args->output_fname : "standard output", strerror(errno));
         if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
         if (args->record_cmd_line) bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
@@ -358,7 +358,7 @@ static void init_data(args_t *args)
 
             #define OPEN_FILE(i,j) { \
                 open_file(&args->fnames[i], NULL, "%s/%04d.%s", args->prefix, i, suffix); \
-                args->fh_out[i] = hts_open(args->fnames[i], hts_bcf_wmode(args->output_type));  \
+                args->fh_out[i] = hts_open(args->fnames[i], hts_bcf_wmode2(args->output_type,args->fnames[i]));  \
                 if ( !args->fh_out[i] ) error("Could not open %s\n", args->fnames[i]); \
                 if ( args->n_threads ) hts_set_threads(args->fh_out[i], args->n_threads); \
                 if (args->record_cmd_line) bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
@@ -399,10 +399,9 @@ static void init_data(args_t *args)
                 fprintf(args->fh_log,"%s\tfor stripped\t%s\n", args->fnames[i], args->files->readers[i].fname);
             }
             #undef OPEN_FILE
-
-            args->fh_sites = open_file(NULL, "w", "%s/sites.txt", args->prefix);
-            if ( !args->fh_sites ) error("%s/sites.txt: %s\n", args->prefix, strerror(errno));
         }
+        args->fh_sites = open_file(NULL, "w", "%s/sites.txt", args->prefix);
+        if ( !args->fh_sites ) error("%s/sites.txt: %s\n", args->prefix, strerror(errno));
     }
     else {
         if (args->output_fname) {
@@ -494,7 +493,7 @@ static void usage(void)
     fprintf(bcftools_stderr, "   # Extract records private to A or B comparing by position only\n");
     fprintf(bcftools_stderr, "   bcftools isec A.vcf.gz B.vcf.gz -p dir -n -1 -c all\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfisec(int argc, char *argv[])
diff --git a/bcftools/vcfmerge.c b/bcftools/vcfmerge.c
index 42c2bd3..637e1b9 100644
--- a/bcftools/vcfmerge.c
+++ b/bcftools/vcfmerge.c
@@ -1,6 +1,6 @@
 /*  vcfmerge.c -- Merge multiple VCF/BCF files to create one multi-sample file.
 
-    Copyright (C) 2012-2019 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -25,6 +25,7 @@ THE SOFTWARE.  */
 #include <stdio.h>
 #include <string.h>
 #include <strings.h>
+#include <assert.h>
 #include <errno.h>
 #include <unistd.h>
 #include <getopt.h>
@@ -58,6 +59,8 @@ typedef khash_t(strdict) strdict_t;
 
 #define SWAP(type_t,a,b) { type_t tmp = (a); (a) = (b); (b) = tmp; }
 
+#define PL2PROB_MAX 1024
+
 // For merging INFO Number=A,G,R tags
 typedef struct
 {
@@ -132,6 +135,11 @@ typedef struct
     gvcf_aux_t *gvcf;   // buffer of gVCF lines, for each reader one line
     int nout_smpl;
     kstring_t *str;
+    int32_t *laa;           // localized alternate alleles given as input-based indexes in per-sample blocks of (args->local_alleles+1) values, 0 is always first
+    int nlaa, laa_dirty;    // number of LAA alleles actually used at this site, and was any L* added?
+    int32_t *tmpi, *k2k;
+    double *tmpd, *pl2prob; // mapping from phred-score likelihoods (PL) to probability
+    int ntmpi, ntmpd, nk2k;
 }
 maux_t;
 
@@ -141,7 +149,7 @@ typedef struct
     maux_t *maux;
     regidx_t *regs;    // apply regions only after the blocks are expanded
     regitr_t *regs_itr;
-    int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref;
+    int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref, no_index;
     char *header_fname, *output_fname, *regions_list, *info_rules, *file_list;
     faidx_t *gvcf_fai;
     info_rule_t *rules;
@@ -154,6 +162,7 @@ typedef struct
     bcf_hdr_t *out_hdr;
     char **argv;
     int argc, n_threads, record_cmd_line;
+    int local_alleles;    // the value of -L option
 }
 args_t;
 
@@ -262,7 +271,28 @@ static void info_rules_merge_join(bcf_hdr_t *hdr, bcf1_t *line, info_rule_t *rul
         bcf_update_info_string(hdr,line,rule->hdr_tag,rule->vals);
     }
     else
+    {
+        int isrc, idst = 0;
+        #define BRANCH(type_t,is_missing,is_vector_end) { \
+            type_t *ptr = (type_t*) rule->vals; \
+            for (isrc=0; isrc<rule->nvals; isrc++) \
+            { \
+                if ( is_vector_end ) break; \
+                if ( is_missing ) continue; \
+                if ( idst!=isrc ) ptr[idst] = ptr[isrc]; \
+                idst++; \
+            } \
+        }
+        switch (rule->type) {
+            case BCF_HT_INT:  BRANCH(int32_t, ptr[isrc]==bcf_int32_missing, ptr[isrc]==bcf_int32_vector_end); break;
+            case BCF_HT_REAL: BRANCH(float, bcf_float_is_missing(ptr[isrc]), bcf_float_is_vector_end(ptr[isrc])); break;
+            default: error("TODO: %s:%d .. type=%d\n", __FILE__,__LINE__, rule->type);
+        }
+        #undef BRANCH
+
+        rule->nvals = idst;
         bcf_update_info(hdr,line,rule->hdr_tag,rule->vals,rule->nvals,rule->type);
+    }
 }
 
 static int info_rules_comp_key2(const void *a, const void *b)
@@ -344,7 +374,7 @@ static void info_rules_init(args_t *args)
         if ( rule->type==BCF_HT_INT ) rule->type_size = sizeof(int32_t);
         else if ( rule->type==BCF_HT_REAL ) rule->type_size = sizeof(float);
         else if ( rule->type==BCF_HT_STR ) rule->type_size = sizeof(char); 
-        else error("The type is not supported: \"%s\"\n", rule->hdr_tag);
+        else error("The INFO rule \"%s\" is not supported; the tag \"%s\" type is %d\n", ss,rule->hdr_tag,rule->type);
 
         ss = strchr(ss, '\0'); ss++;
         if ( !*ss ) error("Could not parse INFO rules, missing logic of \"%s\"\n", rule->hdr_tag);
@@ -366,8 +396,17 @@ static void info_rules_init(args_t *args)
                     bcf_hdr_id2length(args->out_hdr,BCF_HL_INFO,id)==BCF_VL_G ||
                     bcf_hdr_id2length(args->out_hdr,BCF_HL_INFO,id)==BCF_VL_R
                     ) ? 1 : 0;
-            if ( is_join && is_agr )
-                error("Cannot -i %s:join on Number=[AGR] tags is not supported.\n", rule->hdr_tag);
+            if ( is_join && bcf_hdr_id2length(args->out_hdr,BCF_HL_INFO,id)!=BCF_VL_VAR )
+            {
+                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->out_hdr, BCF_HL_INFO, "ID", rule->hdr_tag, NULL);
+                hrec = bcf_hrec_dup(hrec);
+                int i = bcf_hrec_find_key(hrec, "Number");
+                if ( i<0 ) error("Uh, could not find the entry Number in the header record of %s\n",rule->hdr_tag);
+                free(hrec->vals[i]);
+                hrec->vals[i] = strdup(".");
+                bcf_hdr_remove(args->out_hdr,BCF_HL_INFO, rule->hdr_tag);
+                bcf_hdr_add_hrec(args->out_hdr, hrec);
+            }
             if ( !is_join && !is_agr )
                 error("Only fixed-length vectors are supported with -i %s:%s\n", ss, rule->hdr_tag);
         }
@@ -689,7 +728,7 @@ maux_t *maux_init(args_t *args)
     assert( n_smpl==bcf_hdr_nsamples(args->out_hdr) );
     if ( args->do_gvcf )
     {
-        ma->gvcf = (gvcf_aux_t*) calloc(ma->n,sizeof(gvcf_aux_t));
+        ma->gvcf = (gvcf_aux_t*) calloc(ma->n,sizeof(gvcf_aux_t));  // -Walloc-size-larger-than gives a harmless warning caused by signed integer ma->n
         for (i=0; i<ma->n; i++)
             ma->gvcf[i].line = bcf_init1();
     }
@@ -699,6 +738,13 @@ maux_t *maux_init(args_t *args)
     for (i=0; i<ma->n; i++)
         ma->buf[i].rid = -1;
     ma->str = (kstring_t*) calloc(n_smpl,sizeof(kstring_t));
+    if ( args->local_alleles )
+    {
+        ma->laa = (int32_t*)malloc(sizeof(*ma->laa)*ma->nout_smpl*(1+args->local_alleles));
+        ma->pl2prob = (double*)malloc(PL2PROB_MAX*sizeof(*ma->pl2prob));
+        for (i=0; i<PL2PROB_MAX; i++)
+            ma->pl2prob[i] = pow(10,-0.1*i);
+    }
     return ma;
 }
 void maux_destroy(maux_t *ma)
@@ -737,6 +783,11 @@ void maux_destroy(maux_t *ma)
     free(ma->smpl_ploidy);
     free(ma->smpl_nGsize);
     free(ma->chr);
+    free(ma->laa);
+    free(ma->tmpi);
+    free(ma->k2k);
+    free(ma->tmpd);
+    free(ma->pl2prob);
     free(ma);
 }
 void maux_expand1(buffer_t *buf, int size)
@@ -1325,6 +1376,171 @@ static inline int max_used_gt_ploidy(bcf_fmt_t *fmt, int nsmpl)
     return max_ploidy;
 }
 
+// Sets ma->laa to the per-sample local allele indexes (REF first), or to missing/vector_end when there is no data.
+// The indexes are with respect to the source (reader) alleles and must be translated to output indexes as the very
+// last step. ma->nlaa is set to the LAA row width, or to 0 when subsetting is needed but FORMAT/PL is absent.
+void init_local_alleles(args_t *args, bcf1_t *out, int ifmt_PL)
+{
+    bcf_srs_t *files = args->files;
+    maux_t *ma = args->maux;
+    int i,j,k,l, ismpl = 0, nlaa = 0;
+    static int warned = 0;
+
+    hts_expand(double,out->n_allele,ma->ntmpd,ma->tmpd); // allele probabilities
+    hts_expand(int,out->n_allele,ma->ntmpi,ma->tmpi);    // indexes of the sorted probabilities
+
+    // Let map[] be the mapping from src to output idx. Then k2k[] is mapping from src allele idxs to src allele idxs
+    // reordered so that if i<j then map[k2k[i]] < map[k2k[j]]
+    hts_expand(int,out->n_allele,ma->nk2k,ma->k2k);
+
+    // Determine local alleles: either take all that are present in the reader or use PL to determine the best
+    // subset for each sample. The alleles must be listed in the order of the alleles in the output file.
+    for (i=0; i<files->nreaders; i++)
+    {
+        bcf_sr_t *reader = &files->readers[i];
+        bcf_hdr_t *hdr = reader->header;
+        bcf_fmt_t *fmt_ori = ifmt_PL<0 ? NULL : ma->fmt_map[files->nreaders*ifmt_PL+i];   // ifmt_PL is -1 when no PL tag was seen, never index fmt_map with it
+        bcf1_t *line = maux_get_line(args, i);
+        int nsmpl = bcf_hdr_nsamples(hdr);
+        if ( line )
+        {
+            if ( nlaa < line->n_allele - 1 )
+                nlaa = line->n_allele - 1 <= args->local_alleles ? line->n_allele - 1 : args->local_alleles;
+
+            for (j=0; j<line->n_allele; j++) ma->k2k[j] = j;
+
+            if ( line->n_allele <= args->local_alleles + 1 )
+            {
+                // sort to the output order, insertion sort, ascending
+                int *map = ma->buf[i].rec[ma->buf[i].cur].map;
+                int *k2k = ma->k2k;
+                int tmp;
+                for (k=1; k<line->n_allele; k++)
+                    for (l=k; l>0 && map[k2k[l]] < map[k2k[l-1]]; l--)
+                        tmp = k2k[l], k2k[l] = k2k[l-1], k2k[l-1] = tmp;
+
+                // fewer than the allowed number of alleles, use all alleles from this file
+                for (j=0; j<nsmpl; j++)
+                {
+                    int32_t *ptr = ma->laa + (1+args->local_alleles)*ismpl;
+                    for (k=0; k<line->n_allele; k++) ptr[k] = k2k[k];
+                    for (; k<=args->local_alleles; k++) ptr[k] = bcf_int32_vector_end;
+                    ismpl++;
+                }
+                continue;
+            }
+        }
+        if ( !line || (ifmt_PL>=0 && !fmt_ori) )
+        {
+            // no values, fill in missing values
+            for (j=0; j<nsmpl; j++)
+            {
+                int32_t *ptr = ma->laa + (1+args->local_alleles)*ismpl;
+                ptr[0] = bcf_int32_missing;
+                for (k=1; k<=args->local_alleles; k++) ptr[k] = bcf_int32_vector_end;
+                ismpl++;
+            }
+            continue;
+        }
+
+        // there are more alternate alleles in the input files than is allowed on output, need to subset
+        if ( ifmt_PL==-1 )
+        {
+            if ( !warned )
+                fprintf(stderr,"Warning: local alleles are determined from FORMAT/PL but the tag is missing, cannot apply --local-alleles\n");
+            warned = 1;
+            ma->nlaa = 0;
+            return;
+        }
+
+        if ( !IS_VL_G(hdr, fmt_ori->id) ) error("FORMAT/PL must be defined as Number=G\n");
+        if ( 2*fmt_ori->n != line->n_allele*(line->n_allele+1) ) error("Todo: haploid PL to LPL\n");
+
+        int *map = ma->buf[i].rec[ma->buf[i].cur].map;
+        double *allele_prob = ma->tmpd;
+        int *idx = ma->tmpi;
+        #define BRANCH(src_type_t, src_is_missing, src_is_vector_end, pl2prob_idx) { \
+            src_type_t *src = (src_type_t*) fmt_ori->p; \
+            for (j=0; j<nsmpl; j++) \
+            { \
+                for (k=0; k<line->n_allele; k++) allele_prob[k] = 0; \
+                for (k=0; k<line->n_allele; k++) \
+                    for (l=0; l<=k; l++) \
+                    { \
+                        if ( src_is_missing || src_is_vector_end ) { src++; continue; } \
+                        double prob = ma->pl2prob[pl2prob_idx]; \
+                        allele_prob[k] += prob; \
+                        allele_prob[l] += prob; \
+                        src++; \
+                    } \
+                /* insertion sort by allele probability, descending order, with the twist that REF (idx=0) always comes first */ \
+                allele_prob++; idx[0] = -1; idx++; /* keep REF first */ \
+                int si,sj,tmp; \
+                for (si=0; si<line->n_allele-1; si++) idx[si] = si; \
+                for (si=1; si<line->n_allele-1; si++) \
+                    for (sj=si; sj>0 && allele_prob[idx[sj]] > allele_prob[idx[sj-1]]; sj--) \
+                        tmp = idx[sj], idx[sj] = idx[sj-1], idx[sj-1] = tmp; \
+                /*for debugging only: test order*/ \
+                for (si=1; si<line->n_allele-1; si++) \
+                    assert( allele_prob[idx[si-1]] >= allele_prob[idx[si]] ); \
+                allele_prob--; idx--; /* this was to keep REF first */ \
+                int32_t *ptr = ma->laa + (1+args->local_alleles)*ismpl; \
+                ptr[0] = 0; \
+                for (k=1; k<=args->local_alleles && k<line->n_allele; k++) ptr[k] = idx[k]+1; \
+                int kmax = k; \
+                for (; k<=args->local_alleles; k++) ptr[k] = bcf_int32_vector_end; \
+                /* insertion sort by indexes to the output order, ascending */ \
+                for (k=1; k<kmax; k++) \
+                    for (l=k; l>0 && map[ptr[l]] < map[ptr[l-1]]; l--) \
+                        tmp = ptr[l], ptr[l] = ptr[l-1], ptr[l-1] = tmp; \
+                ismpl++; \
+            } \
+        }
+        switch (fmt_ori->type)
+        {
+            case BCF_BT_INT8:  BRANCH( int8_t, *src==bcf_int8_missing,  *src==bcf_int8_vector_end,  *src>=0 && *src<PL2PROB_MAX ? *src : PL2PROB_MAX-1); break;
+            case BCF_BT_INT16: BRANCH(int16_t, *src==bcf_int16_missing, *src==bcf_int16_vector_end, *src>=0 && *src<PL2PROB_MAX ? *src : PL2PROB_MAX-1); break;
+            case BCF_BT_INT32: BRANCH(int32_t, *src==bcf_int32_missing, *src==bcf_int32_vector_end, *src>=0 && *src<PL2PROB_MAX ? *src : PL2PROB_MAX-1); break;
+            default: error("Unexpected case: %d, PL\n", fmt_ori->type);
+        }
+        #undef BRANCH
+    }
+    ma->nlaa = nlaa;
+}
+
+void update_local_alleles(args_t *args, bcf1_t *out)   // translate ma->laa to output allele indexes, compact it in place and write FORMAT/LAA
+{
+    bcf_srs_t *files = args->files;
+    maux_t *ma = args->maux;
+    int i,j,k,ismpl=0,nsamples = bcf_hdr_nsamples(args->out_hdr);
+    for (i=0; i<files->nreaders; i++)
+    {
+        int irec = ma->buf[i].cur;
+        bcf_sr_t *reader = &files->readers[i];
+        int nsmpl = bcf_hdr_nsamples(reader->header);
+        for (k=0; k<nsmpl; k++)
+        {
+            int32_t *src = ma->laa + ismpl*(1+args->local_alleles);   // source row, stride 1+local_alleles; src[0] is skipped below
+            int32_t *dst = ma->laa + ismpl*ma->nlaa;                  // output row, stride nlaa; never runs ahead of src because nlaa <= local_alleles
+            j = 0;
+            if ( irec>=0 )
+            {
+                for (; j<ma->nlaa; j++)
+                {
+                    if ( src[j+1]==bcf_int32_missing ) dst[j] = bcf_int32_missing;
+                    else if ( src[j+1]==bcf_int32_vector_end ) break;
+                    else
+                        dst[j] = ma->buf[i].rec[irec].map[src[j+1]];
+                }
+            }
+            if ( j==0 ) dst[j++] = bcf_int32_missing;
+            for (; j<ma->nlaa; j++) dst[j] = bcf_int32_vector_end;     // pad the output row: the dst rows, not src, are what gets written below
+            ismpl++;
+        }
+    }
+    bcf_update_format_int32(args->out_hdr, out, "LAA", ma->laa, nsamples*ma->nlaa);
+}
+
 void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
 {
     bcf_srs_t *files = args->files;
@@ -1333,7 +1549,7 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     int i, ismpl = 0, nsamples = bcf_hdr_nsamples(out_hdr);
     static int warned = 0;
 
-    int nsize = 0, msize = sizeof(int32_t);
+    int nsize = 0;
     for (i=0; i<files->nreaders; i++)
     {
         bcf_fmt_t *fmt = fmt_map[i];
@@ -1343,17 +1559,18 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     }
     if ( nsize==0 ) nsize = 1;
 
-    if ( ma->ntmp_arr < nsamples*nsize*msize )
+    size_t msize = sizeof(int32_t)*nsize*nsamples;
+    if ( msize > 2147483647 )
     {
-        ma->ntmp_arr = nsamples*nsize*msize;
-        ma->tmp_arr  = realloc(ma->tmp_arr, ma->ntmp_arr);
-        if ( !ma->tmp_arr ) error("Could not allocate %zu bytes\n",ma->ntmp_arr);
-        if ( ma->ntmp_arr > 2147483647 )
-        {
-            if ( !warned ) fprintf(stderr,"Warning: Too many genotypes at %s:%"PRId64", requires %zu bytes, skipping.\n", bcf_seqname(out_hdr,out),(int64_t) out->pos+1,ma->ntmp_arr);
-            warned = 1;
-            return;
-        }
+        if ( !warned ) fprintf(stderr,"Warning: Too many genotypes at %s:%"PRId64", requires %zu bytes, skipping.\n", bcf_seqname(out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Could not allocate %zu bytes\n",msize);
+        ma->ntmp_arr = msize;
     }
     memset(ma->smpl_ploidy,0,nsamples*sizeof(int));
 
@@ -1509,6 +1726,7 @@ void merge_format_string(args_t *args, const char *key, bcf_fmt_t **fmt_map, bcf
                     int ret = copy_string_field(src, iori - ifrom, fmt_ori->size, str, inew);
                     if ( ret<-1 ) error("[E::%s] fixme: internal error at %s:%"PRId64" .. %d\n",__func__,bcf_seqname(hdr,line),(int64_t) line->pos+1,ret);
                 }
+                if ( nmax < str->l ) nmax = str->l;
                 src += fmt_ori->size;
             }
             continue;
@@ -1520,17 +1738,18 @@ void merge_format_string(args_t *args, const char *key, bcf_fmt_t **fmt_map, bcf
               "If you don't really need it, use `bcftools annotate -x` to remove the annotation before merging.\n", __func__,key);
     }
     // update the record
-    if ( ma->ntmp_arr < nsamples*nmax )
+    size_t msize = nsamples*nmax;
+    if ( msize > 2147483647 )
     {
-        ma->ntmp_arr = nsamples*nmax;
-        ma->tmp_arr  = realloc(ma->tmp_arr, ma->ntmp_arr);
-        if ( !ma->tmp_arr ) error("Could not allocate %zu bytes\n",ma->ntmp_arr);
-        if ( ma->ntmp_arr > 2147483647 )
-        {
-            if ( !warned ) fprintf(stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(out_hdr,out),(int64_t) out->pos+1,ma->ntmp_arr);
-            warned = 1;
-            return;
-        }
+        if ( !warned ) fprintf(stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Could not allocate %zu bytes\n",msize);
+        ma->ntmp_arr = msize;
     }
     char *tgt = (char*) ma->tmp_arr;
     for (i=0; i<nsamples; i++)
@@ -1542,6 +1761,204 @@ void merge_format_string(args_t *args, const char *key, bcf_fmt_t **fmt_map, bcf
     bcf_update_format_char(out_hdr, out, key, (float*)ma->tmp_arr, nsamples*nmax);
 }
 
+// Note: only diploid Number=G tags are supported for now. Writes FORMAT/L<key> restricted to the alleles listed in ma->laa.
+void merge_localized_numberG_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out, int irdr)
+{
+    int i,j,k, nsamples = bcf_hdr_nsamples(args->out_hdr);
+    bcf_srs_t *files = args->files;
+    maux_t *ma = args->maux;
+    bcf_fmt_t *fmt = fmt_map[irdr];    // dereferenced below, so irdr must index a reader with a non-NULL fmt_map entry
+    const char *key = files->readers[irdr].header->id[BCF_DT_ID][fmt_map[irdr]->id].key;
+    size_t nsize = (ma->nlaa+1)*(ma->nlaa+2)/2;             // max number of Number=G localized fields
+    size_t msize = sizeof(float)>sizeof(int32_t) ? sizeof(float) : sizeof(int32_t);
+    msize *= nsamples*nsize;
+    if ( msize > 2147483647 )
+    {
+        static int warned = 0;
+        if ( !warned ) fprintf(stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Failed to allocate %zu bytes at %s:%"PRId64" for FORMAT/%s\n", msize,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,key);
+        ma->ntmp_arr = msize;
+    }
+    int ismpl = 0;      // running index of the output sample across all readers
+    for (i=0; i<files->nreaders; i++)
+    {
+        bcf_sr_t *reader = &files->readers[i];
+        bcf_hdr_t *hdr = reader->header;
+        bcf_fmt_t *fmt_ori = fmt_map[i];
+        bcf1_t *line = maux_get_line(args, i);
+        int nsmpl = bcf_hdr_nsamples(hdr);
+
+        if ( !fmt_ori )
+        {
+            // fill missing values: one missing value followed by vector_end padding for each sample of this reader
+            #define BRANCH(tgt_type_t, tgt_set_missing, tgt_set_vector_end) { \
+                for (j=0; j<nsmpl; j++) \
+                { \
+                    tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \
+                    tgt_set_missing; \
+                    for (k=1; k<nsize; k++) { tgt++; tgt_set_vector_end; } \
+                    ismpl++; \
+                } \
+            }
+            switch (fmt->type)
+            {
+                case BCF_BT_INT8:  BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_INT16: BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_INT32: BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_FLOAT: BRANCH(float, bcf_float_set_missing(*tgt), bcf_float_set_vector_end(*tgt)); break;
+                default: error("Unexpected case: %d, %s\n", fmt->type, key);
+            }
+            #undef BRANCH
+            continue;
+        }
+        if ( 2*fmt_ori->n!=line->n_allele*(line->n_allele+1) ) error("Todo: localization of missing or haploid Number=G tags\n");
+
+        // localize: copy the genotype values of every pair of local alleles, all integer widths are widened to int32_t
+        #define BRANCH(tgt_type_t, src_type_t, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \
+            for (j=0; j<nsmpl; j++) \
+            { \
+                src_type_t *src = (src_type_t*) fmt_ori->p + j*fmt_ori->n; \
+                tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \
+                int *laa = ma->laa + (1+args->local_alleles)*ismpl; \
+                int ii,ij,tgt_idx = 0; \
+                for (ii=0; ii<=ma->nlaa; ii++) \
+                { \
+                    if ( laa[ii]==bcf_int32_missing || laa[ii]==bcf_int32_vector_end ) break; \
+                    for (ij=0; ij<=ii; ij++) \
+                    { \
+                        int src_idx = bcf_alleles2gt(laa[ii],laa[ij]); /* index of the (ii,ij) local allele pair in the source vector */ \
+                        if ( src_is_missing ) tgt_set_missing; \
+                        else if ( src_is_vector_end ) break; \
+                        else tgt[tgt_idx] = src[src_idx]; \
+                        tgt_idx++; \
+                    } \
+                } \
+                if ( !tgt_idx ) { tgt_set_missing; tgt_idx++; } \
+                for (; tgt_idx<nsize; tgt_idx++) tgt_set_vector_end; \
+                ismpl++; \
+            } \
+        }
+        switch (fmt_ori->type)
+        {
+            case BCF_BT_INT8:  BRANCH(int32_t,  int8_t, src[src_idx]==bcf_int8_missing,  src[src_idx]==bcf_int8_vector_end,  tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_INT16: BRANCH(int32_t, int16_t, src[src_idx]==bcf_int16_missing, src[src_idx]==bcf_int16_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_INT32: BRANCH(int32_t, int32_t, src[src_idx]==bcf_int32_missing, src[src_idx]==bcf_int32_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_FLOAT: BRANCH(float, float, bcf_float_is_missing(src[src_idx]), bcf_float_is_vector_end(src[src_idx]), bcf_float_set_missing(tgt[tgt_idx]), bcf_float_set_vector_end(tgt[tgt_idx])); break;
+            default: error("Unexpected case: %d, %s\n", fmt_ori->type, key);
+        }
+        #undef BRANCH
+    }
+    args->tmps.l = 0;           // build the output tag name: "L" followed by the original key
+    kputc('L',&args->tmps);
+    kputs(key,&args->tmps);
+    if ( fmt_map[irdr]->type==BCF_BT_FLOAT )
+        bcf_update_format_float(args->out_hdr, out, args->tmps.s, (float*)ma->tmp_arr, nsamples*nsize);
+    else
+        bcf_update_format_int32(args->out_hdr, out, args->tmps.s, (int32_t*)ma->tmp_arr, nsamples*nsize);
+    ma->laa_dirty = 1;          // merge_format() checks this flag to decide whether to call update_local_alleles()
+}
+void merge_localized_numberAR_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out, int irdr)   // Number=A/R counterpart: writes FORMAT/L<key> for the alleles in ma->laa
+{
+    int i,j,k, nsamples = bcf_hdr_nsamples(args->out_hdr);
+    bcf_srs_t *files = args->files;
+    maux_t *ma = args->maux;
+    bcf_fmt_t *fmt = fmt_map[irdr];    // dereferenced below, so irdr must index a reader with a non-NULL fmt_map entry
+    const char *key = files->readers[irdr].header->id[BCF_DT_ID][fmt->id].key;
+    size_t nsize = IS_VL_R(files->readers[irdr].header, fmt->id) ? ma->nlaa + 1 : ma->nlaa;   // values per sample: Number=R has one more than Number=A
+    size_t msize = sizeof(float)>sizeof(int32_t) ? sizeof(float) : sizeof(int32_t);
+    msize *= nsamples*nsize;
+    if ( msize > 2147483647 )
+    {
+        static int warned = 0;
+        if ( !warned ) fprintf(stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Failed to allocate %zu bytes at %s:%"PRId64" for FORMAT/%s\n", msize,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,key);
+        ma->ntmp_arr = msize;
+    }
+    int ismpl = 0, ibeg = IS_VL_R(files->readers[irdr].header, fmt->id) ? 0 : 1;;   // ibeg: first laa slot to use, 0 for Number=R and 1 for Number=A
+    for (i=0; i<files->nreaders; i++)
+    {
+        bcf_sr_t *reader = &files->readers[i];
+        bcf_hdr_t *hdr = reader->header;
+        bcf_fmt_t *fmt_ori = fmt_map[i];
+        int nsmpl = bcf_hdr_nsamples(hdr);
+
+        if ( !fmt_ori )
+        {
+            // fill missing values: one missing value followed by vector_end padding for each sample of this reader
+            #define BRANCH(tgt_type_t, tgt_set_missing, tgt_set_vector_end) { \
+                for (j=0; j<nsmpl; j++) \
+                { \
+                    tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \
+                    tgt_set_missing; \
+                    for (k=1; k<nsize; k++) { tgt++; tgt_set_vector_end; } \
+                    ismpl++; \
+                } \
+            }
+            switch (fmt->type)
+            {
+                case BCF_BT_INT8:  BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_INT16: BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_INT32: BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_FLOAT: BRANCH(float, bcf_float_set_missing(*tgt), bcf_float_set_vector_end(*tgt)); break;
+                default: error("Unexpected case: %d, %s\n", fmt->type, key);
+            }
+            #undef BRANCH
+            continue;
+        }
+
+        // localize: copy the value of every local allele, all integer widths are widened to int32_t
+        #define BRANCH(tgt_type_t, src_type_t, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \
+            for (j=0; j<nsmpl; j++) \
+            { \
+                src_type_t *src = (src_type_t*) fmt_ori->p + j*fmt_ori->n; \
+                tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \
+                int *laa = ma->laa + (1+args->local_alleles)*ismpl; \
+                int ii,tgt_idx = 0; \
+                for (ii=ibeg; ii<=ma->nlaa; ii++) \
+                { \
+                    if ( laa[ii]==bcf_int32_missing || laa[ii]==bcf_int32_vector_end ) break; \
+                    int src_idx = laa[ii] - ibeg; /* laa holds allele indexes counted from REF; shift by ibeg for Number=A */ \
+                    if ( src_is_missing ) tgt_set_missing; \
+                    else if ( src_is_vector_end ) break; \
+                    else tgt[tgt_idx] = src[src_idx]; \
+                    tgt_idx++; \
+                } \
+                if ( !tgt_idx ) { tgt_set_missing; tgt_idx++; } \
+                for (; tgt_idx<nsize; tgt_idx++) tgt_set_vector_end; \
+                ismpl++; \
+            } \
+        }
+        switch (fmt_ori->type)
+        {
+            case BCF_BT_INT8:  BRANCH(int32_t,  int8_t, src[src_idx]==bcf_int8_missing,  src[src_idx]==bcf_int8_vector_end,  tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_INT16: BRANCH(int32_t, int16_t, src[src_idx]==bcf_int16_missing, src[src_idx]==bcf_int16_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_INT32: BRANCH(int32_t, int32_t, src[src_idx]==bcf_int32_missing, src[src_idx]==bcf_int32_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_FLOAT: BRANCH(float, float, bcf_float_is_missing(src[src_idx]), bcf_float_is_vector_end(src[src_idx]), bcf_float_set_missing(tgt[tgt_idx]), bcf_float_set_vector_end(tgt[tgt_idx])); break;
+            default: error("Unexpected case: %d, %s\n", fmt_ori->type, key);
+        }
+        #undef BRANCH
+    }
+    args->tmps.l = 0;           // build the output tag name: "L" followed by the original key
+    kputc('L',&args->tmps);
+    kputs(key,&args->tmps);
+    if ( fmt_map[irdr]->type==BCF_BT_FLOAT )
+        bcf_update_format_float(args->out_hdr, out, args->tmps.s, (float*)ma->tmp_arr, nsamples*nsize);
+    else
+        bcf_update_format_int32(args->out_hdr, out, args->tmps.s, (int32_t*)ma->tmp_arr, nsamples*nsize);
+    ma->laa_dirty = 1;          // merge_format() checks this flag to decide whether to call update_local_alleles()
+}
 void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
 {
     bcf_srs_t *files = args->files;
@@ -1579,6 +1996,13 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
         }
         if ( fmt_map[i]->n > nsize ) nsize = fmt_map[i]->n;
     }
+    if ( ma->nlaa && length!=BCF_VL_FIXED )
+    {
+        if ( length==BCF_VL_G ) merge_localized_numberG_format_field(args,fmt_map,out,i);
+        else if ( length==BCF_VL_A || length==BCF_VL_R ) merge_localized_numberAR_format_field(args,fmt_map,out,i);
+        return;
+    }
+
     if ( type==BCF_BT_CHAR )
     {
         merge_format_string(args, key, fmt_map, out, length, nsize);
@@ -1586,17 +2010,18 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     }
 
     size_t msize = sizeof(float)>sizeof(int32_t) ? sizeof(float) : sizeof(int32_t);
-    if ( ma->ntmp_arr < nsamples*nsize*msize )
+    msize *= nsamples*nsize;
+    if ( msize > 2147483647 )
     {
-        ma->ntmp_arr = nsamples*nsize*msize;
-        ma->tmp_arr  = realloc(ma->tmp_arr, ma->ntmp_arr);
-        if ( !ma->tmp_arr ) error("Failed to allocate %zu bytes at %s:%"PRId64" for FORMAT/%s\n", ma->ntmp_arr,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,key);
-        if ( ma->ntmp_arr > 2147483647 )
-        {
-            if ( !warned ) fprintf(stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(out_hdr,out),(int64_t) out->pos+1,ma->ntmp_arr);
-            warned = 1;
-            return;
-        }
+        if ( !warned ) fprintf(stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Failed to allocate %zu bytes at %s:%"PRId64" for FORMAT/%s\n", msize,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,key);
+        ma->ntmp_arr = msize;
     }
 
     // Fill the temp array for all samples by collecting values from all files
@@ -1790,7 +2215,7 @@ void merge_format(args_t *args, bcf1_t *out)
     khiter_t kitr;
     strdict_t *tmph = args->tmph;
     kh_clear(strdict, tmph);
-    int i, j, ret, has_GT = 0, max_ifmt = 0; // max fmt index
+    int i, j, ret, has_GT = 0, has_PL = -1, max_ifmt = 0; // max fmt index
     for (i=0; i<files->nreaders; i++)
     {
         bcf1_t *line = maux_get_line(args,i);
@@ -1820,6 +2245,7 @@ void merge_format(args_t *args, bcf1_t *out)
                         memset(ma->fmt_map+ma->nfmt_map*files->nreaders, 0, (max_ifmt-ma->nfmt_map+1)*files->nreaders*sizeof(bcf_fmt_t*));
                         ma->nfmt_map = max_ifmt+1;
                     }
+                    if ( key[0]=='P' && key[1]=='L' && key[2]==0  ) { has_PL = ifmt; }
                 }
                 kitr = kh_put(strdict, tmph, key, &ret);
                 kh_value(tmph, kitr) = ifmt;
@@ -1833,6 +2259,12 @@ void merge_format(args_t *args, bcf1_t *out)
         ma->buf[i].rec[irec].als_differ = j==line->n_allele ? 0 : 1;
     }
 
+    if ( args->local_alleles )
+    {
+        ma->laa_dirty = ma->nlaa = 0;
+        if ( out->n_allele > args->local_alleles + 1 ) init_local_alleles(args, out, has_PL);
+    }
+
     out->n_sample = bcf_hdr_nsamples(out_hdr);
     if ( has_GT )
         merge_GT(args, ma->fmt_map, out);
@@ -1840,6 +2272,10 @@ void merge_format(args_t *args, bcf1_t *out)
 
     for (i=1; i<=max_ifmt; i++)
         merge_format_field(args, &ma->fmt_map[i*files->nreaders], out);
+
+    if ( ma->laa_dirty )
+        update_local_alleles(args, out);
+
     out->d.indiv_dirty = 1;
 }
 
@@ -2041,6 +2477,23 @@ void gvcf_flush(args_t *args, int done)
     }
 }
 
+static inline int is_gvcf_block(bcf1_t *line)
+{
+    // A block must span more than one base and its REF must not spell out the whole span
+    if ( line->rlen<=1 || strlen(line->d.allele[0])==line->rlen ) return 0;
+    if ( line->n_allele==1 ) return 1;      // no ALT allele at all
+
+    // Otherwise any of the symbolic ALTs <*>, <NON_REF> or <X> marks the record as a block
+    int ial;
+    for (ial=1; ial<line->n_allele; ial++)
+    {
+        const char *alt = line->d.allele[ial];
+        if ( !strcmp(alt,"<*>") || !strcmp(alt,"<NON_REF>") || !strcmp(alt,"<X>") ) return 1;
+    }
+    return 0;
+}
+static const int snp_mask = (VCF_SNP<<2)|(VCF_MNP<<2), indel_mask = VCF_INDEL<<2, ref_mask = 2;   // bit layout of maux->var_types: variant types shifted by 2, bit 1 set for records with no variant type, bit 0 is VCF_GVCF_REF in can_merge()
+
 /*
     Check incoming lines for new gVCF blocks, set pointer to the current source
     buffer (gvcf or readers).  In contrast to gvcf_flush, this function can be
@@ -2059,6 +2512,7 @@ void gvcf_stage(args_t *args, int pos)
     maux->gvcf_min = INT_MAX;
     for (i=0; i<files->nreaders; i++)
     {
+        if ( gaux[i].active && gaux[i].end < pos ) gaux[i].active = 0;
         if ( gaux[i].active )
         {
             // gvcf block should not overlap with another record
@@ -2077,7 +2531,7 @@ void gvcf_stage(args_t *args, int pos)
         int irec = maux->buf[i].beg;
         bcf_hdr_t *hdr = bcf_sr_get_header(files, i);
         bcf1_t *line = args->files->readers[i].buffer[irec];
-        int ret = bcf_get_info_int32(hdr,line,"END",&end,&nend);
+        int ret = is_gvcf_block(line) ? bcf_get_info_int32(hdr,line,"END",&end,&nend) : 0;
         if ( ret==1 )
         {
             if ( end[0] == line->pos + 1 )  // POS and INFO/END are identical, treat as if a normal w/o INFO/END
@@ -2218,7 +2672,6 @@ void debug_state(args_t *args)
     fprintf(stderr,"\n");
 }
 
-
 /*
    Determine which line should be merged from which reader: go through all
    readers and all buffered lines, expand REF,ALT and try to match lines with
@@ -2227,7 +2680,6 @@ void debug_state(args_t *args)
 int can_merge(args_t *args)
 {
     bcf_srs_t *files = args->files;
-    int snp_mask = (VCF_SNP<<1)|(VCF_MNP<<1), indel_mask = VCF_INDEL<<1, ref_mask = 1;
     maux_t *maux = args->maux;
     gvcf_aux_t *gaux = maux->gvcf;
     char *id = NULL, ref = 'N';
@@ -2240,6 +2692,9 @@ int can_merge(args_t *args)
     }
     maux->var_types = maux->nals = 0;
 
+    // this is only for the `-m none -g` mode, ensure that <*> lines come last
+    #define VCF_GVCF_REF 1
+
     for (i=0; i<files->nreaders; i++)
     {
         buffer_t *buf = &maux->buf[i];
@@ -2257,12 +2712,17 @@ int can_merge(args_t *args)
             buf->rec[j].skip = SKIP_DIFF;
             ntodo++;
 
+            bcf1_t *line = buf->lines[j];
             if ( args->merge_by_id )
-                id = buf->lines[j]->d.id;
+                id = line->d.id;
             else
             {
-                int var_type = bcf_get_variant_types(buf->lines[j]);
-                maux->var_types |= var_type ? var_type<<1 : 1;
+                int var_type = bcf_get_variant_types(line);
+                maux->var_types |= var_type ? var_type<<2 : 2;
+
+                // for the `-m none -g` mode
+                if ( args->collapse==COLLAPSE_NONE && args->do_gvcf && is_gvcf_block(line) )
+                    maux->var_types |= VCF_GVCF_REF;
             }
         }
 
@@ -2294,7 +2754,7 @@ int can_merge(args_t *args)
             bcf1_t *line = buf->lines[j]; // ptr to reader's buffer or gvcf buffer
 
             int line_type = bcf_get_variant_types(line);
-            line_type = line_type ? line_type<<1 : 1;
+            line_type = line_type ? line_type<<2 : 2;
 
             // select relevant lines
             if ( args->merge_by_id )
@@ -2303,6 +2763,12 @@ int can_merge(args_t *args)
             }
             else
             {
+                // when merging gVCF in -m none mode, make sure that gVCF blocks with the same POS as variant
+                // records come last, otherwise infinite loop is created (#1164)
+                if ( args->collapse==COLLAPSE_NONE && args->do_gvcf )
+                {
+                    if ( is_gvcf_block(line) && (maux->var_types & (~(VCF_GVCF_REF|2))) ) continue;
+                }
                 if ( args->collapse==COLLAPSE_NONE && maux->nals )
                 {
                     // All alleles of the tested record must be present in the
@@ -2366,7 +2832,6 @@ int can_merge(args_t *args)
 */
 void stage_line(args_t *args)
 {
-    int snp_mask = (VCF_SNP<<1)|(VCF_MNP<<1), indel_mask = VCF_INDEL<<1, ref_mask = 1;
     bcf_srs_t *files = args->files;
     maux_t *maux = args->maux;
 
@@ -2436,13 +2901,9 @@ void stage_line(args_t *args)
 
 void merge_line(args_t *args)
 {
-    if ( args->regs )
-    {
-        if ( !regidx_overlap(args->regs,args->maux->chr,args->maux->pos,args->maux->pos,NULL) ) return;
-    }
-
     bcf1_t *out = args->out_line;
     merge_chrom2qual(args, out);
+    if ( args->regs && !regidx_overlap(args->regs,args->maux->chr,out->pos,out->pos+out->rlen-1,NULL) ) return;
     merge_filter(args, out);
     merge_info(args, out);
     if ( args->do_gvcf )
@@ -2490,9 +2951,59 @@ void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *c
     error_errno("[%s] Failed to add program information to header", __func__);
 }
 
+void hdr_add_localized_tags(args_t *args, bcf_hdr_t *hdr)  // add "L"-prefixed copies of all Number=G,A,R FORMAT definitions, plus FORMAT/LAA, to hdr
+{
+    char **str = NULL;              // formatted header lines collected in the loop and appended afterwards
+    int i,j, nstr = 0, mstr = 0;    // nstr: lines collected so far; mstr: allocated size of str (managed by hts_expand)
+    for (i=0; i<hdr->nhrec; i++)
+    {
+        if ( hdr->hrec[i]->type!=BCF_HL_FMT ) continue;     // only FORMAT records are considered
+        j = bcf_hrec_find_key(hdr->hrec[i],"ID");
+        if ( j<0 ) continue;
+        char *key = hdr->hrec[i]->vals[j];
+        int id = bcf_hdr_id2int(hdr, BCF_DT_ID, key);
+        assert( id>=0 );
+        int localize = 0;
+        if ( bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_G ) localize = 1;
+        if ( bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_A ) localize = 1;
+        if ( bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_R ) localize = 1;
+        if ( !localize ) continue;  // tags with any other Number are left alone
+        args->tmps.l = 0;           // reuse the scratch string for this record
+
+        uint32_t e = 0, nout = 0;   // e: accumulates formatting failures; nout: key=value pairs written so far
+        e |= ksprintf(&args->tmps, "##%s=<", hdr->hrec[i]->key) < 0;
+        for (j=0; j<hdr->hrec[i]->nkeys; j++)
+        {
+            if ( !strcmp("IDX",hdr->hrec[i]->keys[j]) ) continue;   // the IDX key is not copied to the new line
+            if ( nout ) e |= kputc(',',&args->tmps) < 0;
+            if ( !strcmp("ID",hdr->hrec[i]->keys[j]) )
+                e |= ksprintf(&args->tmps,"%s=L%s", hdr->hrec[i]->keys[j], hdr->hrec[i]->vals[j]) < 0;    // e.g. PL becomes LPL
+            else if ( !strcmp("Number",hdr->hrec[i]->keys[j]) )
+                e |= ksprintf(&args->tmps,"Number=.") < 0;          // the localized copy is always variable-length
+            else if ( !strcmp("Description",hdr->hrec[i]->keys[j]) && hdr->hrec[i]->vals[j][0]=='"' )
+                e |= ksprintf(&args->tmps,"Description=\"Localized field: %s", hdr->hrec[i]->vals[j]+1) < 0;   // +1 skips the opening quote, the rest of the value is kept as is
+            else
+                e |= ksprintf(&args->tmps,"%s=%s", hdr->hrec[i]->keys[j], hdr->hrec[i]->vals[j]) < 0;
+            nout++;
+        }
+        e |= ksprintf(&args->tmps,">\n") < 0;
+        if ( e ) error("Failed to format the header line for %s\n", key);
+        nstr++;
+        hts_expand(char*,nstr,mstr,str);
+        str[nstr-1] = strdup(args->tmps.s);     // NOTE(review): the strdup result is not checked before it is passed to bcf_hdr_append below
+    }
+    if ( !nstr ) return;            // nothing to localize, the header is left untouched
+    bcf_hdr_append(hdr,"##FORMAT=<ID=LAA,Number=.,Type=Integer,Description=\"Localized alleles: subset of alternate alleles relevant for each sample\">");
+    for (i=0; i<nstr; i++)          // lines are appended only here, presumably so that hdr->hrec is not modified while the loop above iterates over it
+    {
+        bcf_hdr_append(hdr, str[i]);
+        free(str[i]);
+    }
+    free(str);
+}
 void merge_vcf(args_t *args)
 {
-    args->out_fh  = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
+    args->out_fh  = hts_open(args->output_fname, hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->n_threads ) hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->files->p); //hts_set_threads(args->out_fh, args->n_threads);
     args->out_hdr = bcf_hdr_init("w");
@@ -2509,6 +3020,7 @@ void merge_vcf(args_t *args)
             char buf[24]; snprintf(buf,sizeof buf,"%d",i+1);
             merge_headers(args->out_hdr, args->files->readers[i].header,buf,args->force_samples);
         }
+        if ( args->local_alleles ) hdr_add_localized_tags(args, args->out_hdr);
         if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
         if (bcf_hdr_sync(args->out_hdr) < 0)
             error_errno("[%s] Failed to update header", __func__);
@@ -2580,7 +3092,9 @@ static void usage(void)
     fprintf(stderr, "    -g, --gvcf <-|ref.fa>              merge gVCF blocks, INFO/END tag is expected. Implies -i QS:sum,MinDP:min,I16:sum,IDV:max,IMF:max\n");
     fprintf(stderr, "    -i, --info-rules <tag:method,..>   rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
     fprintf(stderr, "    -l, --file-list <file>             read file names from the file\n");
+    fprintf(stderr, "    -L, --local-alleles <int>          EXPERIMENTAL: if more than <int> ALT alleles are encountered, drop FMT/PL and output LAA+LPL instead; 0=unlimited [0]\n");
     fprintf(stderr, "    -m, --merge <string>               allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
+    fprintf(stderr, "        --no-index                     merge unindexed files, the same chromosomal order is required and -r/-R are not allowed\n");
     fprintf(stderr, "        --no-version                   do not append version and command line to the header\n");
     fprintf(stderr, "    -o, --output <file>                write output to a file [standard output]\n");
     fprintf(stderr, "    -O, --output-type <b|u|z|v>        'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
@@ -2608,6 +3122,7 @@ int main_vcfmerge(int argc, char *argv[])
     {
         {"help",no_argument,NULL,'h'},
         {"merge",required_argument,NULL,'m'},
+        {"local-alleles",required_argument,NULL,'L'},
         {"gvcf",required_argument,NULL,'g'},
         {"file-list",required_argument,NULL,'l'},
         {"missing-to-ref",no_argument,NULL,'0'},
@@ -2622,11 +3137,19 @@ int main_vcfmerge(int argc, char *argv[])
         {"regions-file",required_argument,NULL,'R'},
         {"info-rules",required_argument,NULL,'i'},
         {"no-version",no_argument,NULL,8},
+        {"no-index",no_argument,NULL,10},
         {"filter-logic",required_argument,NULL,'F'},
         {NULL,0,NULL,0}
     };
-    while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0",loptions,NULL)) >= 0) {
+    char *tmp;
+    while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0L:",loptions,NULL)) >= 0) {
         switch (c) {
+            case 'L':
+                args->local_alleles = strtol(optarg,&tmp,10);
+                if ( *tmp ) error("Could not parse argument: --local-alleles %s\n", optarg);
+                if ( args->local_alleles < 1 )
+                    error("Error: \"--local-alleles %s\" makes no sense, expected value bigger or equal than 1\n", optarg);
+                break;
             case 'F': 
                 if ( !strcmp(optarg,"+") ) args->filter_logic = FLT_LOGIC_ADD;
                 else if ( !strcmp(optarg,"x") ) args->filter_logic = FLT_LOGIC_REMOVE;
@@ -2672,6 +3195,7 @@ int main_vcfmerge(int argc, char *argv[])
             case  3 : args->force_samples = 1; break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case  8 : args->record_cmd_line = 0; break;
+            case 10 : args->no_index = 1; break;
             case 'h':
             case '?': usage(); break;
             default: error("Unknown argument: %s\n", optarg);
@@ -2680,7 +3204,13 @@ int main_vcfmerge(int argc, char *argv[])
     if ( argc==optind && !args->file_list ) usage();
     if ( argc-optind<2 && !args->file_list ) usage();
 
-    args->files->require_index = 1;
+    if ( args->no_index )
+    {
+        if ( args->regions_list ) error("Error: cannot combine --no-index with -r/-R\n");
+        bcf_sr_set_opt(args->files,BCF_SR_ALLOW_NO_IDX);
+    }
+    else
+        bcf_sr_set_opt(args->files,BCF_SR_REQUIRE_IDX);
     if ( args->regions_list )
     {
         if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
diff --git a/bcftools/vcfmerge.c.pysam.c b/bcftools/vcfmerge.c.pysam.c
index 651ea51..0f1c94c 100644
--- a/bcftools/vcfmerge.c.pysam.c
+++ b/bcftools/vcfmerge.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfmerge.c -- Merge multiple VCF/BCF files to create one multi-sample file.
 
-    Copyright (C) 2012-2019 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -27,6 +27,7 @@ THE SOFTWARE.  */
 #include <stdio.h>
 #include <string.h>
 #include <strings.h>
+#include <assert.h>
 #include <errno.h>
 #include <unistd.h>
 #include <getopt.h>
@@ -60,6 +61,8 @@ typedef khash_t(strdict) strdict_t;
 
 #define SWAP(type_t,a,b) { type_t tmp = (a); (a) = (b); (b) = tmp; }
 
+#define PL2PROB_MAX 1024
+
 // For merging INFO Number=A,G,R tags
 typedef struct
 {
@@ -134,6 +137,11 @@ typedef struct
     gvcf_aux_t *gvcf;   // buffer of gVCF lines, for each reader one line
     int nout_smpl;
     kstring_t *str;
+    int32_t *laa;           // localized alternate alleles given as input-based indexes in per-sample blocks of (args->local_alleles+1) values, 0 is always first
+    int nlaa, laa_dirty;    // number of LAA alleles actually used at this site, and was any L* added?
+    int32_t *tmpi, *k2k;
+    double *tmpd, *pl2prob; // mapping from phred-score likelihoods (PL) to probability
+    int ntmpi, ntmpd, nk2k;
 }
 maux_t;
 
@@ -143,7 +151,7 @@ typedef struct
     maux_t *maux;
     regidx_t *regs;    // apply regions only after the blocks are expanded
     regitr_t *regs_itr;
-    int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref;
+    int header_only, collapse, output_type, force_samples, merge_by_id, do_gvcf, filter_logic, missing_to_ref, no_index;
     char *header_fname, *output_fname, *regions_list, *info_rules, *file_list;
     faidx_t *gvcf_fai;
     info_rule_t *rules;
@@ -156,6 +164,7 @@ typedef struct
     bcf_hdr_t *out_hdr;
     char **argv;
     int argc, n_threads, record_cmd_line;
+    int local_alleles;    // the value of -L option
 }
 args_t;
 
@@ -264,7 +273,28 @@ static void info_rules_merge_join(bcf_hdr_t *hdr, bcf1_t *line, info_rule_t *rul
         bcf_update_info_string(hdr,line,rule->hdr_tag,rule->vals);
     }
     else
+    {
+        int isrc, idst = 0;
+        #define BRANCH(type_t,is_missing,is_vector_end) { \
+            type_t *ptr = (type_t*) rule->vals; \
+            for (isrc=0; isrc<rule->nvals; isrc++) \
+            { \
+                if ( is_vector_end ) break; \
+                if ( is_missing ) continue; \
+                if ( idst!=isrc ) ptr[idst] = ptr[isrc]; \
+                idst++; \
+            } \
+        }
+        switch (rule->type) {
+            case BCF_HT_INT:  BRANCH(int32_t, ptr[isrc]==bcf_int32_missing, ptr[isrc]==bcf_int32_vector_end); break;
+            case BCF_HT_REAL: BRANCH(float, bcf_float_is_missing(ptr[isrc]), bcf_float_is_vector_end(ptr[isrc])); break;
+            default: error("TODO: %s:%d .. type=%d\n", __FILE__,__LINE__, rule->type);
+        }
+        #undef BRANCH
+
+        rule->nvals = idst;
         bcf_update_info(hdr,line,rule->hdr_tag,rule->vals,rule->nvals,rule->type);
+    }
 }
 
 static int info_rules_comp_key2(const void *a, const void *b)
@@ -346,7 +376,7 @@ static void info_rules_init(args_t *args)
         if ( rule->type==BCF_HT_INT ) rule->type_size = sizeof(int32_t);
         else if ( rule->type==BCF_HT_REAL ) rule->type_size = sizeof(float);
         else if ( rule->type==BCF_HT_STR ) rule->type_size = sizeof(char); 
-        else error("The type is not supported: \"%s\"\n", rule->hdr_tag);
+        else error("The INFO rule \"%s\" is not supported; the tag \"%s\" type is %d\n", ss,rule->hdr_tag,rule->type);
 
         ss = strchr(ss, '\0'); ss++;
         if ( !*ss ) error("Could not parse INFO rules, missing logic of \"%s\"\n", rule->hdr_tag);
@@ -368,8 +398,17 @@ static void info_rules_init(args_t *args)
                     bcf_hdr_id2length(args->out_hdr,BCF_HL_INFO,id)==BCF_VL_G ||
                     bcf_hdr_id2length(args->out_hdr,BCF_HL_INFO,id)==BCF_VL_R
                     ) ? 1 : 0;
-            if ( is_join && is_agr )
-                error("Cannot -i %s:join on Number=[AGR] tags is not supported.\n", rule->hdr_tag);
+            if ( is_join && bcf_hdr_id2length(args->out_hdr,BCF_HL_INFO,id)!=BCF_VL_VAR )
+            {
+                bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->out_hdr, BCF_HL_INFO, "ID", rule->hdr_tag, NULL);
+                hrec = bcf_hrec_dup(hrec);
+                int i = bcf_hrec_find_key(hrec, "Number");
+                if ( i<0 ) error("Uh, could not find the entry Number in the header record of %s\n",rule->hdr_tag);
+                free(hrec->vals[i]);
+                hrec->vals[i] = strdup(".");
+                bcf_hdr_remove(args->out_hdr,BCF_HL_INFO, rule->hdr_tag);
+                bcf_hdr_add_hrec(args->out_hdr, hrec);
+            }
             if ( !is_join && !is_agr )
                 error("Only fixed-length vectors are supported with -i %s:%s\n", ss, rule->hdr_tag);
         }
@@ -691,7 +730,7 @@ maux_t *maux_init(args_t *args)
     assert( n_smpl==bcf_hdr_nsamples(args->out_hdr) );
     if ( args->do_gvcf )
     {
-        ma->gvcf = (gvcf_aux_t*) calloc(ma->n,sizeof(gvcf_aux_t));
+        ma->gvcf = (gvcf_aux_t*) calloc(ma->n,sizeof(gvcf_aux_t));  // -Walloc-size-larger-than gives a harmless warning caused by signed integer ma->n
         for (i=0; i<ma->n; i++)
             ma->gvcf[i].line = bcf_init1();
     }
@@ -701,6 +740,13 @@ maux_t *maux_init(args_t *args)
     for (i=0; i<ma->n; i++)
         ma->buf[i].rid = -1;
     ma->str = (kstring_t*) calloc(n_smpl,sizeof(kstring_t));
+    if ( args->local_alleles )
+    {
+        ma->laa = (int32_t*)malloc(sizeof(*ma->laa)*ma->nout_smpl*(1+args->local_alleles));
+        ma->pl2prob = (double*)malloc(PL2PROB_MAX*sizeof(*ma->pl2prob));
+        for (i=0; i<PL2PROB_MAX; i++)
+            ma->pl2prob[i] = pow(10,-0.1*i);
+    }
     return ma;
 }
 void maux_destroy(maux_t *ma)
@@ -739,6 +785,11 @@ void maux_destroy(maux_t *ma)
     free(ma->smpl_ploidy);
     free(ma->smpl_nGsize);
     free(ma->chr);
+    free(ma->laa);
+    free(ma->tmpi);
+    free(ma->k2k);
+    free(ma->tmpd);
+    free(ma->pl2prob);
     free(ma);
 }
 void maux_expand1(buffer_t *buf, int size)
@@ -1107,7 +1158,7 @@ static void merge_AGR_info_tag(bcf_hdr_t *hdr, bcf1_t *line, bcf_info_t *info, i
                 case BCF_BT_INT16: BRANCH(int16_t, *src==bcf_int16_missing, *src==bcf_int16_vector_end, int); break;
                 case BCF_BT_INT32: BRANCH(int32_t, *src==bcf_int32_missing, *src==bcf_int32_vector_end, int); break;
                 case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(*src), bcf_float_is_vector_end(*src), float); break;
-                default: fprintf(bcftools_stderr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
+                default: fprintf(bcftools_stderr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); bcftools_exit(1);
             }
             #undef BRANCH
         }
@@ -1137,7 +1188,7 @@ static void merge_AGR_info_tag(bcf_hdr_t *hdr, bcf1_t *line, bcf_info_t *info, i
                 case BCF_BT_INT16: BRANCH(int16_t, src[kori]==bcf_int16_missing, src[kori]==bcf_int16_vector_end, int); break;
                 case BCF_BT_INT32: BRANCH(int32_t, src[kori]==bcf_int32_missing, src[kori]==bcf_int32_vector_end, int); break;
                 case BCF_BT_FLOAT: BRANCH(float,   bcf_float_is_missing(src[kori]), bcf_float_is_vector_end(src[kori]), float); break;
-                default: fprintf(bcftools_stderr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
+                default: fprintf(bcftools_stderr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); bcftools_exit(1);
             }
             #undef BRANCH
         }
@@ -1327,6 +1378,171 @@ static inline int max_used_gt_ploidy(bcf_fmt_t *fmt, int nsmpl)
     return max_ploidy;
 }
 
+// Sets ma->laa to local indexes relevant for each sample or missing/vector_end, in per-sample blocks of (1+args->local_alleles) values.
+// The indexes are with respect to the source indexes and must be translated as the very last step.
+// ifmt_PL is the index of FORMAT/PL in ma->fmt_map or -1 if the tag is absent; in the latter case ma->nlaa is set to 0 once a reader would need subsetting.
+void init_local_alleles(args_t *args, bcf1_t *out, int ifmt_PL)
+{
+    bcf_srs_t *files = args->files;
+    maux_t *ma = args->maux;
+    int i,j,k,l, ismpl = 0, nlaa = 0;
+    static int warned = 0;
+
+    hts_expand(double,out->n_allele,ma->ntmpd,ma->tmpd); // allele probabilities
+    hts_expand(int,out->n_allele,ma->ntmpi,ma->tmpi);    // indexes of the sorted probabilities
+
+    // Let map[] be the mapping from src to output idx. Then k2k[] is mapping from src allele idxs to src allele idxs
+    // reordered so that if i<j then map[k2k[i]] < map[k2k[j]]
+    hts_expand(int,out->n_allele,ma->nk2k,ma->k2k);
+
+    // Determine local alleles: either take all that are present in the reader or use PL to determine the best
+    // subset for each sample. The alleles must be listed in the order of the alleles in the output file.
+    for (i=0; i<files->nreaders; i++)
+    {
+        bcf_sr_t *reader = &files->readers[i];
+        bcf_hdr_t *hdr = reader->header;
+        bcf_fmt_t *fmt_ori = ifmt_PL<0 ? NULL : ma->fmt_map[files->nreaders*ifmt_PL+i];   // never index fmt_map with a negative offset when PL is absent
+        bcf1_t *line = maux_get_line(args, i);
+        int nsmpl = bcf_hdr_nsamples(hdr);
+        if ( line )
+        {
+            if ( nlaa < line->n_allele - 1 )
+                nlaa = line->n_allele - 1 <= args->local_alleles ? line->n_allele - 1 : args->local_alleles;
+
+            for (j=0; j<line->n_allele; j++) ma->k2k[j] = j;
+
+            if ( line->n_allele <= args->local_alleles + 1 )
+            {
+                // sort to the output order, insertion sort, ascending
+                int *map = ma->buf[i].rec[ma->buf[i].cur].map;
+                int *k2k = ma->k2k;
+                int tmp;
+                for (k=1; k<line->n_allele; k++)
+                    for (l=k; l>0 && map[k2k[l]] < map[k2k[l-1]]; l--)
+                        tmp = k2k[l], k2k[l] = k2k[l-1], k2k[l-1] = tmp;
+
+                // fewer than the allowed number of alleles, use all alleles from this file
+                for (j=0; j<nsmpl; j++)
+                {
+                    int32_t *ptr = ma->laa + (1+args->local_alleles)*ismpl;
+                    for (k=0; k<line->n_allele; k++) ptr[k] = k2k[k];
+                    for (; k<=args->local_alleles; k++) ptr[k] = bcf_int32_vector_end;
+                    ismpl++;
+                }
+                continue;
+            }
+        }
+        if ( !line || (ifmt_PL>=0 && !fmt_ori) )   // a missing PL tag (ifmt_PL==-1) is reported by the check below instead
+        {
+            // no values, fill in missing values
+            for (j=0; j<nsmpl; j++)
+            {
+                int32_t *ptr = ma->laa + (1+args->local_alleles)*ismpl;
+                ptr[0] = bcf_int32_missing;
+                for (k=1; k<=args->local_alleles; k++) ptr[k] = bcf_int32_vector_end;
+                ismpl++;
+            }
+            continue;
+        }
+
+        // there are more alternate alleles in the input files than is allowed on output, need to subset
+        if ( ifmt_PL==-1 )
+        {
+            if ( !warned )
+                fprintf(bcftools_stderr,"Warning: local alleles are determined from FORMAT/PL but the tag is missing, cannot apply --local-alleles\n");
+            warned = 1;
+            ma->nlaa = 0;
+            return;
+        }
+
+        if ( !IS_VL_G(hdr, fmt_ori->id) ) error("FORMAT/PL must be defined as Number=G\n");
+        if ( 2*fmt_ori->n != line->n_allele*(line->n_allele+1) ) error("Todo: haploid PL to LPL\n");
+
+        int *map = ma->buf[i].rec[ma->buf[i].cur].map;
+        double *allele_prob = ma->tmpd;
+        int *idx = ma->tmpi;
+        #define BRANCH(src_type_t, src_is_missing, src_is_vector_end, pl2prob_idx) { \
+            src_type_t *src = (src_type_t*) fmt_ori->p; \
+            for (j=0; j<nsmpl; j++) \
+            { \
+                for (k=0; k<line->n_allele; k++) allele_prob[k] = 0; \
+                for (k=0; k<line->n_allele; k++) \
+                    for (l=0; l<=k; l++) \
+                    { \
+                        if ( src_is_missing || src_is_vector_end ) { src++; continue; } \
+                        double prob = ma->pl2prob[pl2prob_idx]; \
+                        allele_prob[k] += prob; \
+                        allele_prob[l] += prob; \
+                        src++; \
+                    } \
+                /* insertion sort by allele probability, descending order, with the twist that REF (idx=0) always comes first */ \
+                allele_prob++; idx[0] = -1; idx++; /* keep REF first */ \
+                int si,sj,tmp; \
+                for (si=0; si<line->n_allele-1; si++) idx[si] = si; \
+                for (si=1; si<line->n_allele-1; si++) \
+                    for (sj=si; sj>0 && allele_prob[idx[sj]] > allele_prob[idx[sj-1]]; sj--) \
+                        tmp = idx[sj], idx[sj] = idx[sj-1], idx[sj-1] = tmp; \
+                /*for debugging only: test order*/ \
+                for (si=1; si<line->n_allele-1; si++) \
+                    assert( allele_prob[idx[si-1]] >= allele_prob[idx[si]] ); \
+                allele_prob--; idx--; /* this was to keep REF first */ \
+                int32_t *ptr = ma->laa + (1+args->local_alleles)*ismpl; \
+                ptr[0] = 0; \
+                for (k=1; k<=args->local_alleles && k<line->n_allele; k++) ptr[k] = idx[k]+1; \
+                int kmax = k; \
+                for (; k<=args->local_alleles; k++) ptr[k] = bcf_int32_vector_end; \
+                /* insertion sort by indexes to the output order, ascending */ \
+                for (k=1; k<kmax; k++) \
+                    for (l=k; l>0 && map[ptr[l]] < map[ptr[l-1]]; l--) \
+                        tmp = ptr[l], ptr[l] = ptr[l-1], ptr[l-1] = tmp; \
+                ismpl++; \
+            } \
+        }
+        switch (fmt_ori->type)
+        {
+            case BCF_BT_INT8:  BRANCH( int8_t, *src==bcf_int8_missing,  *src==bcf_int8_vector_end,  *src>=0 ? *src : PL2PROB_MAX-1); break;   // negative PLs must not index pl2prob, same as the wider types
+            case BCF_BT_INT16: BRANCH(int16_t, *src==bcf_int16_missing, *src==bcf_int16_vector_end, *src>=0 && *src<PL2PROB_MAX ? *src : PL2PROB_MAX-1); break;
+            case BCF_BT_INT32: BRANCH(int32_t, *src==bcf_int32_missing, *src==bcf_int32_vector_end, *src>=0 && *src<PL2PROB_MAX ? *src : PL2PROB_MAX-1); break;
+            default: error("Unexpected case: %d, PL\n", fmt_ori->type);
+        }
+        #undef BRANCH
+    }
+    ma->nlaa = nlaa;
+}
+
+void update_local_alleles(args_t *args, bcf1_t *out)  // translate ma->laa from source to output allele indexes, compact it in place and write FORMAT/LAA
+{
+    bcf_srs_t *files = args->files;
+    maux_t *ma = args->maux;
+    int i,j,k,ismpl=0,nsamples = bcf_hdr_nsamples(args->out_hdr);
+    for (i=0; i<files->nreaders; i++)
+    {
+        int irec = ma->buf[i].cur;  // NOTE(review): a negative value presumably means the reader has no active record - confirm
+        bcf_sr_t *reader = &files->readers[i];
+        int nsmpl = bcf_hdr_nsamples(reader->header);
+        for (k=0; k<nsmpl; k++)
+        {
+            int32_t *src = ma->laa + ismpl*(1+args->local_alleles);     // input block of this sample: 1+local_alleles values, src[0] is skipped below
+            int32_t *dst = ma->laa + ismpl*ma->nlaa;                    // output block: nlaa values; dst never runs ahead of src, so in-place compaction is safe
+            j = 0;
+            if ( irec>=0 )
+            {
+                for (; j<ma->nlaa; j++)
+                {
+                    if ( src[j+1]==bcf_int32_missing ) dst[j] = bcf_int32_missing;
+                    else if ( src[j+1]==bcf_int32_vector_end ) break;
+                    else
+                        dst[j] = ma->buf[i].rec[irec].map[src[j+1]];    // source allele index -> output allele index
+                }
+            }
+            if ( j==0 ) dst[j++] = bcf_int32_missing;                   // nothing was copied: the sample gets a single missing value
+            for (; j<ma->nlaa; j++) dst[j] = bcf_int32_vector_end;      // pad the output block (not src), otherwise stale values remain for all but the first sample
+            ismpl++;
+        }
+    }
+    bcf_update_format_int32(args->out_hdr, out, "LAA", ma->laa, nsamples*ma->nlaa);
+}
+
 void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
 {
     bcf_srs_t *files = args->files;
@@ -1335,7 +1551,7 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     int i, ismpl = 0, nsamples = bcf_hdr_nsamples(out_hdr);
     static int warned = 0;
 
-    int nsize = 0, msize = sizeof(int32_t);
+    int nsize = 0;
     for (i=0; i<files->nreaders; i++)
     {
         bcf_fmt_t *fmt = fmt_map[i];
@@ -1345,17 +1561,18 @@ void merge_GT(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     }
     if ( nsize==0 ) nsize = 1;
 
-    if ( ma->ntmp_arr < nsamples*nsize*msize )
+    size_t msize = sizeof(int32_t)*nsize*nsamples;
+    if ( msize > 2147483647 )
     {
-        ma->ntmp_arr = nsamples*nsize*msize;
-        ma->tmp_arr  = realloc(ma->tmp_arr, ma->ntmp_arr);
-        if ( !ma->tmp_arr ) error("Could not allocate %zu bytes\n",ma->ntmp_arr);
-        if ( ma->ntmp_arr > 2147483647 )
-        {
-            if ( !warned ) fprintf(bcftools_stderr,"Warning: Too many genotypes at %s:%"PRId64", requires %zu bytes, skipping.\n", bcf_seqname(out_hdr,out),(int64_t) out->pos+1,ma->ntmp_arr);
-            warned = 1;
-            return;
-        }
+        if ( !warned ) fprintf(bcftools_stderr,"Warning: Too many genotypes at %s:%"PRId64", requires %zu bytes, skipping.\n", bcf_seqname(out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Could not allocate %zu bytes\n",msize);
+        ma->ntmp_arr = msize;
     }
     memset(ma->smpl_ploidy,0,nsamples*sizeof(int));
 
@@ -1511,6 +1728,7 @@ void merge_format_string(args_t *args, const char *key, bcf_fmt_t **fmt_map, bcf
                     int ret = copy_string_field(src, iori - ifrom, fmt_ori->size, str, inew);
                     if ( ret<-1 ) error("[E::%s] fixme: internal error at %s:%"PRId64" .. %d\n",__func__,bcf_seqname(hdr,line),(int64_t) line->pos+1,ret);
                 }
+                if ( nmax < str->l ) nmax = str->l;
                 src += fmt_ori->size;
             }
             continue;
@@ -1522,17 +1740,18 @@ void merge_format_string(args_t *args, const char *key, bcf_fmt_t **fmt_map, bcf
               "If you don't really need it, use `bcftools annotate -x` to remove the annotation before merging.\n", __func__,key);
     }
     // update the record
-    if ( ma->ntmp_arr < nsamples*nmax )
+    size_t msize = nsamples*nmax;
+    if ( msize > 2147483647 )
     {
-        ma->ntmp_arr = nsamples*nmax;
-        ma->tmp_arr  = realloc(ma->tmp_arr, ma->ntmp_arr);
-        if ( !ma->tmp_arr ) error("Could not allocate %zu bytes\n",ma->ntmp_arr);
-        if ( ma->ntmp_arr > 2147483647 )
-        {
-            if ( !warned ) fprintf(bcftools_stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(out_hdr,out),(int64_t) out->pos+1,ma->ntmp_arr);
-            warned = 1;
-            return;
-        }
+        if ( !warned ) fprintf(bcftools_stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Could not allocate %zu bytes\n",msize);
+        ma->ntmp_arr = msize;
     }
     char *tgt = (char*) ma->tmp_arr;
     for (i=0; i<nsamples; i++)
@@ -1544,6 +1763,204 @@ void merge_format_string(args_t *args, const char *key, bcf_fmt_t **fmt_map, bcf
     bcf_update_format_char(out_hdr, out, key, (float*)ma->tmp_arr, nsamples*nmax);
 }
 
+// Merge a Number=G FORMAT tag into its localized, "L"-prefixed counterpart. Note: only diploid Number=G tags are supported for now
+void merge_localized_numberG_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out, int irdr)   // irdr: reader whose fmt_map entry supplies (unchecked) the key and the output type
+{
+    int i,j,k, nsamples = bcf_hdr_nsamples(args->out_hdr);
+    bcf_srs_t *files = args->files;
+    maux_t *ma = args->maux;
+    bcf_fmt_t *fmt = fmt_map[irdr];
+    const char *key = files->readers[irdr].header->id[BCF_DT_ID][fmt_map[irdr]->id].key;
+    size_t nsize = (ma->nlaa+1)*(ma->nlaa+2)/2;             // max number of Number=G localized fields
+    size_t msize = sizeof(float)>sizeof(int32_t) ? sizeof(float) : sizeof(int32_t);    // element size big enough for both the int32 and the float output
+    msize *= nsamples*nsize;
+    if ( msize > 2147483647 )   // the row would need more than 2^31-1 bytes: warn once and skip this tag
+    {
+        static int warned = 0;
+        if ( !warned ) fprintf(bcftools_stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )     // grow the shared scratch buffer only when it is too small
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Failed to allocate %zu bytes at %s:%"PRId64" for FORMAT/%s\n", msize,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,key);
+        ma->ntmp_arr = msize;
+    }
+    int ismpl = 0;      // running sample index in the output row, advanced across all readers
+    for (i=0; i<files->nreaders; i++)
+    {
+        bcf_sr_t *reader = &files->readers[i];
+        bcf_hdr_t *hdr = reader->header;
+        bcf_fmt_t *fmt_ori = fmt_map[i];
+        bcf1_t *line = maux_get_line(args, i);
+        int nsmpl = bcf_hdr_nsamples(hdr);
+
+        if ( !fmt_ori )     // fmt_map[i] is NULL: nothing to copy from this reader
+        {
+            // fill missing values: the first value of each sample is set to missing, the remaining nsize-1 to vector-end
+            #define BRANCH(tgt_type_t, tgt_set_missing, tgt_set_vector_end) { \
+                for (j=0; j<nsmpl; j++) \
+                { \
+                    tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \
+                    tgt_set_missing; \
+                    for (k=1; k<nsize; k++) { tgt++; tgt_set_vector_end; } \
+                    ismpl++; \
+                } \
+            }
+            switch (fmt->type)      // fmt_ori is NULL here, so the type comes from reader irdr; all integer widths are written as int32
+            {
+                case BCF_BT_INT8:  BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_INT16: BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_INT32: BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_FLOAT: BRANCH(float, bcf_float_set_missing(*tgt), bcf_float_set_vector_end(*tgt)); break;
+                default: error("Unexpected case: %d, %s\n", fmt->type, key);
+            }
+            #undef BRANCH
+            continue;
+        }
+        if ( 2*fmt_ori->n!=line->n_allele*(line->n_allele+1) ) error("Todo: localization of missing or haploid Number=G tags\n");    // requires exactly n_allele*(n_allele+1)/2 values per sample
+
+        // localize: per sample, copy the values of genotypes formed by its laa[] alleles (index from bcf_alleles2gt). NOTE(review): a vector-end source value breaks only the inner ij loop
+        #define BRANCH(tgt_type_t, src_type_t, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \
+            for (j=0; j<nsmpl; j++) \
+            { \
+                src_type_t *src = (src_type_t*) fmt_ori->p + j*fmt_ori->n; \
+                tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \
+                int *laa = ma->laa + (1+args->local_alleles)*ismpl; \
+                int ii,ij,tgt_idx = 0; \
+                for (ii=0; ii<=ma->nlaa; ii++) \
+                { \
+                    if ( laa[ii]==bcf_int32_missing || laa[ii]==bcf_int32_vector_end ) break; \
+                    for (ij=0; ij<=ii; ij++) \
+                    { \
+                        int src_idx = bcf_alleles2gt(laa[ii],laa[ij]); \
+                        if ( src_is_missing ) tgt_set_missing; \
+                        else if ( src_is_vector_end ) break; \
+                        else tgt[tgt_idx] = src[src_idx]; \
+                        tgt_idx++; \
+                    } \
+                } \
+                if ( !tgt_idx ) { tgt_set_missing; tgt_idx++; } \
+                for (; tgt_idx<nsize; tgt_idx++) tgt_set_vector_end; \
+                ismpl++; \
+            } \
+        }
+        switch (fmt_ori->type)      // the source width follows this reader's encoding; integer targets are always int32
+        {
+            case BCF_BT_INT8:  BRANCH(int32_t,  int8_t, src[src_idx]==bcf_int8_missing,  src[src_idx]==bcf_int8_vector_end,  tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_INT16: BRANCH(int32_t, int16_t, src[src_idx]==bcf_int16_missing, src[src_idx]==bcf_int16_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_INT32: BRANCH(int32_t, int32_t, src[src_idx]==bcf_int32_missing, src[src_idx]==bcf_int32_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_FLOAT: BRANCH(float, float, bcf_float_is_missing(src[src_idx]), bcf_float_is_vector_end(src[src_idx]), bcf_float_set_missing(tgt[tgt_idx]), bcf_float_set_vector_end(tgt[tgt_idx])); break;
+            default: error("Unexpected case: %d, %s\n", fmt_ori->type, key);
+        }
+        #undef BRANCH
+    }
+    args->tmps.l = 0;       // build the output tag name in the scratch string: 'L' followed by the original key
+    kputc('L',&args->tmps);
+    kputs(key,&args->tmps);
+    if ( fmt_map[irdr]->type==BCF_BT_FLOAT )
+        bcf_update_format_float(args->out_hdr, out, args->tmps.s, (float*)ma->tmp_arr, nsamples*nsize);
+    else
+        bcf_update_format_int32(args->out_hdr, out, args->tmps.s, (int32_t*)ma->tmp_arr, nsamples*nsize);
+    ma->laa_dirty = 1;      // merge_format() calls update_local_alleles() when this flag is set
+}
+void merge_localized_numberAR_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out, int irdr)  // merges a Number=A or Number=R FORMAT tag into its localized, "L"-prefixed counterpart
+{
+    int i,j,k, nsamples = bcf_hdr_nsamples(args->out_hdr);
+    bcf_srs_t *files = args->files;
+    maux_t *ma = args->maux;
+    bcf_fmt_t *fmt = fmt_map[irdr];     // irdr: reader whose fmt_map entry supplies (unchecked) the key, the length class and the output type
+    const char *key = files->readers[irdr].header->id[BCF_DT_ID][fmt->id].key;
+    size_t nsize = IS_VL_R(files->readers[irdr].header, fmt->id) ? ma->nlaa + 1 : ma->nlaa;    // values per sample: Number=R gets one slot more than Number=A
+    size_t msize = sizeof(float)>sizeof(int32_t) ? sizeof(float) : sizeof(int32_t);    // element size big enough for both the int32 and the float output
+    msize *= nsamples*nsize;
+    if ( msize > 2147483647 )   // the row would need more than 2^31-1 bytes: warn once and skip this tag
+    {
+        static int warned = 0;
+        if ( !warned ) fprintf(bcftools_stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )     // grow the shared scratch buffer only when it is too small
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Failed to allocate %zu bytes at %s:%"PRId64" for FORMAT/%s\n", msize,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,key);
+        ma->ntmp_arr = msize;
+    }
+    int ismpl = 0, ibeg = IS_VL_R(files->readers[irdr].header, fmt->id) ? 0 : 1;;   // ibeg: first laa[] slot to read and the offset subtracted from the allele index (0 for Number=R, 1 for Number=A)
+    for (i=0; i<files->nreaders; i++)
+    {
+        bcf_sr_t *reader = &files->readers[i];
+        bcf_hdr_t *hdr = reader->header;
+        bcf_fmt_t *fmt_ori = fmt_map[i];
+        int nsmpl = bcf_hdr_nsamples(hdr);
+
+        if ( !fmt_ori )     // fmt_map[i] is NULL: nothing to copy from this reader
+        {
+            // fill missing values: the first value of each sample is set to missing, the remaining nsize-1 to vector-end
+            #define BRANCH(tgt_type_t, tgt_set_missing, tgt_set_vector_end) { \
+                for (j=0; j<nsmpl; j++) \
+                { \
+                    tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \
+                    tgt_set_missing; \
+                    for (k=1; k<nsize; k++) { tgt++; tgt_set_vector_end; } \
+                    ismpl++; \
+                } \
+            }
+            switch (fmt->type)      // fmt_ori is NULL here, so the type comes from reader irdr; all integer widths are written as int32
+            {
+                case BCF_BT_INT8:  BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_INT16: BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_INT32: BRANCH(int32_t, *tgt=bcf_int32_missing, *tgt=bcf_int32_vector_end); break;
+                case BCF_BT_FLOAT: BRANCH(float, bcf_float_set_missing(*tgt), bcf_float_set_vector_end(*tgt)); break;
+                default: error("Unexpected case: %d, %s\n", fmt->type, key);
+            }
+            #undef BRANCH
+            continue;
+        }
+
+        // localize: per sample, copy src[laa[ii]-ibeg] for each of its laa[] entries. NOTE(review): src_idx is not checked against fmt_ori->n
+        #define BRANCH(tgt_type_t, src_type_t, src_is_missing, src_is_vector_end, tgt_set_missing, tgt_set_vector_end) { \
+            for (j=0; j<nsmpl; j++) \
+            { \
+                src_type_t *src = (src_type_t*) fmt_ori->p + j*fmt_ori->n; \
+                tgt_type_t *tgt = (tgt_type_t *) ma->tmp_arr + ismpl*nsize; \
+                int *laa = ma->laa + (1+args->local_alleles)*ismpl; \
+                int ii,tgt_idx = 0; \
+                for (ii=ibeg; ii<=ma->nlaa; ii++) \
+                { \
+                    if ( laa[ii]==bcf_int32_missing || laa[ii]==bcf_int32_vector_end ) break; \
+                    int src_idx = laa[ii] - ibeg; \
+                    if ( src_is_missing ) tgt_set_missing; \
+                    else if ( src_is_vector_end ) break; \
+                    else tgt[tgt_idx] = src[src_idx]; \
+                    tgt_idx++; \
+                } \
+                if ( !tgt_idx ) { tgt_set_missing; tgt_idx++; } \
+                for (; tgt_idx<nsize; tgt_idx++) tgt_set_vector_end; \
+                ismpl++; \
+            } \
+        }
+        switch (fmt_ori->type)      // the source width follows this reader's encoding; integer targets are always int32
+        {
+            case BCF_BT_INT8:  BRANCH(int32_t,  int8_t, src[src_idx]==bcf_int8_missing,  src[src_idx]==bcf_int8_vector_end,  tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_INT16: BRANCH(int32_t, int16_t, src[src_idx]==bcf_int16_missing, src[src_idx]==bcf_int16_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_INT32: BRANCH(int32_t, int32_t, src[src_idx]==bcf_int32_missing, src[src_idx]==bcf_int32_vector_end, tgt[tgt_idx]=bcf_int32_missing, tgt[tgt_idx]=bcf_int32_vector_end); break;
+            case BCF_BT_FLOAT: BRANCH(float, float, bcf_float_is_missing(src[src_idx]), bcf_float_is_vector_end(src[src_idx]), bcf_float_set_missing(tgt[tgt_idx]), bcf_float_set_vector_end(tgt[tgt_idx])); break;
+            default: error("Unexpected case: %d, %s\n", fmt_ori->type, key);
+        }
+        #undef BRANCH
+    }
+    args->tmps.l = 0;       // build the output tag name in the scratch string: 'L' followed by the original key
+    kputc('L',&args->tmps);
+    kputs(key,&args->tmps);
+    if ( fmt_map[irdr]->type==BCF_BT_FLOAT )
+        bcf_update_format_float(args->out_hdr, out, args->tmps.s, (float*)ma->tmp_arr, nsamples*nsize);
+    else
+        bcf_update_format_int32(args->out_hdr, out, args->tmps.s, (int32_t*)ma->tmp_arr, nsamples*nsize);
+    ma->laa_dirty = 1;      // merge_format() calls update_local_alleles() when this flag is set
+}
 void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
 {
     bcf_srs_t *files = args->files;
@@ -1581,6 +1998,13 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
         }
         if ( fmt_map[i]->n > nsize ) nsize = fmt_map[i]->n;
     }
+    if ( ma->nlaa && length!=BCF_VL_FIXED )
+    {
+        if ( length==BCF_VL_G ) merge_localized_numberG_format_field(args,fmt_map,out,i);
+        else if ( length==BCF_VL_A || length==BCF_VL_R ) merge_localized_numberAR_format_field(args,fmt_map,out,i);
+        return;
+    }
+
     if ( type==BCF_BT_CHAR )
     {
         merge_format_string(args, key, fmt_map, out, length, nsize);
@@ -1588,17 +2012,18 @@ void merge_format_field(args_t *args, bcf_fmt_t **fmt_map, bcf1_t *out)
     }
 
     size_t msize = sizeof(float)>sizeof(int32_t) ? sizeof(float) : sizeof(int32_t);
-    if ( ma->ntmp_arr < nsamples*nsize*msize )
+    msize *= nsamples*nsize;
+    if ( msize > 2147483647 )
     {
-        ma->ntmp_arr = nsamples*nsize*msize;
-        ma->tmp_arr  = realloc(ma->tmp_arr, ma->ntmp_arr);
-        if ( !ma->tmp_arr ) error("Failed to allocate %zu bytes at %s:%"PRId64" for FORMAT/%s\n", ma->ntmp_arr,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,key);
-        if ( ma->ntmp_arr > 2147483647 )
-        {
-            if ( !warned ) fprintf(bcftools_stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(out_hdr,out),(int64_t) out->pos+1,ma->ntmp_arr);
-            warned = 1;
-            return;
-        }
+        if ( !warned ) fprintf(bcftools_stderr,"Warning: The row size is too big for FORMAT/%s at %s:%"PRId64", requires %zu bytes, skipping.\n", key,bcf_seqname(out_hdr,out),(int64_t) out->pos+1,msize);
+        warned = 1;
+        return;
+    }
+    if ( ma->ntmp_arr < msize )
+    {
+        ma->tmp_arr  = realloc(ma->tmp_arr, msize);
+        if ( !ma->tmp_arr ) error("Failed to allocate %zu bytes at %s:%"PRId64" for FORMAT/%s\n", msize,bcf_seqname(args->out_hdr,out),(int64_t) out->pos+1,key);
+        ma->ntmp_arr = msize;
     }
 
     // Fill the temp array for all samples by collecting values from all files
@@ -1792,7 +2217,7 @@ void merge_format(args_t *args, bcf1_t *out)
     khiter_t kitr;
     strdict_t *tmph = args->tmph;
     kh_clear(strdict, tmph);
-    int i, j, ret, has_GT = 0, max_ifmt = 0; // max fmt index
+    int i, j, ret, has_GT = 0, has_PL = -1, max_ifmt = 0; // max fmt index
     for (i=0; i<files->nreaders; i++)
     {
         bcf1_t *line = maux_get_line(args,i);
@@ -1822,6 +2247,7 @@ void merge_format(args_t *args, bcf1_t *out)
                         memset(ma->fmt_map+ma->nfmt_map*files->nreaders, 0, (max_ifmt-ma->nfmt_map+1)*files->nreaders*sizeof(bcf_fmt_t*));
                         ma->nfmt_map = max_ifmt+1;
                     }
+                    if ( key[0]=='P' && key[1]=='L' && key[2]==0  ) { has_PL = ifmt; }
                 }
                 kitr = kh_put(strdict, tmph, key, &ret);
                 kh_value(tmph, kitr) = ifmt;
@@ -1835,6 +2261,12 @@ void merge_format(args_t *args, bcf1_t *out)
         ma->buf[i].rec[irec].als_differ = j==line->n_allele ? 0 : 1;
     }
 
+    if ( args->local_alleles )
+    {
+        ma->laa_dirty = ma->nlaa = 0;
+        if ( out->n_allele > args->local_alleles + 1 ) init_local_alleles(args, out, has_PL);
+    }
+
     out->n_sample = bcf_hdr_nsamples(out_hdr);
     if ( has_GT )
         merge_GT(args, ma->fmt_map, out);
@@ -1842,6 +2274,10 @@ void merge_format(args_t *args, bcf1_t *out)
 
     for (i=1; i<=max_ifmt; i++)
         merge_format_field(args, &ma->fmt_map[i*files->nreaders], out);
+
+    if ( ma->laa_dirty )
+        update_local_alleles(args, out);
+
     out->d.indiv_dirty = 1;
 }
 
@@ -2043,6 +2479,23 @@ void gvcf_flush(args_t *args, int done)
     }
 }
 
+static inline int is_gvcf_block(bcf1_t *line)   // returns 1 if the record is treated as a gVCF block, 0 otherwise
+{
+    if ( line->rlen<=1 ) return 0;      // a record spanning a single base is never a block
+    if ( strlen(line->d.allele[0])==line->rlen ) return 0;     // REF string covers the whole rlen; presumably rlen then does not come from INFO/END - TODO confirm
+    if ( line->n_allele==1 ) return 1;      // longer than its REF string and no ALT allele
+
+    int i;
+    for (i=1; i<line->n_allele; i++)    // otherwise one of the symbolic ALT alleles below must be present
+    {
+        if ( !strcmp(line->d.allele[i],"<*>") ) return 1;
+        if ( !strcmp(line->d.allele[i],"<NON_REF>") ) return 1;
+        if ( !strcmp(line->d.allele[i],"<X>") ) return 1;
+    }
+    return 0;
+}
+static const int snp_mask = (VCF_SNP<<2)|(VCF_MNP<<2), indel_mask = VCF_INDEL<<2, ref_mask = 2;   // can_merge() stores variant types shifted left by 2, uses 2 for "no variant type" and keeps bit 1 for VCF_GVCF_REF
+
 /*
     Check incoming lines for new gVCF blocks, set pointer to the current source
     buffer (gvcf or readers).  In contrast to gvcf_flush, this function can be
@@ -2061,6 +2514,7 @@ void gvcf_stage(args_t *args, int pos)
     maux->gvcf_min = INT_MAX;
     for (i=0; i<files->nreaders; i++)
     {
+        if ( gaux[i].active && gaux[i].end < pos ) gaux[i].active = 0;
         if ( gaux[i].active )
         {
             // gvcf block should not overlap with another record
@@ -2079,7 +2533,7 @@ void gvcf_stage(args_t *args, int pos)
         int irec = maux->buf[i].beg;
         bcf_hdr_t *hdr = bcf_sr_get_header(files, i);
         bcf1_t *line = args->files->readers[i].buffer[irec];
-        int ret = bcf_get_info_int32(hdr,line,"END",&end,&nend);
+        int ret = is_gvcf_block(line) ? bcf_get_info_int32(hdr,line,"END",&end,&nend) : 0;
         if ( ret==1 )
         {
             if ( end[0] == line->pos + 1 )  // POS and INFO/END are identical, treat as if a normal w/o INFO/END
@@ -2220,7 +2674,6 @@ void debug_state(args_t *args)
     fprintf(bcftools_stderr,"\n");
 }
 
-
 /*
    Determine which line should be merged from which reader: go through all
    readers and all buffered lines, expand REF,ALT and try to match lines with
@@ -2229,7 +2682,6 @@ void debug_state(args_t *args)
 int can_merge(args_t *args)
 {
     bcf_srs_t *files = args->files;
-    int snp_mask = (VCF_SNP<<1)|(VCF_MNP<<1), indel_mask = VCF_INDEL<<1, ref_mask = 1;
     maux_t *maux = args->maux;
     gvcf_aux_t *gaux = maux->gvcf;
     char *id = NULL, ref = 'N';
@@ -2242,6 +2694,9 @@ int can_merge(args_t *args)
     }
     maux->var_types = maux->nals = 0;
 
+    // this is only for the `-m none -g` mode, ensure that <*> lines come last
+    #define VCF_GVCF_REF 1
+
     for (i=0; i<files->nreaders; i++)
     {
         buffer_t *buf = &maux->buf[i];
@@ -2259,12 +2714,17 @@ int can_merge(args_t *args)
             buf->rec[j].skip = SKIP_DIFF;
             ntodo++;
 
+            bcf1_t *line = buf->lines[j];
             if ( args->merge_by_id )
-                id = buf->lines[j]->d.id;
+                id = line->d.id;
             else
             {
-                int var_type = bcf_get_variant_types(buf->lines[j]);
-                maux->var_types |= var_type ? var_type<<1 : 1;
+                int var_type = bcf_get_variant_types(line);
+                maux->var_types |= var_type ? var_type<<2 : 2;
+
+                // for the `-m none -g` mode
+                if ( args->collapse==COLLAPSE_NONE && args->do_gvcf && is_gvcf_block(line) )
+                    maux->var_types |= VCF_GVCF_REF;
             }
         }
 
@@ -2296,7 +2756,7 @@ int can_merge(args_t *args)
             bcf1_t *line = buf->lines[j]; // ptr to reader's buffer or gvcf buffer
 
             int line_type = bcf_get_variant_types(line);
-            line_type = line_type ? line_type<<1 : 1;
+            line_type = line_type ? line_type<<2 : 2;
 
             // select relevant lines
             if ( args->merge_by_id )
@@ -2305,6 +2765,12 @@ int can_merge(args_t *args)
             }
             else
             {
+                // when merging gVCF in -m none mode, make sure that gVCF blocks with the same POS as variant
+                // records come last, otherwise infinite loop is created (#1164)
+                if ( args->collapse==COLLAPSE_NONE && args->do_gvcf )
+                {
+                    if ( is_gvcf_block(line) && (maux->var_types & (~(VCF_GVCF_REF|2))) ) continue;
+                }
                 if ( args->collapse==COLLAPSE_NONE && maux->nals )
                 {
                     // All alleles of the tested record must be present in the
@@ -2368,7 +2834,6 @@ int can_merge(args_t *args)
 */
 void stage_line(args_t *args)
 {
-    int snp_mask = (VCF_SNP<<1)|(VCF_MNP<<1), indel_mask = VCF_INDEL<<1, ref_mask = 1;
     bcf_srs_t *files = args->files;
     maux_t *maux = args->maux;
 
@@ -2438,13 +2903,9 @@ void stage_line(args_t *args)
 
 void merge_line(args_t *args)
 {
-    if ( args->regs )
-    {
-        if ( !regidx_overlap(args->regs,args->maux->chr,args->maux->pos,args->maux->pos,NULL) ) return;
-    }
-
     bcf1_t *out = args->out_line;
     merge_chrom2qual(args, out);
+    if ( args->regs && !regidx_overlap(args->regs,args->maux->chr,out->pos,out->pos+out->rlen-1,NULL) ) return;
     merge_filter(args, out);
     merge_info(args, out);
     if ( args->do_gvcf )
@@ -2492,9 +2953,59 @@ void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *c
     error_errno("[%s] Failed to add program information to header", __func__);
 }
 
+void hdr_add_localized_tags(args_t *args, bcf_hdr_t *hdr)   // appends FORMAT/LAA plus an "L"-prefixed, Number=. copy of every Number=G/A/R FORMAT header line
+{
+    char **str = NULL;      // new header lines, collected during the scan and appended afterwards; presumably to avoid changing hdr->hrec while iterating it
+    int i,j, nstr = 0, mstr = 0;
+    for (i=0; i<hdr->nhrec; i++)
+    {
+        if ( hdr->hrec[i]->type!=BCF_HL_FMT ) continue;     // only FORMAT definitions are considered
+        j = bcf_hrec_find_key(hdr->hrec[i],"ID");
+        if ( j<0 ) continue;
+        char *key = hdr->hrec[i]->vals[j];
+        int id = bcf_hdr_id2int(hdr, BCF_DT_ID, key);
+        assert( id>=0 );
+        int localize = 0;
+        if ( bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_G ) localize = 1;
+        if ( bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_A ) localize = 1;
+        if ( bcf_hdr_id2length(hdr,BCF_HL_FMT,id) == BCF_VL_R ) localize = 1;
+        if ( !localize ) continue;      // tags of any other length class are left alone
+        args->tmps.l = 0;
+
+        uint32_t e = 0, nout = 0;       // e: becomes non-zero if any formatting call fails; nout: key=value pairs written so far
+        e |= ksprintf(&args->tmps, "##%s=<", hdr->hrec[i]->key) < 0;
+        for (j=0; j<hdr->hrec[i]->nkeys; j++)
+        {
+            if ( !strcmp("IDX",hdr->hrec[i]->keys[j]) ) continue;      // the IDX key is not copied to the new line
+            if ( nout ) e |= kputc(',',&args->tmps) < 0;
+            if ( !strcmp("ID",hdr->hrec[i]->keys[j]) )
+                e |= ksprintf(&args->tmps,"%s=L%s", hdr->hrec[i]->keys[j], hdr->hrec[i]->vals[j]) < 0;
+            else if ( !strcmp("Number",hdr->hrec[i]->keys[j]) )
+                e |= ksprintf(&args->tmps,"Number=.") < 0;
+            else if ( !strcmp("Description",hdr->hrec[i]->keys[j]) && hdr->hrec[i]->vals[j][0]=='"' )
+                e |= ksprintf(&args->tmps,"Description=\"Localized field: %s", hdr->hrec[i]->vals[j]+1) < 0;     // vals[j]+1 skips the original opening quote
+            else
+                e |= ksprintf(&args->tmps,"%s=%s", hdr->hrec[i]->keys[j], hdr->hrec[i]->vals[j]) < 0;
+            nout++;
+        }
+        e |= ksprintf(&args->tmps,">\n") < 0;
+        if ( e ) error("Failed to format the header line for %s\n", key);
+        nstr++;
+        hts_expand(char*,nstr,mstr,str);
+        str[nstr-1] = strdup(args->tmps.s);     // NOTE(review): the strdup() result is not checked for NULL
+    }
+    if ( !nstr ) return;        // nothing to localize: the header is left untouched and LAA is not added
+    bcf_hdr_append(hdr,"##FORMAT=<ID=LAA,Number=.,Type=Integer,Description=\"Localized alleles: subset of alternate alleles relevant for each sample\">");
+    for (i=0; i<nstr; i++)
+    {
+        bcf_hdr_append(hdr, str[i]);    // NOTE(review): the return value of bcf_hdr_append() is ignored here
+        free(str[i]);
+    }
+    free(str);
+}
 void merge_vcf(args_t *args)
 {
-    args->out_fh  = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
+    args->out_fh  = hts_open(args->output_fname, hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->n_threads ) hts_set_opt(args->out_fh, HTS_OPT_THREAD_POOL, args->files->p); //hts_set_threads(args->out_fh, args->n_threads);
     args->out_hdr = bcf_hdr_init("w");
@@ -2511,6 +3022,7 @@ void merge_vcf(args_t *args)
             char buf[24]; snprintf(buf,sizeof buf,"%d",i+1);
             merge_headers(args->out_hdr, args->files->readers[i].header,buf,args->force_samples);
         }
+        if ( args->local_alleles ) hdr_add_localized_tags(args, args->out_hdr);
         if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
         if (bcf_hdr_sync(args->out_hdr) < 0)
             error_errno("[%s] Failed to update header", __func__);
@@ -2582,7 +3094,9 @@ static void usage(void)
     fprintf(bcftools_stderr, "    -g, --gvcf <-|ref.fa>              merge gVCF blocks, INFO/END tag is expected. Implies -i QS:sum,MinDP:min,I16:sum,IDV:max,IMF:max\n");
     fprintf(bcftools_stderr, "    -i, --info-rules <tag:method,..>   rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
     fprintf(bcftools_stderr, "    -l, --file-list <file>             read file names from the file\n");
+    fprintf(bcftools_stderr, "    -L, --local-alleles <int>          EXPERIMENTAL: if more than <int> ALT alleles are encountered, drop FMT/PL and output LAA+LPL instead; 0=unlimited [0]\n");
     fprintf(bcftools_stderr, "    -m, --merge <string>               allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
+    fprintf(bcftools_stderr, "        --no-index                     merge unindexed files, the same chromosomal order is required and -r/-R are not allowed\n");
     fprintf(bcftools_stderr, "        --no-version                   do not append version and command line to the header\n");
     fprintf(bcftools_stderr, "    -o, --output <file>                write output to a file [standard output]\n");
     fprintf(bcftools_stderr, "    -O, --output-type <b|u|z|v>        'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
@@ -2590,7 +3104,7 @@ static void usage(void)
     fprintf(bcftools_stderr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
     fprintf(bcftools_stderr, "        --threads <int>                use multithreading with <int> worker threads [0]\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfmerge(int argc, char *argv[])
@@ -2610,6 +3124,7 @@ int main_vcfmerge(int argc, char *argv[])
     {
         {"help",no_argument,NULL,'h'},
         {"merge",required_argument,NULL,'m'},
+        {"local-alleles",required_argument,NULL,'L'},
         {"gvcf",required_argument,NULL,'g'},
         {"file-list",required_argument,NULL,'l'},
         {"missing-to-ref",no_argument,NULL,'0'},
@@ -2624,11 +3139,19 @@ int main_vcfmerge(int argc, char *argv[])
         {"regions-file",required_argument,NULL,'R'},
         {"info-rules",required_argument,NULL,'i'},
         {"no-version",no_argument,NULL,8},
+        {"no-index",no_argument,NULL,10},
         {"filter-logic",required_argument,NULL,'F'},
         {NULL,0,NULL,0}
     };
-    while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0",loptions,NULL)) >= 0) {
+    char *tmp;
+    while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:g:F:0L:",loptions,NULL)) >= 0) {
         switch (c) {
+            case 'L':
+                args->local_alleles = strtol(optarg,&tmp,10);
+                if ( *tmp ) error("Could not parse argument: --local-alleles %s\n", optarg);
+                if ( args->local_alleles < 1 )
+                    error("Error: \"--local-alleles %s\" makes no sense, expected value bigger or equal than 1\n", optarg);
+                break;
             case 'F': 
                 if ( !strcmp(optarg,"+") ) args->filter_logic = FLT_LOGIC_ADD;
                 else if ( !strcmp(optarg,"x") ) args->filter_logic = FLT_LOGIC_REMOVE;
@@ -2674,6 +3197,7 @@ int main_vcfmerge(int argc, char *argv[])
             case  3 : args->force_samples = 1; break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case  8 : args->record_cmd_line = 0; break;
+            case 10 : args->no_index = 1; break;
             case 'h':
             case '?': usage(); break;
             default: error("Unknown argument: %s\n", optarg);
@@ -2682,7 +3206,13 @@ int main_vcfmerge(int argc, char *argv[])
     if ( argc==optind && !args->file_list ) usage();
     if ( argc-optind<2 && !args->file_list ) usage();
 
-    args->files->require_index = 1;
+    if ( args->no_index )
+    {
+        if ( args->regions_list ) error("Error: cannot combine --no-index with -r/-R\n");
+        bcf_sr_set_opt(args->files,BCF_SR_ALLOW_NO_IDX);
+    }
+    else
+        bcf_sr_set_opt(args->files,BCF_SR_REQUIRE_IDX);
     if ( args->regions_list )
     {
         if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
diff --git a/bcftools/vcfnorm.c b/bcftools/vcfnorm.c
index dcaaba1..7b510b1 100644
--- a/bcftools/vcfnorm.c
+++ b/bcftools/vcfnorm.c
@@ -1,6 +1,6 @@
 /*  vcfnorm.c -- Left-align and normalize indels.
 
-    Copyright (C) 2013-2019 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -26,6 +26,7 @@ THE SOFTWARE.  */
 #include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
 #include <errno.h>
@@ -38,6 +39,7 @@ THE SOFTWARE.  */
 #include <htslib/khash_str2int.h>
 #include "bcftools.h"
 #include "rbuf.h"
+#include "abuf.h"
 
 #define CHECK_REF_EXIT 1
 #define CHECK_REF_WARN 2
@@ -84,20 +86,25 @@ typedef struct
     int32_t *int32_arr;
     int ntmp_arr1, ntmp_arr2, nint32_arr;
     kstring_t *tmp_str;
-    kstring_t *tmp_als, tmp_als_str;
+    kstring_t *tmp_als, tmp_kstr;
     int ntmp_als;
     rbuf_t rbuf;
     int buf_win;            // maximum distance between two records to consider
     int aln_win;            // the realignment window size (maximum repeat size)
     bcf_srs_t *files;       // using the synced reader only for -r option
-    bcf_hdr_t *hdr;
+    bcf_hdr_t *hdr, *out_hdr;
     cmpals_t cmpals_in, cmpals_out;
     faidx_t *fai;
     struct { int tot, set, swap; } nref;
     char **argv, *output_fname, *ref_fname, *vcf_fname, *region, *targets;
     int argc, rmdup, output_type, n_threads, check_ref, strict_filter, do_indels;
     int nchanged, nskipped, nsplit, ntotal, mrows_op, mrows_collapse, parsimonious;
-    int record_cmd_line, force, force_warned;
+    int record_cmd_line, force, force_warned, keep_sum_ad;
+    abuf_t *abuf;
+    abuf_opt_t atomize;
+    int use_star_allele;
+    char *old_rec_tag;
+    htsFile *out;
 }
 args_t;
 
@@ -136,7 +143,7 @@ static void seq_to_upper(char *seq, int len)
 static void fix_ref(args_t *args, bcf1_t *line)
 {
     int reflen = strlen(line->d.allele[0]);
-    int i, maxlen = reflen, len;
+    int i,j, maxlen = reflen, len;
     for (i=1; i<line->n_allele; i++)
     {
         int len = strlen(line->d.allele[i]);
@@ -149,27 +156,57 @@ static void fix_ref(args_t *args, bcf1_t *line)
 
     args->nref.tot++;
 
-    // is the REF different?
+    // is the REF different? If not, we are done
     if ( !strncasecmp(line->d.allele[0],ref,reflen) ) { free(ref); return; }
 
-    // is the REF allele missing or N?
-    if ( reflen==1 && (line->d.allele[0][0]=='.' || line->d.allele[0][0]=='N' || line->d.allele[0][0]=='n') ) 
+    // is the REF allele missing?
+    if ( reflen==1 && line->d.allele[0][0]=='.' ) 
     { 
         line->d.allele[0][0] = ref[0]; 
         args->nref.set++; 
         free(ref);
-        bcf_update_alleles(args->hdr,line,(const char**)line->d.allele,line->n_allele);
+        bcf_update_alleles(args->out_hdr,line,(const char**)line->d.allele,line->n_allele);
         return;
     }
 
-    // does REF contain non-standard bases?
-    if ( replace_iupac_codes(line->d.allele[0],strlen(line->d.allele[0])) )
+    // does REF or ALT contain non-standard bases?
+    int has_non_acgtn = 0;
+    for (i=0; i<line->n_allele; i++)
+    {
+        if ( line->d.allele[i][0]=='<' ) continue;
+        has_non_acgtn += replace_iupac_codes(line->d.allele[i],strlen(line->d.allele[i]));
+    }
+    if ( has_non_acgtn )
     {
         args->nref.set++;
-        bcf_update_alleles(args->hdr,line,(const char**)line->d.allele,line->n_allele);
+        bcf_update_alleles(args->out_hdr,line,(const char**)line->d.allele,line->n_allele);
         if ( !strncasecmp(line->d.allele[0],ref,reflen) ) { free(ref); return; }
     }
 
+    // does the REF allele contain N's ?
+    int fix = 0;
+    for (i=0; i<reflen; i++)
+    {
+        if ( line->d.allele[0][i]!='N' ) continue;
+        if ( ref[i]=='N' ) continue;
+        line->d.allele[0][i] = ref[i];
+        fix++;
+        for (j=1; j<line->n_allele; j++)
+        {
+            int len = strlen(line->d.allele[j]);
+            if ( len <= i || line->d.allele[j][i]!='N' ) continue;
+            line->d.allele[j][i] = ref[i];
+            fix++;
+        }
+    }
+    if ( fix )
+    {
+        args->nref.set++;
+        bcf_update_alleles(args->out_hdr,line,(const char**)line->d.allele,line->n_allele);
+        if ( !strncasecmp(line->d.allele[0],ref,reflen) ) { free(ref); return; }
+    }
+
+
     // is it swapped?
     for (i=1; i<line->n_allele; i++)
     {
@@ -178,45 +215,35 @@ static void fix_ref(args_t *args, bcf1_t *line)
     }
 
     kstring_t str = {0,0,0};
-    if ( i==line->n_allele )
+    if ( i==line->n_allele )    // none of the alternate alleles matches the reference
     {
-        // none of the alternate alleles matches the reference
-        if ( line->n_allele>1 )
-            args->nref.set++;
-        else
-            args->nref.swap++;
-
-        kputs(line->d.allele[0],&str);
-        kputc(',',&str);
+        args->nref.set++;
+        kputsn(ref,reflen,&str);
         for (i=1; i<line->n_allele; i++)
         {
-            kputs(line->d.allele[i],&str);
             kputc(',',&str);
+            kputs(line->d.allele[i],&str);
         }
-        kputc(ref[0],&str);
-        bcf_update_alleles_str(args->hdr,line,str.s);
-        str.l = 0;
+        bcf_update_alleles_str(args->out_hdr,line,str.s);
+        free(ref);
+        free(str.s);
+        return;
     }
-    else
-        args->nref.swap++;
-    free(ref);
 
-    // swap the alleles
-    int j;
+    // one of the alternate alleles matches the reference, assume it's a simple swap
     kputs(line->d.allele[i],&str);
-    for (j=1; j<i; j++)
-    {
-        kputc(',',&str);
-        kputs(line->d.allele[j],&str);
-    }
-    kputc(',',&str);
-    kputs(line->d.allele[0],&str);
-    for (j=i+1; j<line->n_allele; j++)
+    for (j=1; j<line->n_allele; j++)
     {
         kputc(',',&str);
-        kputs(line->d.allele[j],&str);
+        if ( j==i ) 
+            kputs(line->d.allele[0],&str);
+        else
+            kputs(line->d.allele[j],&str);
     }
-    bcf_update_alleles_str(args->hdr,line,str.s);
+    bcf_update_alleles_str(args->out_hdr,line,str.s);
+    args->nref.swap++;
+    free(ref);
+    free(str.s);
 
     // swap genotypes
     int ntmp = args->ntmp_arr1 / sizeof(int32_t); // reuse tmp_arr declared as uint8_t
@@ -231,7 +258,7 @@ static void fix_ref(args_t *args, bcf1_t *line)
         else if ( gts[j]==bcf_gt_unphased(i) ) gts[j] = bcf_gt_unphased(0);
         else if ( gts[j]==bcf_gt_phased(i) ) gts[j] = bcf_gt_phased(0);
     }
-    bcf_update_genotypes(args->hdr,line,gts,ngts);
+    bcf_update_genotypes(args->out_hdr,line,gts,ngts);
 
     // update AC
     int nac = bcf_get_info_int32(args->hdr, line, "AC", &args->tmp_arr1, &ntmp);
@@ -240,10 +267,8 @@ static void fix_ref(args_t *args, bcf1_t *line)
     {
         int32_t *ac = (int32_t*)args->tmp_arr1;
         ac[i-1] = ni;
-        bcf_update_info_int32(args->hdr, line, "AC", ac, nac);
+        bcf_update_info_int32(args->out_hdr, line, "AC", ac, nac);
     }
-    
-    free(str.s);
 }
 
 static void fix_dup_alt(args_t *args, bcf1_t *line)
@@ -268,7 +293,7 @@ static void fix_dup_alt(args_t *args, bcf1_t *line)
         if ( !args->tmp_arr1[i] ) continue;
         line->d.allele[j++] = line->d.allele[i];
     }
-    bcf_update_alleles(args->hdr, line, (const char**)line->d.allele, nals);
+    bcf_update_alleles(args->out_hdr, line, (const char**)line->d.allele, nals);
 
 
     // update genotypes
@@ -286,7 +311,36 @@ static void fix_dup_alt(args_t *args, bcf1_t *line)
         gts[i] = bcf_gt_is_phased(gts[i]) ? bcf_gt_phased(ial_new) : bcf_gt_unphased(ial_new);
         changed = 1;
     }
-    if ( changed ) bcf_update_genotypes(args->hdr,line,gts,ngts);
+    if ( changed ) bcf_update_genotypes(args->out_hdr,line,gts,ngts);
+}
+
+static void set_old_rec_tag(args_t *args, bcf1_t *dst, bcf1_t *src, int ialt)
+{
+    if ( !args->old_rec_tag ) return;
+
+    // only update if the tag is not present already, there can be multiple normalization steps
+    int i, id = bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, args->old_rec_tag);
+    bcf_unpack(dst, BCF_UN_INFO);
+    for (i=0; i<dst->n_info; i++)
+    {
+        bcf_info_t *inf = &dst->d.info[i];
+        if ( inf && inf->key == id ) return;
+    }
+
+    args->tmp_kstr.l = 0;
+    ksprintf(&args->tmp_kstr,"%s|%"PRIhts_pos"|%s|",bcf_seqname(args->hdr,src),src->pos+1,src->d.allele[0]);
+    for (i=1; i<src->n_allele; i++)
+    {
+        kputs(src->d.allele[i],&args->tmp_kstr);
+        if ( i+1<src->n_allele ) kputc(',',&args->tmp_kstr);
+    }
+    if ( ialt>0 )
+    {
+        kputc('|',&args->tmp_kstr);
+        kputw(ialt,&args->tmp_kstr);
+    }
+    if ( (bcf_update_info_string(args->out_hdr, dst, args->old_rec_tag, args->tmp_kstr.s))!=0 )
+            error("An error occurred while updating INFO/%s\n",args->old_rec_tag);
 }
 
 #define ERR_DUP_ALLELE       -2
@@ -333,7 +387,7 @@ static int realign(args_t *args, bcf1_t *line)
         if ( line->rlen > 1 )
         {
             line->d.allele[0][1] = 0;
-            bcf_update_alleles(args->hdr,line,(const char**)line->d.allele,line->n_allele);
+            bcf_update_alleles(args->out_hdr,line,(const char**)line->d.allele,line->n_allele);
         }
         return ERR_OK;
     }
@@ -363,7 +417,7 @@ static int realign(args_t *args, bcf1_t *line)
     }
 
     // trim from right
-    int ori_pos = line->pos;
+    int new_pos = line->pos;
     while (1)
     {
         // is the rightmost base identical in all alleles?
@@ -374,7 +428,7 @@ static int realign(args_t *args, bcf1_t *line)
             if ( als[i].l < min_len ) min_len = als[i].l;
         }
         if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed
-        if ( min_len<=1 && line->pos==0 ) break;
+        if ( min_len<=1 && new_pos==0 ) break;
 
         int pad_from_left = 0;
         for (i=0; i<line->n_allele; i++) // trim all alleles
@@ -384,10 +438,10 @@ static int realign(args_t *args, bcf1_t *line)
         }
         if ( pad_from_left )
         {
-            int npad = line->pos >= args->aln_win ? args->aln_win : line->pos;
+            int npad = new_pos >= args->aln_win ? args->aln_win : new_pos;
             free(ref);
-            ref = faidx_fetch_seq(args->fai, (char*)args->hdr->id[BCF_DT_CTG][line->rid].key, line->pos-npad, line->pos-1, &nref);
-            if ( !ref ) error("faidx_fetch_seq failed at %s:%"PRId64"\n", args->hdr->id[BCF_DT_CTG][line->rid].key, (int64_t) line->pos-npad+1);
+            ref = faidx_fetch_seq(args->fai, (char*)args->hdr->id[BCF_DT_CTG][line->rid].key, new_pos-npad, new_pos-1, &nref);
+            if ( !ref ) error("faidx_fetch_seq failed at %s:%"PRId64"\n", args->hdr->id[BCF_DT_CTG][line->rid].key, (int64_t) new_pos-npad+1);
             replace_iupac_codes(ref,nref);
             for (i=0; i<line->n_allele; i++)
             {
@@ -396,7 +450,7 @@ static int realign(args_t *args, bcf1_t *line)
                 memcpy(als[i].s,ref,npad);
                 als[i].l += npad;
             }
-            line->pos -= npad;
+            new_pos -= npad;
         }
     }
     free(ref);
@@ -422,39 +476,43 @@ static int realign(args_t *args, bcf1_t *line)
             memmove(als[i].s,als[i].s+ntrim_left,als[i].l-ntrim_left);
             als[i].l -= ntrim_left;
         }
-        line->pos += ntrim_left;
+        new_pos += ntrim_left;
     }
 
     // Have the alleles changed?
     als[0].s[ als[0].l ] = 0;  // in order for strcmp to work
-    if ( ori_pos==line->pos && !strcasecmp(line->d.allele[0],als[0].s) ) return ERR_OK;
+    if ( new_pos==line->pos && !strcasecmp(line->d.allele[0],als[0].s) ) return ERR_OK;
+
+    set_old_rec_tag(args, line, line, 0);
 
     // Create new block of alleles and update
-    args->tmp_als_str.l = 0;
+    args->tmp_kstr.l = 0;
     for (i=0; i<line->n_allele; i++)
     {
-        if (i>0) kputc(',',&args->tmp_als_str);
-        kputsn(als[i].s,als[i].l,&args->tmp_als_str);
+        if (i>0) kputc(',',&args->tmp_kstr);
+        kputsn(als[i].s,als[i].l,&args->tmp_kstr);
     }
-    args->tmp_als_str.s[ args->tmp_als_str.l ] = 0;
-    bcf_update_alleles_str(args->hdr,line,args->tmp_als_str.s);
+    args->tmp_kstr.s[ args->tmp_kstr.l ] = 0;
+    bcf_update_alleles_str(args->out_hdr,line,args->tmp_kstr.s);
     args->nchanged++;
 
     // Update INFO/END if necessary
     int new_reflen = strlen(line->d.allele[0]);
-    if ( (ori_pos!=line->pos || reflen!=new_reflen) && bcf_get_info_int32(args->hdr, line, "END", &args->int32_arr, &args->nint32_arr)==1 )
+    if ( (new_pos!=line->pos || reflen!=new_reflen) && bcf_get_info_int32(args->hdr, line, "END", &args->int32_arr, &args->nint32_arr)==1 )
     {
         // bcf_update_alleles_str() messed up rlen because line->pos changed. This will be fixed by bcf_update_info_int32()
+        line->pos = new_pos;
         args->int32_arr[0] = line->pos + new_reflen;
-        bcf_update_info_int32(args->hdr, line, "END", args->int32_arr, 1);
+        bcf_update_info_int32(args->out_hdr, line, "END", args->int32_arr, 1);
     }
+    line->pos = new_pos;
 
     return ERR_OK;
 }
 
 static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int ialt, bcf1_t *dst)
 {
-    #define BRANCH_NUMERIC(type,type_t) \
+    #define BRANCH_NUMERIC(type,type_t,is_vector_end,is_missing) \
     { \
         const char *tag = bcf_hdr_int2id(args->hdr,BCF_DT_ID,info->key); \
         int ntmp = args->ntmp_arr1 / sizeof(type_t); \
@@ -477,13 +535,13 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_info_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_info_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in INFO/%s at %s:%"PRId64", expected %d, found %d\n", \
                         tag,bcf_seqname(args->hdr,src),(int64_t) src->pos+1,src->n_allele-1,ret); \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,vals+ialt,1); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals+ialt,1); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
@@ -499,14 +557,24 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_info_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_info_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in INFO/%s at %s:%"PRId64", expected %d, found %d\n", \
                         tag,bcf_seqname(args->hdr,src),(int64_t) src->pos+1,src->n_allele,ret); \
             } \
-            if ( ialt!=0 ) vals[1] = vals[ialt+1]; \
-            bcf_update_info_##type(args->hdr,dst,tag,vals,2); \
+            if ( args->keep_sum_ad >= 0 && args->keep_sum_ad==info->key ) \
+            { \
+                int j; \
+                for (j=1; j<info->len; j++) \
+                    if ( j!=ialt+1 && !(is_missing) && !(is_vector_end) ) vals[0] += vals[j]; \
+                vals[1] = vals[ialt+1]; \
+            } \
+            else \
+            { \
+                if ( ialt!=0 ) vals[1] = vals[ialt+1]; \
+            } \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals,2); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
@@ -522,7 +590,7 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_info_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_info_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in INFO/%s at %s:%"PRId64", expected %d, found %d\n", \
@@ -533,15 +601,15 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
                 vals[1] = vals[bcf_alleles2gt(0,ialt+1)]; \
                 vals[2] = vals[bcf_alleles2gt(ialt+1,ialt+1)]; \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,vals,3); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals,3); \
         } \
         else \
-            bcf_update_info_##type(args->hdr,dst,tag,vals,ret); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals,ret); \
     }
     switch (bcf_hdr_id2type(args->hdr,BCF_HL_INFO,info->key))
     {
-        case BCF_HT_INT:  BRANCH_NUMERIC(int32, int32_t); break;
-        case BCF_HT_REAL: BRANCH_NUMERIC(float, float); break;
+        case BCF_HT_INT:  BRANCH_NUMERIC(int32, int32_t, vals[j]==bcf_int32_vector_end, vals[j]==bcf_int32_missing); break;
+        case BCF_HT_REAL: BRANCH_NUMERIC(float, float, bcf_float_is_vector_end(vals[j]), bcf_float_is_missing(vals[j])); break;
     }
     #undef BRANCH_NUMERIC
 }
@@ -589,7 +657,7 @@ static void split_info_string(args_t *args, bcf1_t *src, bcf_info_t *info, int i
         STR_MOVE_NTH(str.s,tmp,str.s+str.l,ialt,len);
         if ( len<0 ) return;   // wrong number of fields: skip
         str.s[len] = 0;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else if ( len==BCF_VL_R )
     {
@@ -600,7 +668,7 @@ static void split_info_string(args_t *args, bcf1_t *src, bcf_info_t *info, int i
         STR_MOVE_NTH(&str.s[len],tmp,str.s+str.l,ialt,len);
         if ( len<0 ) return;   // wrong number of fields: skip
         str.s[len] = 0;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else if ( len==BCF_VL_G )
     {
@@ -615,16 +683,16 @@ static void split_info_string(args_t *args, bcf1_t *src, bcf_info_t *info, int i
         STR_MOVE_NTH(&str.s[len],tmp,str.s+str.l,iaa-i0a-1,len);
         if ( len<0 ) return;   // wrong number of fields: skip
         str.s[len] = 0;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
 }
 static void split_info_flag(args_t *args, bcf1_t *src, bcf_info_t *info, int ialt, bcf1_t *dst)
 {
     const char *tag = bcf_hdr_int2id(args->hdr,BCF_DT_ID,info->key);
     int ret = bcf_get_info_flag(args->hdr,src,tag,&args->tmp_arr1,&args->ntmp_arr1);
-    bcf_update_info_flag(args->hdr,dst,tag,NULL,ret);
+    bcf_update_info_flag(args->out_hdr,dst,tag,NULL,ret);
 }
 
 static void split_format_genotype(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int ialt, bcf1_t *dst)
@@ -650,11 +718,11 @@ static void split_format_genotype(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
         }
         gt += ngts;
     }
-    bcf_update_genotypes(args->hdr,dst,args->tmp_arr1,ngts*nsmpl);
+    bcf_update_genotypes(args->out_hdr,dst,args->tmp_arr1,ngts*nsmpl);
 }
 static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int ialt, bcf1_t *dst)
 {
-    #define BRANCH_NUMERIC(type,type_t,is_vector_end,set_vector_end) \
+    #define BRANCH_NUMERIC(type,type_t,is_vector_end,is_missing,set_vector_end) \
     { \
         const char *tag = bcf_hdr_int2id(args->hdr,BCF_DT_ID,fmt->id); \
         int ntmp = args->ntmp_arr1 / sizeof(type_t); \
@@ -663,10 +731,10 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
         assert( nvals>0 ); \
         type_t *vals = (type_t *) args->tmp_arr1; \
         int len = bcf_hdr_id2length(args->hdr,BCF_HL_FMT,fmt->id); \
-        int i, nsmpl = bcf_hdr_nsamples(args->hdr); \
+        int i,j, nsmpl = bcf_hdr_nsamples(args->hdr); \
         if ( nvals==nsmpl ) /* all values are missing */ \
         { \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,nsmpl); \
             return; \
         } \
         if ( len==BCF_VL_A ) \
@@ -683,7 +751,7 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_format_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_format_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in FMT/%s at %s:%"PRId64", expected %d, found %d\n", \
@@ -697,7 +765,7 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 dst_vals += 1; \
                 src_vals += nvals; \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,nsmpl); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
@@ -713,7 +781,7 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_format_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_format_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in FMT/%s at %s:%"PRId64", expected %d, found %d\n", \
@@ -721,14 +789,29 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
             } \
             nvals /= nsmpl; \
             type_t *src_vals = vals, *dst_vals = vals; \
-            for (i=0; i<nsmpl; i++) \
+            if ( args->keep_sum_ad >= 0 && args->keep_sum_ad==fmt->id ) \
             { \
-                dst_vals[0] = src_vals[0]; \
-                dst_vals[1] = src_vals[ialt+1]; \
-                dst_vals += 2; \
-                src_vals += nvals; \
+                for (i=0; i<nsmpl; i++) \
+                { \
+                    dst_vals[0] = src_vals[0]; \
+                    for (j=1; j<nvals; j++) \
+                        if ( j!=ialt+1 && !(is_missing) && !(is_vector_end) ) dst_vals[0] += src_vals[j]; \
+                    dst_vals[1] = src_vals[ialt+1]; \
+                    dst_vals += 2; \
+                    src_vals += nvals; \
+                } \
+            } \
+            else \
+            { \
+                for (i=0; i<nsmpl; i++) \
+                { \
+                    dst_vals[0] = src_vals[0]; \
+                    dst_vals[1] = src_vals[ialt+1]; \
+                    dst_vals += 2; \
+                    src_vals += nvals; \
+                } \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,nsmpl*2); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,nsmpl*2); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
@@ -744,7 +827,7 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_format_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_format_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error at %s:%"PRId64", the tag %s has wrong number of fields\n", bcf_seqname(args->hdr,src),(int64_t) src->pos+1,bcf_hdr_int2id(args->hdr,BCF_DT_ID,fmt->id)); \
@@ -775,15 +858,15 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 dst_vals += all_haploid ? 2 : 3; \
                 src_vals += nvals; \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,all_haploid ? nsmpl*2 : nsmpl*3); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,all_haploid ? nsmpl*2 : nsmpl*3); \
         } \
         else \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,nvals); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,nvals); \
     }
     switch (bcf_hdr_id2type(args->hdr,BCF_HL_FMT,fmt->id))
     {
-        case BCF_HT_INT:  BRANCH_NUMERIC(int32, int32_t, src_vals[j]==bcf_int32_vector_end, dst_vals[2]=bcf_int32_vector_end); break;
-        case BCF_HT_REAL: BRANCH_NUMERIC(float, float, bcf_float_is_vector_end(src_vals[j]), bcf_float_set_vector_end(dst_vals[2])); break;
+        case BCF_HT_INT:  BRANCH_NUMERIC(int32, int32_t, src_vals[j]==bcf_int32_vector_end, src_vals[j]==bcf_int32_missing, dst_vals[2]=bcf_int32_vector_end); break;
+        case BCF_HT_REAL: BRANCH_NUMERIC(float, float, bcf_float_is_vector_end(src_vals[j]), bcf_float_is_missing(src_vals[j]), bcf_float_set_vector_end(dst_vals[2])); break;
     }
     #undef BRANCH_NUMERIC
 }
@@ -825,7 +908,7 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
             ptr += blen;
         }
         if ( maxlen<blen ) squeeze_format_char(str.s,blen,maxlen,nsmpl);
-        bcf_update_format_char(args->hdr,dst,tag,str.s,nsmpl*maxlen);
+        bcf_update_format_char(args->out_hdr,dst,tag,str.s,nsmpl*maxlen);
     }
     else if ( len==BCF_VL_R )
     {
@@ -843,7 +926,7 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
             ptr += blen;
         }
         if ( maxlen<blen ) squeeze_format_char(str.s,blen,maxlen,nsmpl);
-        bcf_update_format_char(args->hdr,dst,tag,str.s,nsmpl*maxlen);
+        bcf_update_format_char(args->out_hdr,dst,tag,str.s,nsmpl*maxlen);
     }
     else if ( len==BCF_VL_G )
     {
@@ -871,7 +954,7 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
                 }
                 if ( args->force )
                 {
-                    bcf_update_format_char(args->hdr,dst,tag,NULL,0);
+                    bcf_update_format_char(args->out_hdr,dst,tag,NULL,0);
                     return;
                 }
                 error("Error: wrong number of fields in FMT/%s at %s:%"PRId64", expected %d or %d, found %d\n",
@@ -902,13 +985,12 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
             ptr += blen;
         }
         if ( maxlen<blen ) squeeze_format_char(str.s,blen,maxlen,nsmpl);
-        bcf_update_format_char(args->hdr,dst,tag,str.s,nsmpl*maxlen);
+        bcf_update_format_char(args->out_hdr,dst,tag,str.s,nsmpl*maxlen);
     }
     else
-        bcf_update_format_char(args->hdr,dst,tag,str.s,str.l);
+        bcf_update_format_char(args->out_hdr,dst,tag,str.s,str.l);
 }
 
-
 static void split_multiallelic_to_biallelics(args_t *args, bcf1_t *line)
 {
     int i;
@@ -941,11 +1023,11 @@ static void split_multiallelic_to_biallelics(args_t *args, bcf1_t *line)
 
         // Not quite sure how to handle IDs, they can be assigned to a specific
         // ALT.  For now we leave the ID unchanged for all.
-        bcf_update_id(args->hdr, dst, line->d.id ? line->d.id : ".");
+        bcf_update_id(args->out_hdr, dst, line->d.id ? line->d.id : ".");
 
         tmp.l = rlen;
         kputs(line->d.allele[i+1],&tmp);
-        bcf_update_alleles_str(args->hdr,dst,tmp.s);
+        bcf_update_alleles_str(args->out_hdr,dst,tmp.s);
 
         if ( line->d.n_flt ) bcf_update_filter(args->hdr, dst, line->d.flt, line->d.n_flt);
 
@@ -958,6 +1040,7 @@ static void split_multiallelic_to_biallelics(args_t *args, bcf1_t *line)
             else if ( type==BCF_HT_FLAG ) split_info_flag(args, line, info, i, dst);
             else split_info_string(args, line, info, i, dst);
         }
+        set_old_rec_tag(args, dst, line, i + 1); // 1-based indexes
 
         dst->n_sample = line->n_sample;
         for (j=0; j<line->n_fmt; j++)
@@ -1021,7 +1104,7 @@ static void merge_info_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_inf
                     vals[ args->maps[i].map[k+1] - 1 ] = vals2[k]; \
                 } \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,args->tmp_arr1,nvals); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
@@ -1045,7 +1128,7 @@ static void merge_info_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_inf
                     vals[ args->maps[i].map[k] ] = vals2[k]; \
                 } \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,args->tmp_arr1,nvals); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
@@ -1079,10 +1162,10 @@ static void merge_info_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_inf
                     } \
                 } \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,args->tmp_arr1,nvals); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals); \
         } \
         else \
-            bcf_update_info_##type(args->hdr,dst,tag,vals,nvals_ori); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals,nvals_ori); \
     }
     switch (bcf_hdr_id2type(args->hdr,BCF_HL_INFO,info->key))
     {
@@ -1095,7 +1178,7 @@ static void merge_info_flag(args_t *args, bcf1_t **lines, int nlines, bcf_info_t
 {
     const char *tag = bcf_hdr_int2id(args->hdr,BCF_DT_ID,info->key);
     int ret = bcf_get_info_flag(args->hdr,lines[0],tag,&args->tmp_arr1,&args->ntmp_arr1);
-    bcf_update_info_flag(args->hdr,dst,tag,NULL,ret);
+    bcf_update_info_flag(args->out_hdr,dst,tag,NULL,ret);
 }
 int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst); // see vcfmerge.c
 static void merge_info_string(args_t *args, bcf1_t **lines, int nlines, bcf_info_t *info, bcf1_t *dst)
@@ -1123,7 +1206,7 @@ static void merge_info_string(args_t *args, bcf1_t **lines, int nlines, bcf_info
         str.s[str.l] = 0;
         args->tmp_arr1  = (uint8_t*) str.s;
         args->ntmp_arr1 = str.m;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else if ( len==BCF_VL_G )
     {
@@ -1150,12 +1233,12 @@ static void merge_info_string(args_t *args, bcf1_t **lines, int nlines, bcf_info
         str.s[str.l] = 0;
         args->tmp_arr1  = (uint8_t*) str.s;
         args->ntmp_arr1 = str.m;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else
     {
         bcf_get_info_string(args->hdr,lines[0],tag,&args->tmp_arr1,&args->ntmp_arr1);
-        bcf_update_info_string(args->hdr,dst,tag,args->tmp_arr1);
+        bcf_update_info_string(args->out_hdr,dst,tag,args->tmp_arr1);
     }
 }
 static void merge_format_genotype(args_t *args, bcf1_t **lines, int nlines, bcf_fmt_t *fmt, bcf1_t *dst)
@@ -1198,7 +1281,7 @@ static void merge_format_genotype(args_t *args, bcf1_t **lines, int nlines, bcf_
             gt2 += ngts;
         }
     }
-    bcf_update_genotypes(args->hdr,dst,args->tmp_arr1,ngts*nsmpl);
+    bcf_update_genotypes(args->out_hdr,dst,args->tmp_arr1,ngts*nsmpl);
 }
 static int diploid_to_haploid(int size, int nsmpl, int nals, uint8_t *vals)
 {
@@ -1251,7 +1334,7 @@ static void merge_format_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_f
                     vals2 += nvals2; \
                 } \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
@@ -1279,7 +1362,7 @@ static void merge_format_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_f
                     vals2 += nvals2; \
                 } \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
@@ -1358,10 +1441,10 @@ static void merge_format_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_f
                     vals2 += nvals;\
                 }\
             }\
-            bcf_update_format_##type(args->hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
         } \
         else \
-            bcf_update_format_##type(args->hdr,dst,tag,args->tmp_arr1,nvals_ori*nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals_ori*nsmpl); \
     }
     switch (bcf_hdr_id2type(args->hdr,BCF_HL_FMT,fmt->id))
     {
@@ -1378,7 +1461,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
     if ( len!=BCF_VL_A && len!=BCF_VL_R && len!=BCF_VL_G )
     {
         int nret = bcf_get_format_char(args->hdr,lines[0],tag,&args->tmp_arr1,&args->ntmp_arr1);
-        bcf_update_format_char(args->hdr,dst,tag,args->tmp_arr1,nret);
+        bcf_update_format_char(args->out_hdr,dst,tag,args->tmp_arr1,nret);
         return;
     }
 
@@ -1397,7 +1480,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
         for (i=0; i<nlines; i++)
         {
             int nret = bcf_get_format_char(args->hdr,lines[i],tag,&args->tmp_arr1,&args->ntmp_arr1);
-            if (nret<0) continue; /* format tag does not exist in this record, skip */ \
+            if (nret<0) continue; /* format tag does not exist in this record, skip */
             nret /= nsmpl;
             for (k=0; k<nsmpl; k++)
             {
@@ -1444,7 +1527,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
             if ( i ) // we already have a copy
             {
                 nret = bcf_get_format_char(args->hdr,lines[i],tag,&args->tmp_arr1,&args->ntmp_arr1);
-                if (nret<0) continue; /* format tag does not exist in this record, skip */ \
+                if (nret<0) continue; /* format tag does not exist in this record, skip */
                 nret /= nsmpl;
             }
             for (k=0; k<nsmpl; k++)
@@ -1490,7 +1573,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
     }
     args->ntmp_arr2 = str.m;
     args->tmp_arr2  = (uint8_t*)str.s;
-    bcf_update_format_char(args->hdr,dst,tag,str.s,str.l);
+    bcf_update_format_char(args->out_hdr,dst,tag,str.s,str.l);
 }
 
 char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb);   // see vcfmerge.c
@@ -1511,7 +1594,7 @@ static void merge_biallelics_to_multiallelic(args_t *args, bcf1_t *dst, bcf1_t *
             dst->qual = lines[i]->qual;
     }
 
-    bcf_update_id(args->hdr, dst, lines[0]->d.id);
+    bcf_update_id(args->out_hdr, dst, lines[0]->d.id);
 
     // Merge and set the alleles, create a mapping from source allele indexes to dst idxs
     hts_expand0(map_t,nlines,args->mmaps,args->maps);   // a mapping for each line
@@ -1525,20 +1608,20 @@ static void merge_biallelics_to_multiallelic(args_t *args, bcf1_t *dst, bcf1_t *
     }
     for (i=1; i<nlines; i++)
     {
-        if (lines[i]->d.id[0]!='.' || lines[i]->d.id[1]) bcf_add_id(args->hdr, dst, lines[i]->d.id);
+        if (lines[i]->d.id[0]!='.' || lines[i]->d.id[1]) bcf_add_id(args->out_hdr, dst, lines[i]->d.id);
         args->maps[i].nals = lines[i]->n_allele;
         hts_expand(int,args->maps[i].nals,args->maps[i].mals,args->maps[i].map);
         args->als = merge_alleles(lines[i]->d.allele, lines[i]->n_allele, args->maps[i].map, args->als, &args->nals, &args->mals);
         if ( !args->als ) error("Failed to merge alleles at %s:%"PRId64"\n", bcf_seqname(args->hdr,dst),(int64_t) dst->pos+1);
     }
-    bcf_update_alleles(args->hdr, dst, (const char**)args->als, args->nals);
+    bcf_update_alleles(args->out_hdr, dst, (const char**)args->als, args->nals);
     for (i=0; i<args->nals; i++)
     {
         free(args->als[i]);
         args->als[i] = NULL;
     }
 
-    if ( lines[0]->d.n_flt ) bcf_update_filter(args->hdr, dst, lines[0]->d.flt, lines[0]->d.n_flt);
+    if ( lines[0]->d.n_flt ) bcf_update_filter(args->out_hdr, dst, lines[0]->d.flt, lines[0]->d.n_flt);
     for (i=1; i<nlines; i++) {
         int j;
         for (j=0; j<lines[i]->d.n_flt; j++) {
@@ -1546,13 +1629,13 @@ static void merge_biallelics_to_multiallelic(args_t *args, bcf1_t *dst, bcf1_t *
             // otherwise accumulate FILTERs
             if (lines[i]->d.flt[j] == bcf_hdr_id2int(args->hdr, BCF_DT_ID, "PASS")) {
                 if (args->strict_filter) {
-                    bcf_update_filter(args->hdr, dst, lines[i]->d.flt, lines[i]->d.n_flt);
+                    bcf_update_filter(args->out_hdr, dst, lines[i]->d.flt, lines[i]->d.n_flt);
                     break;
                 }
                 else
                     continue;
             }
-            bcf_add_filter(args->hdr, dst, lines[i]->d.flt[j]);
+            bcf_add_filter(args->out_hdr, dst, lines[i]->d.flt[j]);
         }
     }
 
@@ -1722,7 +1805,7 @@ static void flush_buffer(args_t *args, htsFile *file, int n)
             if ( mrows_ready_to_flush(args, args->lines[k]) )
             {
                 while ( (line=mrows_flush(args)) )
-                    if ( bcf_write1(file, args->hdr, line)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
+                    if ( bcf_write1(file, args->out_hdr, line)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
             }
             int merge = 1;
             if ( args->mrows_collapse!=COLLAPSE_BOTH && args->mrows_collapse!=COLLAPSE_ANY )
@@ -1755,18 +1838,30 @@ static void flush_buffer(args_t *args, htsFile *file, int n)
             prev_type |= line_type;
             if ( args->rmdup & BCF_SR_PAIR_EXACT ) cmpals_add(&args->cmpals_out, args->lines[k]);
         }
-        if ( bcf_write1(file, args->hdr, args->lines[k])!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
+        if ( bcf_write1(file, args->out_hdr, args->lines[k])!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
     }
     if ( args->mrows_op==MROWS_MERGE && !args->rbuf.n )
     {
         while ( (line=mrows_flush(args)) )
-            if ( bcf_write1(file, args->hdr, line)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
+            if ( bcf_write1(file, args->out_hdr, line)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
     }
 }
 
 static void init_data(args_t *args)
 {
     args->hdr = args->files->readers[0].header;
+    if ( args->keep_sum_ad )
+    {
+        args->keep_sum_ad = bcf_hdr_id2int(args->hdr,BCF_DT_ID,"AD");
+        if ( args->keep_sum_ad < 0 ) error("Error: --keep-sum-ad requested but the tag AD is not present\n");
+    }
+    else
+        args->keep_sum_ad = -1;
+
+    args->out_hdr = bcf_hdr_dup(args->hdr);
+    if ( args->old_rec_tag )
+        bcf_hdr_printf(args->out_hdr,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Original variant. Format: CHR|POS|REF|ALT|USED_ALT_IDX\">",args->old_rec_tag); 
+
     rbuf_init(&args->rbuf, 100);
     args->lines = (bcf1_t**) calloc(args->rbuf.m, sizeof(bcf1_t*));
     if ( args->ref_fname )
@@ -1780,6 +1875,14 @@ static void init_data(args_t *args)
         args->tmp_str = (kstring_t*) calloc(bcf_hdr_nsamples(args->hdr),sizeof(kstring_t));
         args->diploid = (uint8_t*) malloc(bcf_hdr_nsamples(args->hdr));
     }
+    if ( args->atomize==SPLIT )
+    {
+        args->abuf = abuf_init(args->hdr, SPLIT); 
+        abuf_set_opt(args->abuf, bcf_hdr_t*, BCF_HDR, args->out_hdr);
+        if ( args->old_rec_tag )
+            abuf_set_opt(args->abuf, const char*, INFO_TAG, args->old_rec_tag);
+        abuf_set_opt(args->abuf, int, STAR_ALLELE, args->use_star_allele);
+    }
 }
 
 static void destroy_data(args_t *args)
@@ -1804,7 +1907,7 @@ static void destroy_data(args_t *args)
     for (i=0; i<args->ntmp_als; i++)
         free(args->tmp_als[i].s);
     free(args->tmp_als);
-    free(args->tmp_als_str.s);
+    free(args->tmp_kstr.s);
     if ( args->tmp_str )
     {
         for (i=0; i<bcf_hdr_nsamples(args->hdr); i++) free(args->tmp_str[i].s);
@@ -1816,15 +1919,16 @@ static void destroy_data(args_t *args)
     free(args->tmp_arr1);
     free(args->tmp_arr2);
     free(args->diploid);
+    if ( args->abuf ) abuf_destroy(args->abuf);
+    bcf_hdr_destroy(args->out_hdr);
     if ( args->mrow_out ) bcf_destroy1(args->mrow_out);
     if ( args->fai ) fai_destroy(args->fai);
     if ( args->mseq ) free(args->seq);
 }
 
 
-static void normalize_line(args_t *args, bcf1_t **line_ptr)
+static void normalize_line(args_t *args, bcf1_t *line)
 {
-    bcf1_t *line = *line_ptr;
     if ( args->fai )
     {
         if ( args->check_ref & CHECK_REF_FIX ) fix_ref(args, line);
@@ -1854,8 +1958,8 @@ static void normalize_line(args_t *args, bcf1_t **line_ptr)
     rbuf_expand0(&args->rbuf,bcf1_t*,args->rbuf.n+1,args->lines);
     int i,j;
     i = j = rbuf_append(&args->rbuf);
-    if ( !args->lines[i] ) args->lines[i] = bcf_init1();
-    SWAP(bcf1_t*, (*line_ptr), args->lines[i]);
+    if ( args->lines[i] ) bcf_destroy(args->lines[i]);
+    args->lines[i] = bcf_dup(line);
     while ( rbuf_prev(&args->rbuf,&i) )
     {
         if ( args->lines[i]->pos > args->lines[j]->pos ) SWAP(bcf1_t*, args->lines[i], args->lines[j]);
@@ -1863,21 +1967,38 @@ static void normalize_line(args_t *args, bcf1_t **line_ptr)
     }
 }
 
+static bcf1_t *next_atomized_line(args_t *args)    // returns the next record to process, or NULL once the reader has no more lines
+{
+    bcf1_t *rec = NULL;
+    if ( args->atomize==SPLIT )
+    {
+        rec = abuf_flush(args->abuf, 0);    // records still held by the abuf buffer are handed out before reading more input
+        if ( rec ) return rec;
+    }
+    // nothing pending in the buffer (or atomization is off): advance the synced reader by one line
+    if ( !bcf_sr_next_line(args->files) ) return NULL;
+    // with atomization on, the new line goes through the buffer; otherwise it is returned unchanged
+    if ( args->atomize==SPLIT )
+    {
+        abuf_push(args->abuf,bcf_sr_get_line(args->files,0));
+        return abuf_flush(args->abuf, 0);   // NOTE(review): presumably the first atomized piece of the line just pushed - confirm in abuf.c
+    }
+    return bcf_sr_get_line(args->files,0);
+}
 static void normalize_vcf(args_t *args)
 {
-    htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
-    if ( out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
+    args->out = hts_open(args->output_fname, hts_bcf_wmode2(args->output_type,args->output_fname));
+    if ( args->out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->n_threads )
-        hts_set_opt(out, HTS_OPT_THREAD_POOL, args->files->p);
-    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
-    if ( bcf_hdr_write(out, args->hdr)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
+        hts_set_opt(args->out, HTS_OPT_THREAD_POOL, args->files->p);
+    if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_norm");
+    if ( bcf_hdr_write(args->out, args->out_hdr)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
 
+    bcf1_t *line;
     int prev_rid = -1, prev_pos = -1, prev_type = 0;
-    while ( bcf_sr_next_line(args->files) )
+    while ( (line = next_atomized_line(args)) )
     {
         args->ntotal++;
-
-        bcf1_t *line = args->files->readers[0].buffer[0];
         if ( args->rmdup )
         {
             int line_type = bcf_get_variant_types(line);
@@ -1901,7 +2022,7 @@ static void normalize_vcf(args_t *args)
 
         // still on the same chromosome?
         int i,j,ilast = rbuf_last(&args->rbuf);
-        if ( ilast>=0 && line->rid != args->lines[ilast]->rid ) flush_buffer(args, out, args->rbuf.n); // new chromosome
+        if ( ilast>=0 && line->rid != args->lines[ilast]->rid ) flush_buffer(args, args->out, args->rbuf.n); // new chromosome
 
         int split = 0;
         if ( args->mrows_op==MROWS_SPLIT )
@@ -1916,13 +2037,13 @@ static void normalize_vcf(args_t *args)
                 args->nsplit++;
                 split_multiallelic_to_biallelics(args, line);
                 for (j=0; j<args->ntmp_lines; j++)
-                    normalize_line(args, &args->tmp_lines[j]);
+                    normalize_line(args, args->tmp_lines[j]);
             }
             else
                 split = 0;
         }
         if ( !split )
-            normalize_line(args, &args->files->readers[0].buffer[0]);
+            normalize_line(args, line);
 
         // find out how many sites to flush
         ilast = rbuf_last(&args->rbuf);
@@ -1932,10 +2053,10 @@ static void normalize_vcf(args_t *args)
             if ( args->lines[ilast]->pos - args->lines[i]->pos < args->buf_win ) break;
             j++;
         }
-        if ( j>0 ) flush_buffer(args, out, j);
+        if ( j>0 ) flush_buffer(args, args->out, j);
     }
-    flush_buffer(args, out, args->rbuf.n);
-    if ( hts_close(out)!=0 ) error("[%s] Error: close failed .. %s\n", __func__,args->output_fname);
+    flush_buffer(args, args->out, args->rbuf.n);
+    if ( hts_close(args->out)!=0 ) error("[%s] Error: close failed .. %s\n", __func__,args->output_fname);
 
     fprintf(stderr,"Lines   total/split/realigned/skipped:\t%d/%d/%d/%d\n", args->ntotal,args->nsplit,args->nchanged,args->nskipped);
     if ( args->check_ref & CHECK_REF_FIX )
@@ -1951,23 +2072,27 @@ static void usage(void)
     fprintf(stderr, "Usage:   bcftools norm [options] <in.vcf.gz>\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
-    fprintf(stderr, "    -c, --check-ref <e|w|x|s>         check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
-    fprintf(stderr, "    -D, --remove-duplicates           remove duplicate lines of the same type.\n");
-    fprintf(stderr, "    -d, --rm-dup <type>               remove duplicate snps|indels|both|all|exact\n");
-    fprintf(stderr, "    -f, --fasta-ref <file>            reference sequence\n");
-    fprintf(stderr, "        --force                       try to proceed even if malformed tags are encountered. Experimental, use at your own risk\n");
-    fprintf(stderr, "    -m, --multiallelics <-|+>[type]   split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
-    fprintf(stderr, "        --no-version                  do not append version and command line to the header\n");
-    fprintf(stderr, "    -N, --do-not-normalize            do not normalize indels (with -m or -c s)\n");
-    fprintf(stderr, "    -o, --output <file>               write output to a file [standard output]\n");
-    fprintf(stderr, "    -O, --output-type <type>          'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
-    fprintf(stderr, "    -r, --regions <region>            restrict to comma-separated list of regions\n");
-    fprintf(stderr, "    -R, --regions-file <file>         restrict to regions listed in a file\n");
-    fprintf(stderr, "    -s, --strict-filter               when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
-    fprintf(stderr, "    -t, --targets <region>            similar to -r but streams rather than index-jumps\n");
-    fprintf(stderr, "    -T, --targets-file <file>         similar to -R but streams rather than index-jumps\n");
-    fprintf(stderr, "        --threads <int>               use multithreading with <int> worker threads [0]\n");
-    fprintf(stderr, "    -w, --site-win <int>              buffer for sorting lines which changed position during realignment [1000]\n");
+    fprintf(stderr, "    -a, --atomize                   Decompose complex variants (e.g. MNVs become consecutive SNVs)\n");
+    fprintf(stderr, "        --atom-overlaps '*'|.       Use the star allele (*) for overlapping alleles or set to missing (.) [*]\n");
+    fprintf(stderr, "    -c, --check-ref e|w|x|s         Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
+    fprintf(stderr, "    -D, --remove-duplicates         Remove duplicate lines of the same type.\n");
+    fprintf(stderr, "    -d, --rm-dup TYPE               Remove duplicate snps|indels|both|all|exact\n");
+    fprintf(stderr, "    -f, --fasta-ref FILE            Reference sequence\n");
+    fprintf(stderr, "        --force                     Try to proceed even if malformed tags are encountered. Experimental, use at your own risk\n");
+    fprintf(stderr, "        --keep-sum TAG,..           Keep vector sum constant when splitting multiallelics (see github issue #360)\n");
+    fprintf(stderr, "    -m, --multiallelics -|+TYPE     Split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
+    fprintf(stderr, "        --no-version                Do not append version and command line to the header\n");
+    fprintf(stderr, "    -N, --do-not-normalize          Do not normalize indels (with -m or -c s)\n");
+    fprintf(stderr, "        --old-rec-tag STR           Annotate modified records with INFO/STR indicating the original variant\n");
+    fprintf(stderr, "    -o, --output FILE               Write output to a file [standard output]\n");
+    fprintf(stderr, "    -O, --output-type TYPE          'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+    fprintf(stderr, "    -r, --regions REGION            Restrict to comma-separated list of regions\n");
+    fprintf(stderr, "    -R, --regions-file FILE         Restrict to regions listed in a file\n");
+    fprintf(stderr, "    -s, --strict-filter             When merging (-m+), merged site is PASS only if all sites being merged PASS\n");
+    fprintf(stderr, "    -t, --targets REGION            Similar to -r but streams rather than index-jumps\n");
+    fprintf(stderr, "    -T, --targets-file FILE         Similar to -R but streams rather than index-jumps\n");
+    fprintf(stderr, "        --threads INT               Use multithreading with <int> worker threads [0]\n");
+    fprintf(stderr, "    -w, --site-win INT              Buffer for sorting lines which changed position during realignment [1000]\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Examples:\n");
     fprintf(stderr, "   # normalize and left-align indels\n");
@@ -1995,11 +2120,16 @@ int main_vcfnorm(int argc, char *argv[])
     args->do_indels = 1;
     int region_is_file  = 0;
     int targets_is_file = 0;
+    args->use_star_allele = 1;
 
     static struct option loptions[] =
     {
         {"help",no_argument,NULL,'h'},
         {"force",no_argument,NULL,7},
+        {"atomize",no_argument,NULL,'a'},
+        {"atom-overlaps",required_argument,NULL,11},
+        {"old-rec-tag",required_argument,NULL,12},
+        {"keep-sum",required_argument,NULL,10},
         {"fasta-ref",required_argument,NULL,'f'},
         {"do-not-normalize",no_argument,NULL,'N'},
         {"multiallelics",required_argument,NULL,'m'},
@@ -2019,8 +2149,21 @@ int main_vcfnorm(int argc, char *argv[])
         {NULL,0,NULL,0}
     };
     char *tmp;
-    while ((c = getopt_long(argc, argv, "hr:R:f:w:Dd:o:O:c:m:t:T:sN",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "hr:R:f:w:Dd:o:O:c:m:t:T:sNa",loptions,NULL)) >= 0) {
         switch (c) {
+            case  10:
+                // possibly generalize this also to INFO/AD and other tags
+                if ( strcasecmp("ad",optarg) )
+                    error("Error: only --keep-sum AD is currently supported. See https://github.com/samtools/bcftools/issues/360 for more.\n");
+                args->keep_sum_ad = 1;  // this will be set to the header id or -1 in init_data
+                break;
+            case 'a': args->atomize = SPLIT; break;
+            case 11 :
+                if ( optarg[0]=='*' ) args->use_star_allele = 1;
+                else if ( optarg[0]=='.' ) args->use_star_allele = 0;
+                else error("Invalid argument to --atom-overlaps. Perhaps you wanted: \"--atom-overlaps '*'\"?\n");
+                break;
+            case 12 : args->old_rec_tag = optarg; break;
             case 'N': args->do_indels = 0; break;
             case 'd':
                 if ( !strcmp("snps",optarg) ) args->rmdup = BCF_SR_PAIR_SNPS;
@@ -2092,7 +2235,7 @@ int main_vcfnorm(int argc, char *argv[])
     }
     else fname = argv[optind];
 
-    if ( !args->ref_fname && !args->mrows_op && !args->rmdup ) error("Expected -f, -m, -D or -d option\n");
+    if ( !args->ref_fname && !args->mrows_op && !args->rmdup && args->atomize==NONE ) error("Expected -a, -f, -m, -D or -d option\n");
     if ( !args->check_ref && args->ref_fname ) args->check_ref = CHECK_REF_EXIT;
     if ( args->check_ref && !args->ref_fname ) error("Expected --fasta-ref with --check-ref\n");
 
diff --git a/bcftools/vcfnorm.c.pysam.c b/bcftools/vcfnorm.c.pysam.c
index 6125a1b..e48443f 100644
--- a/bcftools/vcfnorm.c.pysam.c
+++ b/bcftools/vcfnorm.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfnorm.c -- Left-align and normalize indels.
 
-    Copyright (C) 2013-2019 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -28,6 +28,7 @@ THE SOFTWARE.  */
 #include <strings.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
 #include <errno.h>
@@ -40,6 +41,7 @@ THE SOFTWARE.  */
 #include <htslib/khash_str2int.h>
 #include "bcftools.h"
 #include "rbuf.h"
+#include "abuf.h"
 
 #define CHECK_REF_EXIT 1
 #define CHECK_REF_WARN 2
@@ -86,20 +88,25 @@ typedef struct
     int32_t *int32_arr;
     int ntmp_arr1, ntmp_arr2, nint32_arr;
     kstring_t *tmp_str;
-    kstring_t *tmp_als, tmp_als_str;
+    kstring_t *tmp_als, tmp_kstr;
     int ntmp_als;
     rbuf_t rbuf;
     int buf_win;            // maximum distance between two records to consider
     int aln_win;            // the realignment window size (maximum repeat size)
     bcf_srs_t *files;       // using the synced reader only for -r option
-    bcf_hdr_t *hdr;
+    bcf_hdr_t *hdr, *out_hdr;
     cmpals_t cmpals_in, cmpals_out;
     faidx_t *fai;
     struct { int tot, set, swap; } nref;
     char **argv, *output_fname, *ref_fname, *vcf_fname, *region, *targets;
     int argc, rmdup, output_type, n_threads, check_ref, strict_filter, do_indels;
     int nchanged, nskipped, nsplit, ntotal, mrows_op, mrows_collapse, parsimonious;
-    int record_cmd_line, force, force_warned;
+    int record_cmd_line, force, force_warned, keep_sum_ad;
+    abuf_t *abuf;
+    abuf_opt_t atomize;
+    int use_star_allele;
+    char *old_rec_tag;
+    htsFile *out;
 }
 args_t;
 
@@ -138,7 +145,7 @@ static void seq_to_upper(char *seq, int len)
 static void fix_ref(args_t *args, bcf1_t *line)
 {
     int reflen = strlen(line->d.allele[0]);
-    int i, maxlen = reflen, len;
+    int i,j, maxlen = reflen, len;
     for (i=1; i<line->n_allele; i++)
     {
         int len = strlen(line->d.allele[i]);
@@ -151,27 +158,57 @@ static void fix_ref(args_t *args, bcf1_t *line)
 
     args->nref.tot++;
 
-    // is the REF different?
+    // is the REF different? If not, we are done
     if ( !strncasecmp(line->d.allele[0],ref,reflen) ) { free(ref); return; }
 
-    // is the REF allele missing or N?
-    if ( reflen==1 && (line->d.allele[0][0]=='.' || line->d.allele[0][0]=='N' || line->d.allele[0][0]=='n') ) 
+    // is the REF allele missing?
+    if ( reflen==1 && line->d.allele[0][0]=='.' ) 
     { 
         line->d.allele[0][0] = ref[0]; 
         args->nref.set++; 
         free(ref);
-        bcf_update_alleles(args->hdr,line,(const char**)line->d.allele,line->n_allele);
+        bcf_update_alleles(args->out_hdr,line,(const char**)line->d.allele,line->n_allele);
         return;
     }
 
-    // does REF contain non-standard bases?
-    if ( replace_iupac_codes(line->d.allele[0],strlen(line->d.allele[0])) )
+    // does REF or ALT contain non-standard bases?
+    int has_non_acgtn = 0;
+    for (i=0; i<line->n_allele; i++)
+    {
+        if ( line->d.allele[i][0]=='<' ) continue;
+        has_non_acgtn += replace_iupac_codes(line->d.allele[i],strlen(line->d.allele[i]));
+    }
+    if ( has_non_acgtn )
     {
         args->nref.set++;
-        bcf_update_alleles(args->hdr,line,(const char**)line->d.allele,line->n_allele);
+        bcf_update_alleles(args->out_hdr,line,(const char**)line->d.allele,line->n_allele);
         if ( !strncasecmp(line->d.allele[0],ref,reflen) ) { free(ref); return; }
     }
 
+    // does the REF allele contain N's ?
+    int fix = 0;
+    for (i=0; i<reflen; i++)
+    {
+        if ( line->d.allele[0][i]!='N' ) continue;
+        if ( ref[i]=='N' ) continue;
+        line->d.allele[0][i] = ref[i];
+        fix++;
+        for (j=1; j<line->n_allele; j++)
+        {
+            int len = strlen(line->d.allele[j]);
+            if ( len <= i || line->d.allele[j][i]!='N' ) continue;
+            line->d.allele[j][i] = ref[i];
+            fix++;
+        }
+    }
+    if ( fix )
+    {
+        args->nref.set++;
+        bcf_update_alleles(args->out_hdr,line,(const char**)line->d.allele,line->n_allele);
+        if ( !strncasecmp(line->d.allele[0],ref,reflen) ) { free(ref); return; }
+    }
+
+
     // is it swapped?
     for (i=1; i<line->n_allele; i++)
     {
@@ -180,45 +217,35 @@ static void fix_ref(args_t *args, bcf1_t *line)
     }
 
     kstring_t str = {0,0,0};
-    if ( i==line->n_allele )
+    if ( i==line->n_allele )    // none of the alternate alleles matches the reference
     {
-        // none of the alternate alleles matches the reference
-        if ( line->n_allele>1 )
-            args->nref.set++;
-        else
-            args->nref.swap++;
-
-        kputs(line->d.allele[0],&str);
-        kputc(',',&str);
+        args->nref.set++;
+        kputsn(ref,reflen,&str);
         for (i=1; i<line->n_allele; i++)
         {
-            kputs(line->d.allele[i],&str);
             kputc(',',&str);
+            kputs(line->d.allele[i],&str);
         }
-        kputc(ref[0],&str);
-        bcf_update_alleles_str(args->hdr,line,str.s);
-        str.l = 0;
+        bcf_update_alleles_str(args->out_hdr,line,str.s);
+        free(ref);
+        free(str.s);
+        return;
     }
-    else
-        args->nref.swap++;
-    free(ref);
 
-    // swap the alleles
-    int j;
+    // one of the alternate alleles matches the reference, assume it's a simple swap
     kputs(line->d.allele[i],&str);
-    for (j=1; j<i; j++)
-    {
-        kputc(',',&str);
-        kputs(line->d.allele[j],&str);
-    }
-    kputc(',',&str);
-    kputs(line->d.allele[0],&str);
-    for (j=i+1; j<line->n_allele; j++)
+    for (j=1; j<line->n_allele; j++)
     {
         kputc(',',&str);
-        kputs(line->d.allele[j],&str);
+        if ( j==i ) 
+            kputs(line->d.allele[0],&str);
+        else
+            kputs(line->d.allele[j],&str);
     }
-    bcf_update_alleles_str(args->hdr,line,str.s);
+    bcf_update_alleles_str(args->out_hdr,line,str.s);
+    args->nref.swap++;
+    free(ref);
+    free(str.s);
 
     // swap genotypes
     int ntmp = args->ntmp_arr1 / sizeof(int32_t); // reuse tmp_arr declared as uint8_t
@@ -233,7 +260,7 @@ static void fix_ref(args_t *args, bcf1_t *line)
         else if ( gts[j]==bcf_gt_unphased(i) ) gts[j] = bcf_gt_unphased(0);
         else if ( gts[j]==bcf_gt_phased(i) ) gts[j] = bcf_gt_phased(0);
     }
-    bcf_update_genotypes(args->hdr,line,gts,ngts);
+    bcf_update_genotypes(args->out_hdr,line,gts,ngts);
 
     // update AC
     int nac = bcf_get_info_int32(args->hdr, line, "AC", &args->tmp_arr1, &ntmp);
@@ -242,10 +269,8 @@ static void fix_ref(args_t *args, bcf1_t *line)
     {
         int32_t *ac = (int32_t*)args->tmp_arr1;
         ac[i-1] = ni;
-        bcf_update_info_int32(args->hdr, line, "AC", ac, nac);
+        bcf_update_info_int32(args->out_hdr, line, "AC", ac, nac);
     }
-    
-    free(str.s);
 }
 
 static void fix_dup_alt(args_t *args, bcf1_t *line)
@@ -270,7 +295,7 @@ static void fix_dup_alt(args_t *args, bcf1_t *line)
         if ( !args->tmp_arr1[i] ) continue;
         line->d.allele[j++] = line->d.allele[i];
     }
-    bcf_update_alleles(args->hdr, line, (const char**)line->d.allele, nals);
+    bcf_update_alleles(args->out_hdr, line, (const char**)line->d.allele, nals);
 
 
     // update genotypes
@@ -288,7 +313,36 @@ static void fix_dup_alt(args_t *args, bcf1_t *line)
         gts[i] = bcf_gt_is_phased(gts[i]) ? bcf_gt_phased(ial_new) : bcf_gt_unphased(ial_new);
         changed = 1;
     }
-    if ( changed ) bcf_update_genotypes(args->hdr,line,gts,ngts);
+    if ( changed ) bcf_update_genotypes(args->out_hdr,line,gts,ngts);
+}
+
+static void set_old_rec_tag(args_t *args, bcf1_t *dst, bcf1_t *src, int ialt)
+{
+    if ( !args->old_rec_tag ) return;
+    // Annotate dst with INFO/<old_rec_tag> describing the original record src; does nothing when args->old_rec_tag is unset
+    // only update if the tag is not present already, there can be multiple normalization steps
+    int i, id = bcf_hdr_id2int(args->out_hdr, BCF_DT_ID, args->old_rec_tag);    // the tag's key is looked up in the output header
+    bcf_unpack(dst, BCF_UN_INFO);
+    for (i=0; i<dst->n_info; i++)
+    {
+        bcf_info_t *inf = &dst->d.info[i];
+        if ( inf && inf->key == id ) return;    // dst already carries the tag, keep the existing annotation
+    }
+    // build the value in the reusable tmp_kstr buffer as CHR|POS|REF|ALT[,ALT..], with |ialt appended only when ialt>0
+    args->tmp_kstr.l = 0;
+    ksprintf(&args->tmp_kstr,"%s|%"PRIhts_pos"|%s|",bcf_seqname(args->hdr,src),src->pos+1,src->d.allele[0]);   // POS is written 1-based
+    for (i=1; i<src->n_allele; i++)
+    {
+        kputs(src->d.allele[i],&args->tmp_kstr);
+        if ( i+1<src->n_allele ) kputc(',',&args->tmp_kstr);    // comma between ALT alleles, none after the last
+    }
+    if ( ialt>0 )
+    {
+        kputc('|',&args->tmp_kstr);
+        kputw(ialt,&args->tmp_kstr);
+    }
+    if ( (bcf_update_info_string(args->out_hdr, dst, args->old_rec_tag, args->tmp_kstr.s))!=0 )
+            error("An error occurred while updating INFO/%s\n",args->old_rec_tag);
 }
 
 #define ERR_DUP_ALLELE       -2
@@ -335,7 +389,7 @@ static int realign(args_t *args, bcf1_t *line)
         if ( line->rlen > 1 )
         {
             line->d.allele[0][1] = 0;
-            bcf_update_alleles(args->hdr,line,(const char**)line->d.allele,line->n_allele);
+            bcf_update_alleles(args->out_hdr,line,(const char**)line->d.allele,line->n_allele);
         }
         return ERR_OK;
     }
@@ -365,7 +419,7 @@ static int realign(args_t *args, bcf1_t *line)
     }
 
     // trim from right
-    int ori_pos = line->pos;
+    int new_pos = line->pos;
     while (1)
     {
         // is the rightmost base identical in all alleles?
@@ -376,7 +430,7 @@ static int realign(args_t *args, bcf1_t *line)
             if ( als[i].l < min_len ) min_len = als[i].l;
         }
         if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed
-        if ( min_len<=1 && line->pos==0 ) break;
+        if ( min_len<=1 && new_pos==0 ) break;
 
         int pad_from_left = 0;
         for (i=0; i<line->n_allele; i++) // trim all alleles
@@ -386,10 +440,10 @@ static int realign(args_t *args, bcf1_t *line)
         }
         if ( pad_from_left )
         {
-            int npad = line->pos >= args->aln_win ? args->aln_win : line->pos;
+            int npad = new_pos >= args->aln_win ? args->aln_win : new_pos;
             free(ref);
-            ref = faidx_fetch_seq(args->fai, (char*)args->hdr->id[BCF_DT_CTG][line->rid].key, line->pos-npad, line->pos-1, &nref);
-            if ( !ref ) error("faidx_fetch_seq failed at %s:%"PRId64"\n", args->hdr->id[BCF_DT_CTG][line->rid].key, (int64_t) line->pos-npad+1);
+            ref = faidx_fetch_seq(args->fai, (char*)args->hdr->id[BCF_DT_CTG][line->rid].key, new_pos-npad, new_pos-1, &nref);
+            if ( !ref ) error("faidx_fetch_seq failed at %s:%"PRId64"\n", args->hdr->id[BCF_DT_CTG][line->rid].key, (int64_t) new_pos-npad+1);
             replace_iupac_codes(ref,nref);
             for (i=0; i<line->n_allele; i++)
             {
@@ -398,7 +452,7 @@ static int realign(args_t *args, bcf1_t *line)
                 memcpy(als[i].s,ref,npad);
                 als[i].l += npad;
             }
-            line->pos -= npad;
+            new_pos -= npad;
         }
     }
     free(ref);
@@ -424,39 +478,43 @@ static int realign(args_t *args, bcf1_t *line)
             memmove(als[i].s,als[i].s+ntrim_left,als[i].l-ntrim_left);
             als[i].l -= ntrim_left;
         }
-        line->pos += ntrim_left;
+        new_pos += ntrim_left;
     }
 
     // Have the alleles changed?
     als[0].s[ als[0].l ] = 0;  // in order for strcmp to work
-    if ( ori_pos==line->pos && !strcasecmp(line->d.allele[0],als[0].s) ) return ERR_OK;
+    if ( new_pos==line->pos && !strcasecmp(line->d.allele[0],als[0].s) ) return ERR_OK;
+
+    set_old_rec_tag(args, line, line, 0);
 
     // Create new block of alleles and update
-    args->tmp_als_str.l = 0;
+    args->tmp_kstr.l = 0;
     for (i=0; i<line->n_allele; i++)
     {
-        if (i>0) kputc(',',&args->tmp_als_str);
-        kputsn(als[i].s,als[i].l,&args->tmp_als_str);
+        if (i>0) kputc(',',&args->tmp_kstr);
+        kputsn(als[i].s,als[i].l,&args->tmp_kstr);
     }
-    args->tmp_als_str.s[ args->tmp_als_str.l ] = 0;
-    bcf_update_alleles_str(args->hdr,line,args->tmp_als_str.s);
+    args->tmp_kstr.s[ args->tmp_kstr.l ] = 0;
+    bcf_update_alleles_str(args->out_hdr,line,args->tmp_kstr.s);
     args->nchanged++;
 
     // Update INFO/END if necessary
     int new_reflen = strlen(line->d.allele[0]);
-    if ( (ori_pos!=line->pos || reflen!=new_reflen) && bcf_get_info_int32(args->hdr, line, "END", &args->int32_arr, &args->nint32_arr)==1 )
+    if ( (new_pos!=line->pos || reflen!=new_reflen) && bcf_get_info_int32(args->hdr, line, "END", &args->int32_arr, &args->nint32_arr)==1 )
     {
         // bcf_update_alleles_str() messed up rlen because line->pos changed. This will be fixed by bcf_update_info_int32()
+        line->pos = new_pos;
         args->int32_arr[0] = line->pos + new_reflen;
-        bcf_update_info_int32(args->hdr, line, "END", args->int32_arr, 1);
+        bcf_update_info_int32(args->out_hdr, line, "END", args->int32_arr, 1);
     }
+    line->pos = new_pos;
 
     return ERR_OK;
 }
 
 static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int ialt, bcf1_t *dst)
 {
-    #define BRANCH_NUMERIC(type,type_t) \
+    #define BRANCH_NUMERIC(type,type_t,is_vector_end,is_missing) \
     { \
         const char *tag = bcf_hdr_int2id(args->hdr,BCF_DT_ID,info->key); \
         int ntmp = args->ntmp_arr1 / sizeof(type_t); \
@@ -479,13 +537,13 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_info_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_info_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in INFO/%s at %s:%"PRId64", expected %d, found %d\n", \
                         tag,bcf_seqname(args->hdr,src),(int64_t) src->pos+1,src->n_allele-1,ret); \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,vals+ialt,1); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals+ialt,1); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
@@ -501,14 +559,24 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_info_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_info_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in INFO/%s at %s:%"PRId64", expected %d, found %d\n", \
                         tag,bcf_seqname(args->hdr,src),(int64_t) src->pos+1,src->n_allele,ret); \
             } \
-            if ( ialt!=0 ) vals[1] = vals[ialt+1]; \
-            bcf_update_info_##type(args->hdr,dst,tag,vals,2); \
+            if ( args->keep_sum_ad >= 0 && args->keep_sum_ad==info->key ) \
+            { \
+                int j; \
+                for (j=1; j<info->len; j++) \
+                    if ( j!=ialt+1 && !(is_missing) && !(is_vector_end) ) vals[0] += vals[j]; \
+                vals[1] = vals[ialt+1]; \
+            } \
+            else \
+            { \
+                if ( ialt!=0 ) vals[1] = vals[ialt+1]; \
+            } \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals,2); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
@@ -524,7 +592,7 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_info_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_info_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in INFO/%s at %s:%"PRId64", expected %d, found %d\n", \
@@ -535,15 +603,15 @@ static void split_info_numeric(args_t *args, bcf1_t *src, bcf_info_t *info, int
                 vals[1] = vals[bcf_alleles2gt(0,ialt+1)]; \
                 vals[2] = vals[bcf_alleles2gt(ialt+1,ialt+1)]; \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,vals,3); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals,3); \
         } \
         else \
-            bcf_update_info_##type(args->hdr,dst,tag,vals,ret); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals,ret); \
     }
     switch (bcf_hdr_id2type(args->hdr,BCF_HL_INFO,info->key))
     {
-        case BCF_HT_INT:  BRANCH_NUMERIC(int32, int32_t); break;
-        case BCF_HT_REAL: BRANCH_NUMERIC(float, float); break;
+        case BCF_HT_INT:  BRANCH_NUMERIC(int32, int32_t, vals[j]==bcf_int32_vector_end, vals[j]==bcf_int32_missing); break;
+        case BCF_HT_REAL: BRANCH_NUMERIC(float, float, bcf_float_is_vector_end(vals[j]), bcf_float_is_missing(vals[j])); break;
     }
     #undef BRANCH_NUMERIC
 }
@@ -591,7 +659,7 @@ static void split_info_string(args_t *args, bcf1_t *src, bcf_info_t *info, int i
         STR_MOVE_NTH(str.s,tmp,str.s+str.l,ialt,len);
         if ( len<0 ) return;   // wrong number of fields: skip
         str.s[len] = 0;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else if ( len==BCF_VL_R )
     {
@@ -602,7 +670,7 @@ static void split_info_string(args_t *args, bcf1_t *src, bcf_info_t *info, int i
         STR_MOVE_NTH(&str.s[len],tmp,str.s+str.l,ialt,len);
         if ( len<0 ) return;   // wrong number of fields: skip
         str.s[len] = 0;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else if ( len==BCF_VL_G )
     {
@@ -617,16 +685,16 @@ static void split_info_string(args_t *args, bcf1_t *src, bcf_info_t *info, int i
         STR_MOVE_NTH(&str.s[len],tmp,str.s+str.l,iaa-i0a-1,len);
         if ( len<0 ) return;   // wrong number of fields: skip
         str.s[len] = 0;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
 }
 static void split_info_flag(args_t *args, bcf1_t *src, bcf_info_t *info, int ialt, bcf1_t *dst)
 {
     const char *tag = bcf_hdr_int2id(args->hdr,BCF_DT_ID,info->key);
     int ret = bcf_get_info_flag(args->hdr,src,tag,&args->tmp_arr1,&args->ntmp_arr1);
-    bcf_update_info_flag(args->hdr,dst,tag,NULL,ret);
+    bcf_update_info_flag(args->out_hdr,dst,tag,NULL,ret);
 }
 
 static void split_format_genotype(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int ialt, bcf1_t *dst)
@@ -652,11 +720,11 @@ static void split_format_genotype(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
         }
         gt += ngts;
     }
-    bcf_update_genotypes(args->hdr,dst,args->tmp_arr1,ngts*nsmpl);
+    bcf_update_genotypes(args->out_hdr,dst,args->tmp_arr1,ngts*nsmpl);
 }
 static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int ialt, bcf1_t *dst)
 {
-    #define BRANCH_NUMERIC(type,type_t,is_vector_end,set_vector_end) \
+    #define BRANCH_NUMERIC(type,type_t,is_vector_end,is_missing,set_vector_end) \
     { \
         const char *tag = bcf_hdr_int2id(args->hdr,BCF_DT_ID,fmt->id); \
         int ntmp = args->ntmp_arr1 / sizeof(type_t); \
@@ -665,10 +733,10 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
         assert( nvals>0 ); \
         type_t *vals = (type_t *) args->tmp_arr1; \
         int len = bcf_hdr_id2length(args->hdr,BCF_HL_FMT,fmt->id); \
-        int i, nsmpl = bcf_hdr_nsamples(args->hdr); \
+        int i,j, nsmpl = bcf_hdr_nsamples(args->hdr); \
         if ( nvals==nsmpl ) /* all values are missing */ \
         { \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,nsmpl); \
             return; \
         } \
         if ( len==BCF_VL_A ) \
@@ -685,7 +753,7 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_format_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_format_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in FMT/%s at %s:%"PRId64", expected %d, found %d\n", \
@@ -699,7 +767,7 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 dst_vals += 1; \
                 src_vals += nvals; \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,nsmpl); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
@@ -715,7 +783,7 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_format_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_format_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error: wrong number of fields in FMT/%s at %s:%"PRId64", expected %d, found %d\n", \
@@ -723,14 +791,29 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
             } \
             nvals /= nsmpl; \
             type_t *src_vals = vals, *dst_vals = vals; \
-            for (i=0; i<nsmpl; i++) \
+            if ( args->keep_sum_ad >= 0 && args->keep_sum_ad==fmt->id ) \
             { \
-                dst_vals[0] = src_vals[0]; \
-                dst_vals[1] = src_vals[ialt+1]; \
-                dst_vals += 2; \
-                src_vals += nvals; \
+                for (i=0; i<nsmpl; i++) \
+                { \
+                    dst_vals[0] = src_vals[0]; \
+                    for (j=1; j<nvals; j++) \
+                        if ( j!=ialt+1 && !(is_missing) && !(is_vector_end) ) dst_vals[0] += src_vals[j]; \
+                    dst_vals[1] = src_vals[ialt+1]; \
+                    dst_vals += 2; \
+                    src_vals += nvals; \
+                } \
+            } \
+            else \
+            { \
+                for (i=0; i<nsmpl; i++) \
+                { \
+                    dst_vals[0] = src_vals[0]; \
+                    dst_vals[1] = src_vals[ialt+1]; \
+                    dst_vals += 2; \
+                    src_vals += nvals; \
+                } \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,nsmpl*2); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,nsmpl*2); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
@@ -746,7 +829,7 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 } \
                 if ( args->force ) \
                 { \
-                    bcf_update_format_##type(args->hdr,dst,tag,NULL,0); \
+                    bcf_update_format_##type(args->out_hdr,dst,tag,NULL,0); \
                     return; \
                 } \
                 error("Error at %s:%"PRId64", the tag %s has wrong number of fields\n", bcf_seqname(args->hdr,src),(int64_t) src->pos+1,bcf_hdr_int2id(args->hdr,BCF_DT_ID,fmt->id)); \
@@ -777,15 +860,15 @@ static void split_format_numeric(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int
                 dst_vals += all_haploid ? 2 : 3; \
                 src_vals += nvals; \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,all_haploid ? nsmpl*2 : nsmpl*3); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,all_haploid ? nsmpl*2 : nsmpl*3); \
         } \
         else \
-            bcf_update_format_##type(args->hdr,dst,tag,vals,nvals); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,vals,nvals); \
     }
     switch (bcf_hdr_id2type(args->hdr,BCF_HL_FMT,fmt->id))
     {
-        case BCF_HT_INT:  BRANCH_NUMERIC(int32, int32_t, src_vals[j]==bcf_int32_vector_end, dst_vals[2]=bcf_int32_vector_end); break;
-        case BCF_HT_REAL: BRANCH_NUMERIC(float, float, bcf_float_is_vector_end(src_vals[j]), bcf_float_set_vector_end(dst_vals[2])); break;
+        case BCF_HT_INT:  BRANCH_NUMERIC(int32, int32_t, src_vals[j]==bcf_int32_vector_end, src_vals[j]==bcf_int32_missing, dst_vals[2]=bcf_int32_vector_end); break;
+        case BCF_HT_REAL: BRANCH_NUMERIC(float, float, bcf_float_is_vector_end(src_vals[j]), bcf_float_is_missing(src_vals[j]), bcf_float_set_vector_end(dst_vals[2])); break;
     }
     #undef BRANCH_NUMERIC
 }
@@ -827,7 +910,7 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
             ptr += blen;
         }
         if ( maxlen<blen ) squeeze_format_char(str.s,blen,maxlen,nsmpl);
-        bcf_update_format_char(args->hdr,dst,tag,str.s,nsmpl*maxlen);
+        bcf_update_format_char(args->out_hdr,dst,tag,str.s,nsmpl*maxlen);
     }
     else if ( len==BCF_VL_R )
     {
@@ -845,7 +928,7 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
             ptr += blen;
         }
         if ( maxlen<blen ) squeeze_format_char(str.s,blen,maxlen,nsmpl);
-        bcf_update_format_char(args->hdr,dst,tag,str.s,nsmpl*maxlen);
+        bcf_update_format_char(args->out_hdr,dst,tag,str.s,nsmpl*maxlen);
     }
     else if ( len==BCF_VL_G )
     {
@@ -873,7 +956,7 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
                 }
                 if ( args->force )
                 {
-                    bcf_update_format_char(args->hdr,dst,tag,NULL,0);
+                    bcf_update_format_char(args->out_hdr,dst,tag,NULL,0);
                     return;
                 }
                 error("Error: wrong number of fields in FMT/%s at %s:%"PRId64", expected %d or %d, found %d\n",
@@ -904,13 +987,12 @@ static void split_format_string(args_t *args, bcf1_t *src, bcf_fmt_t *fmt, int i
             ptr += blen;
         }
         if ( maxlen<blen ) squeeze_format_char(str.s,blen,maxlen,nsmpl);
-        bcf_update_format_char(args->hdr,dst,tag,str.s,nsmpl*maxlen);
+        bcf_update_format_char(args->out_hdr,dst,tag,str.s,nsmpl*maxlen);
     }
     else
-        bcf_update_format_char(args->hdr,dst,tag,str.s,str.l);
+        bcf_update_format_char(args->out_hdr,dst,tag,str.s,str.l);
 }
 
-
 static void split_multiallelic_to_biallelics(args_t *args, bcf1_t *line)
 {
     int i;
@@ -943,11 +1025,11 @@ static void split_multiallelic_to_biallelics(args_t *args, bcf1_t *line)
 
         // Not quite sure how to handle IDs, they can be assigned to a specific
         // ALT.  For now we leave the ID unchanged for all.
-        bcf_update_id(args->hdr, dst, line->d.id ? line->d.id : ".");
+        bcf_update_id(args->out_hdr, dst, line->d.id ? line->d.id : ".");
 
         tmp.l = rlen;
         kputs(line->d.allele[i+1],&tmp);
-        bcf_update_alleles_str(args->hdr,dst,tmp.s);
+        bcf_update_alleles_str(args->out_hdr,dst,tmp.s);
 
         if ( line->d.n_flt ) bcf_update_filter(args->hdr, dst, line->d.flt, line->d.n_flt);
 
@@ -960,6 +1042,7 @@ static void split_multiallelic_to_biallelics(args_t *args, bcf1_t *line)
             else if ( type==BCF_HT_FLAG ) split_info_flag(args, line, info, i, dst);
             else split_info_string(args, line, info, i, dst);
         }
+        set_old_rec_tag(args, dst, line, i + 1); // 1-based indexes
 
         dst->n_sample = line->n_sample;
         for (j=0; j<line->n_fmt; j++)
@@ -1023,7 +1106,7 @@ static void merge_info_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_inf
                     vals[ args->maps[i].map[k+1] - 1 ] = vals2[k]; \
                 } \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,args->tmp_arr1,nvals); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
@@ -1047,7 +1130,7 @@ static void merge_info_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_inf
                     vals[ args->maps[i].map[k] ] = vals2[k]; \
                 } \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,args->tmp_arr1,nvals); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
@@ -1081,10 +1164,10 @@ static void merge_info_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_inf
                     } \
                 } \
             } \
-            bcf_update_info_##type(args->hdr,dst,tag,args->tmp_arr1,nvals); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals); \
         } \
         else \
-            bcf_update_info_##type(args->hdr,dst,tag,vals,nvals_ori); \
+            bcf_update_info_##type(args->out_hdr,dst,tag,vals,nvals_ori); \
     }
     switch (bcf_hdr_id2type(args->hdr,BCF_HL_INFO,info->key))
     {
@@ -1097,7 +1180,7 @@ static void merge_info_flag(args_t *args, bcf1_t **lines, int nlines, bcf_info_t
 {
     const char *tag = bcf_hdr_int2id(args->hdr,BCF_DT_ID,info->key);
     int ret = bcf_get_info_flag(args->hdr,lines[0],tag,&args->tmp_arr1,&args->ntmp_arr1);
-    bcf_update_info_flag(args->hdr,dst,tag,NULL,ret);
+    bcf_update_info_flag(args->out_hdr,dst,tag,NULL,ret);
 }
 int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst); // see vcfmerge.c
 static void merge_info_string(args_t *args, bcf1_t **lines, int nlines, bcf_info_t *info, bcf1_t *dst)
@@ -1125,7 +1208,7 @@ static void merge_info_string(args_t *args, bcf1_t **lines, int nlines, bcf_info
         str.s[str.l] = 0;
         args->tmp_arr1  = (uint8_t*) str.s;
         args->ntmp_arr1 = str.m;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else if ( len==BCF_VL_G )
     {
@@ -1152,12 +1235,12 @@ static void merge_info_string(args_t *args, bcf1_t **lines, int nlines, bcf_info
         str.s[str.l] = 0;
         args->tmp_arr1  = (uint8_t*) str.s;
         args->ntmp_arr1 = str.m;
-        bcf_update_info_string(args->hdr,dst,tag,str.s);
+        bcf_update_info_string(args->out_hdr,dst,tag,str.s);
     }
     else
     {
         bcf_get_info_string(args->hdr,lines[0],tag,&args->tmp_arr1,&args->ntmp_arr1);
-        bcf_update_info_string(args->hdr,dst,tag,args->tmp_arr1);
+        bcf_update_info_string(args->out_hdr,dst,tag,args->tmp_arr1);
     }
 }
 static void merge_format_genotype(args_t *args, bcf1_t **lines, int nlines, bcf_fmt_t *fmt, bcf1_t *dst)
@@ -1200,7 +1283,7 @@ static void merge_format_genotype(args_t *args, bcf1_t **lines, int nlines, bcf_
             gt2 += ngts;
         }
     }
-    bcf_update_genotypes(args->hdr,dst,args->tmp_arr1,ngts*nsmpl);
+    bcf_update_genotypes(args->out_hdr,dst,args->tmp_arr1,ngts*nsmpl);
 }
 static int diploid_to_haploid(int size, int nsmpl, int nals, uint8_t *vals)
 {
@@ -1253,7 +1336,7 @@ static void merge_format_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_f
                     vals2 += nvals2; \
                 } \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
         } \
         else if ( len==BCF_VL_R ) \
         { \
@@ -1281,7 +1364,7 @@ static void merge_format_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_f
                     vals2 += nvals2; \
                 } \
             } \
-            bcf_update_format_##type(args->hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
         } \
         else if ( len==BCF_VL_G ) \
         { \
@@ -1360,10 +1443,10 @@ static void merge_format_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_f
                     vals2 += nvals;\
                 }\
             }\
-            bcf_update_format_##type(args->hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals*nsmpl); \
         } \
         else \
-            bcf_update_format_##type(args->hdr,dst,tag,args->tmp_arr1,nvals_ori*nsmpl); \
+            bcf_update_format_##type(args->out_hdr,dst,tag,args->tmp_arr1,nvals_ori*nsmpl); \
     }
     switch (bcf_hdr_id2type(args->hdr,BCF_HL_FMT,fmt->id))
     {
@@ -1380,7 +1463,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
     if ( len!=BCF_VL_A && len!=BCF_VL_R && len!=BCF_VL_G )
     {
         int nret = bcf_get_format_char(args->hdr,lines[0],tag,&args->tmp_arr1,&args->ntmp_arr1);
-        bcf_update_format_char(args->hdr,dst,tag,args->tmp_arr1,nret);
+        bcf_update_format_char(args->out_hdr,dst,tag,args->tmp_arr1,nret);
         return;
     }
 
@@ -1399,7 +1482,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
         for (i=0; i<nlines; i++)
         {
             int nret = bcf_get_format_char(args->hdr,lines[i],tag,&args->tmp_arr1,&args->ntmp_arr1);
-            if (nret<0) continue; /* format tag does not exist in this record, skip */ \
+            if (nret<0) continue; /* format tag does not exist in this record, skip */
             nret /= nsmpl;
             for (k=0; k<nsmpl; k++)
             {
@@ -1446,7 +1529,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
             if ( i ) // we already have a copy
             {
                 nret = bcf_get_format_char(args->hdr,lines[i],tag,&args->tmp_arr1,&args->ntmp_arr1);
-                if (nret<0) continue; /* format tag does not exist in this record, skip */ \
+                if (nret<0) continue; /* format tag does not exist in this record, skip */
                 nret /= nsmpl;
             }
             for (k=0; k<nsmpl; k++)
@@ -1492,7 +1575,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
     }
     args->ntmp_arr2 = str.m;
     args->tmp_arr2  = (uint8_t*)str.s;
-    bcf_update_format_char(args->hdr,dst,tag,str.s,str.l);
+    bcf_update_format_char(args->out_hdr,dst,tag,str.s,str.l);
 }
 
 char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb);   // see vcfmerge.c
@@ -1513,7 +1596,7 @@ static void merge_biallelics_to_multiallelic(args_t *args, bcf1_t *dst, bcf1_t *
             dst->qual = lines[i]->qual;
     }
 
-    bcf_update_id(args->hdr, dst, lines[0]->d.id);
+    bcf_update_id(args->out_hdr, dst, lines[0]->d.id);
 
     // Merge and set the alleles, create a mapping from source allele indexes to dst idxs
     hts_expand0(map_t,nlines,args->mmaps,args->maps);   // a mapping for each line
@@ -1527,20 +1610,20 @@ static void merge_biallelics_to_multiallelic(args_t *args, bcf1_t *dst, bcf1_t *
     }
     for (i=1; i<nlines; i++)
     {
-        if (lines[i]->d.id[0]!='.' || lines[i]->d.id[1]) bcf_add_id(args->hdr, dst, lines[i]->d.id);
+        if (lines[i]->d.id[0]!='.' || lines[i]->d.id[1]) bcf_add_id(args->out_hdr, dst, lines[i]->d.id);
         args->maps[i].nals = lines[i]->n_allele;
         hts_expand(int,args->maps[i].nals,args->maps[i].mals,args->maps[i].map);
         args->als = merge_alleles(lines[i]->d.allele, lines[i]->n_allele, args->maps[i].map, args->als, &args->nals, &args->mals);
         if ( !args->als ) error("Failed to merge alleles at %s:%"PRId64"\n", bcf_seqname(args->hdr,dst),(int64_t) dst->pos+1);
     }
-    bcf_update_alleles(args->hdr, dst, (const char**)args->als, args->nals);
+    bcf_update_alleles(args->out_hdr, dst, (const char**)args->als, args->nals);
     for (i=0; i<args->nals; i++)
     {
         free(args->als[i]);
         args->als[i] = NULL;
     }
 
-    if ( lines[0]->d.n_flt ) bcf_update_filter(args->hdr, dst, lines[0]->d.flt, lines[0]->d.n_flt);
+    if ( lines[0]->d.n_flt ) bcf_update_filter(args->out_hdr, dst, lines[0]->d.flt, lines[0]->d.n_flt);
     for (i=1; i<nlines; i++) {
         int j;
         for (j=0; j<lines[i]->d.n_flt; j++) {
@@ -1548,13 +1631,13 @@ static void merge_biallelics_to_multiallelic(args_t *args, bcf1_t *dst, bcf1_t *
             // otherwise accumulate FILTERs
             if (lines[i]->d.flt[j] == bcf_hdr_id2int(args->hdr, BCF_DT_ID, "PASS")) {
                 if (args->strict_filter) {
-                    bcf_update_filter(args->hdr, dst, lines[i]->d.flt, lines[i]->d.n_flt);
+                    bcf_update_filter(args->out_hdr, dst, lines[i]->d.flt, lines[i]->d.n_flt);
                     break;
                 }
                 else
                     continue;
             }
-            bcf_add_filter(args->hdr, dst, lines[i]->d.flt[j]);
+            bcf_add_filter(args->out_hdr, dst, lines[i]->d.flt[j]);
         }
     }
 
@@ -1724,7 +1807,7 @@ static void flush_buffer(args_t *args, htsFile *file, int n)
             if ( mrows_ready_to_flush(args, args->lines[k]) )
             {
                 while ( (line=mrows_flush(args)) )
-                    if ( bcf_write1(file, args->hdr, line)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
+                    if ( bcf_write1(file, args->out_hdr, line)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
             }
             int merge = 1;
             if ( args->mrows_collapse!=COLLAPSE_BOTH && args->mrows_collapse!=COLLAPSE_ANY )
@@ -1757,18 +1840,30 @@ static void flush_buffer(args_t *args, htsFile *file, int n)
             prev_type |= line_type;
             if ( args->rmdup & BCF_SR_PAIR_EXACT ) cmpals_add(&args->cmpals_out, args->lines[k]);
         }
-        if ( bcf_write1(file, args->hdr, args->lines[k])!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
+        if ( bcf_write1(file, args->out_hdr, args->lines[k])!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
     }
     if ( args->mrows_op==MROWS_MERGE && !args->rbuf.n )
     {
         while ( (line=mrows_flush(args)) )
-            if ( bcf_write1(file, args->hdr, line)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
+            if ( bcf_write1(file, args->out_hdr, line)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
     }
 }
 
 static void init_data(args_t *args)
 {
     args->hdr = args->files->readers[0].header;
+    if ( args->keep_sum_ad )
+    {
+        args->keep_sum_ad = bcf_hdr_id2int(args->hdr,BCF_DT_ID,"AD");
+        if ( args->keep_sum_ad < 0 ) error("Error: --keep-sum-ad requested but the tag AD is not present\n");
+    }
+    else
+        args->keep_sum_ad = -1;
+
+    args->out_hdr = bcf_hdr_dup(args->hdr);
+    if ( args->old_rec_tag )
+        bcf_hdr_printf(args->out_hdr,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Original variant. Format: CHR|POS|REF|ALT|USED_ALT_IDX\">",args->old_rec_tag); 
+
     rbuf_init(&args->rbuf, 100);
     args->lines = (bcf1_t**) calloc(args->rbuf.m, sizeof(bcf1_t*));
     if ( args->ref_fname )
@@ -1782,6 +1877,14 @@ static void init_data(args_t *args)
         args->tmp_str = (kstring_t*) calloc(bcf_hdr_nsamples(args->hdr),sizeof(kstring_t));
         args->diploid = (uint8_t*) malloc(bcf_hdr_nsamples(args->hdr));
     }
+    if ( args->atomize==SPLIT )
+    {
+        args->abuf = abuf_init(args->hdr, SPLIT); 
+        abuf_set_opt(args->abuf, bcf_hdr_t*, BCF_HDR, args->out_hdr);
+        if ( args->old_rec_tag )
+            abuf_set_opt(args->abuf, const char*, INFO_TAG, args->old_rec_tag);
+        abuf_set_opt(args->abuf, int, STAR_ALLELE, args->use_star_allele);
+    }
 }
 
 static void destroy_data(args_t *args)
@@ -1806,7 +1909,7 @@ static void destroy_data(args_t *args)
     for (i=0; i<args->ntmp_als; i++)
         free(args->tmp_als[i].s);
     free(args->tmp_als);
-    free(args->tmp_als_str.s);
+    free(args->tmp_kstr.s);
     if ( args->tmp_str )
     {
         for (i=0; i<bcf_hdr_nsamples(args->hdr); i++) free(args->tmp_str[i].s);
@@ -1818,15 +1921,16 @@ static void destroy_data(args_t *args)
     free(args->tmp_arr1);
     free(args->tmp_arr2);
     free(args->diploid);
+    if ( args->abuf ) abuf_destroy(args->abuf);
+    bcf_hdr_destroy(args->out_hdr);
     if ( args->mrow_out ) bcf_destroy1(args->mrow_out);
     if ( args->fai ) fai_destroy(args->fai);
     if ( args->mseq ) free(args->seq);
 }
 
 
-static void normalize_line(args_t *args, bcf1_t **line_ptr)
+static void normalize_line(args_t *args, bcf1_t *line)
 {
-    bcf1_t *line = *line_ptr;
     if ( args->fai )
     {
         if ( args->check_ref & CHECK_REF_FIX ) fix_ref(args, line);
@@ -1856,8 +1960,8 @@ static void normalize_line(args_t *args, bcf1_t **line_ptr)
     rbuf_expand0(&args->rbuf,bcf1_t*,args->rbuf.n+1,args->lines);
     int i,j;
     i = j = rbuf_append(&args->rbuf);
-    if ( !args->lines[i] ) args->lines[i] = bcf_init1();
-    SWAP(bcf1_t*, (*line_ptr), args->lines[i]);
+    if ( args->lines[i] ) bcf_destroy(args->lines[i]);
+    args->lines[i] = bcf_dup(line);
     while ( rbuf_prev(&args->rbuf,&i) )
     {
         if ( args->lines[i]->pos > args->lines[j]->pos ) SWAP(bcf1_t*, args->lines[i], args->lines[j]);
@@ -1865,21 +1969,38 @@ static void normalize_line(args_t *args, bcf1_t **line_ptr)
     }
 }
 
+static bcf1_t *next_atomized_line(args_t *args)
+{
+    bcf1_t *rec = NULL;
+    if ( args->atomize==SPLIT )
+    {
+        rec = abuf_flush(args->abuf, 0);
+        if ( rec ) return rec;
+    }
+
+    if ( !bcf_sr_next_line(args->files) ) return NULL;
+
+    if ( args->atomize==SPLIT )
+    {
+        abuf_push(args->abuf,bcf_sr_get_line(args->files,0));
+        return abuf_flush(args->abuf, 0);
+    }
+    return bcf_sr_get_line(args->files,0);
+}
 static void normalize_vcf(args_t *args)
 {
-    htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
-    if ( out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
+    args->out = hts_open(args->output_fname, hts_bcf_wmode2(args->output_type,args->output_fname));
+    if ( args->out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
     if ( args->n_threads )
-        hts_set_opt(out, HTS_OPT_THREAD_POOL, args->files->p);
-    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
-    if ( bcf_hdr_write(out, args->hdr)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
+        hts_set_opt(args->out, HTS_OPT_THREAD_POOL, args->files->p);
+    if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_norm");
+    if ( bcf_hdr_write(args->out, args->out_hdr)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
 
+    bcf1_t *line;
     int prev_rid = -1, prev_pos = -1, prev_type = 0;
-    while ( bcf_sr_next_line(args->files) )
+    while ( (line = next_atomized_line(args)) )
     {
         args->ntotal++;
-
-        bcf1_t *line = args->files->readers[0].buffer[0];
         if ( args->rmdup )
         {
             int line_type = bcf_get_variant_types(line);
@@ -1903,7 +2024,7 @@ static void normalize_vcf(args_t *args)
 
         // still on the same chromosome?
         int i,j,ilast = rbuf_last(&args->rbuf);
-        if ( ilast>=0 && line->rid != args->lines[ilast]->rid ) flush_buffer(args, out, args->rbuf.n); // new chromosome
+        if ( ilast>=0 && line->rid != args->lines[ilast]->rid ) flush_buffer(args, args->out, args->rbuf.n); // new chromosome
 
         int split = 0;
         if ( args->mrows_op==MROWS_SPLIT )
@@ -1918,13 +2039,13 @@ static void normalize_vcf(args_t *args)
                 args->nsplit++;
                 split_multiallelic_to_biallelics(args, line);
                 for (j=0; j<args->ntmp_lines; j++)
-                    normalize_line(args, &args->tmp_lines[j]);
+                    normalize_line(args, args->tmp_lines[j]);
             }
             else
                 split = 0;
         }
         if ( !split )
-            normalize_line(args, &args->files->readers[0].buffer[0]);
+            normalize_line(args, line);
 
         // find out how many sites to flush
         ilast = rbuf_last(&args->rbuf);
@@ -1934,10 +2055,10 @@ static void normalize_vcf(args_t *args)
             if ( args->lines[ilast]->pos - args->lines[i]->pos < args->buf_win ) break;
             j++;
         }
-        if ( j>0 ) flush_buffer(args, out, j);
+        if ( j>0 ) flush_buffer(args, args->out, j);
     }
-    flush_buffer(args, out, args->rbuf.n);
-    if ( hts_close(out)!=0 ) error("[%s] Error: close failed .. %s\n", __func__,args->output_fname);
+    flush_buffer(args, args->out, args->rbuf.n);
+    if ( hts_close(args->out)!=0 ) error("[%s] Error: close failed .. %s\n", __func__,args->output_fname);
 
     fprintf(bcftools_stderr,"Lines   total/split/realigned/skipped:\t%d/%d/%d/%d\n", args->ntotal,args->nsplit,args->nchanged,args->nskipped);
     if ( args->check_ref & CHECK_REF_FIX )
@@ -1953,23 +2074,27 @@ static void usage(void)
     fprintf(bcftools_stderr, "Usage:   bcftools norm [options] <in.vcf.gz>\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Options:\n");
-    fprintf(bcftools_stderr, "    -c, --check-ref <e|w|x|s>         check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
-    fprintf(bcftools_stderr, "    -D, --remove-duplicates           remove duplicate lines of the same type.\n");
-    fprintf(bcftools_stderr, "    -d, --rm-dup <type>               remove duplicate snps|indels|both|all|exact\n");
-    fprintf(bcftools_stderr, "    -f, --fasta-ref <file>            reference sequence\n");
-    fprintf(bcftools_stderr, "        --force                       try to proceed even if malformed tags are encountered. Experimental, use at your own risk\n");
-    fprintf(bcftools_stderr, "    -m, --multiallelics <-|+>[type]   split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
-    fprintf(bcftools_stderr, "        --no-version                  do not append version and command line to the header\n");
-    fprintf(bcftools_stderr, "    -N, --do-not-normalize            do not normalize indels (with -m or -c s)\n");
-    fprintf(bcftools_stderr, "    -o, --output <file>               write output to a file [standard output]\n");
-    fprintf(bcftools_stderr, "    -O, --output-type <type>          'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
-    fprintf(bcftools_stderr, "    -r, --regions <region>            restrict to comma-separated list of regions\n");
-    fprintf(bcftools_stderr, "    -R, --regions-file <file>         restrict to regions listed in a file\n");
-    fprintf(bcftools_stderr, "    -s, --strict-filter               when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
-    fprintf(bcftools_stderr, "    -t, --targets <region>            similar to -r but streams rather than index-jumps\n");
-    fprintf(bcftools_stderr, "    -T, --targets-file <file>         similar to -R but streams rather than index-jumps\n");
-    fprintf(bcftools_stderr, "        --threads <int>               use multithreading with <int> worker threads [0]\n");
-    fprintf(bcftools_stderr, "    -w, --site-win <int>              buffer for sorting lines which changed position during realignment [1000]\n");
+    fprintf(bcftools_stderr, "    -a, --atomize                   Decompose complex variants (e.g. MNVs become consecutive SNVs)\n");
+    fprintf(bcftools_stderr, "        --atom-overlaps '*'|.       Use the star allele (*) for overlapping alleles or set to missing (.) [*]\n");
+    fprintf(bcftools_stderr, "    -c, --check-ref e|w|x|s         Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
+    fprintf(bcftools_stderr, "    -D, --remove-duplicates         Remove duplicate lines of the same type.\n");
+    fprintf(bcftools_stderr, "    -d, --rm-dup TYPE               Remove duplicate snps|indels|both|all|exact\n");
+    fprintf(bcftools_stderr, "    -f, --fasta-ref FILE            Reference sequence\n");
+    fprintf(bcftools_stderr, "        --force                     Try to proceed even if malformed tags are encountered. Experimental, use at your own risk\n");
+    fprintf(bcftools_stderr, "        --keep-sum TAG,..           Keep vector sum constant when splitting multiallelics (see github issue #360)\n");
+    fprintf(bcftools_stderr, "    -m, --multiallelics -|+TYPE     Split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
+    fprintf(bcftools_stderr, "        --no-version                Do not append version and command line to the header\n");
+    fprintf(bcftools_stderr, "    -N, --do-not-normalize          Do not normalize indels (with -m or -c s)\n");
+    fprintf(bcftools_stderr, "        --old-rec-tag STR           Annotate modified records with INFO/STR indicating the original variant\n");
+    fprintf(bcftools_stderr, "    -o, --output FILE               Write output to a file [standard output]\n");
+    fprintf(bcftools_stderr, "    -O, --output-type TYPE          'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+    fprintf(bcftools_stderr, "    -r, --regions REGION            Restrict to comma-separated list of regions\n");
+    fprintf(bcftools_stderr, "    -R, --regions-file FILE         Restrict to regions listed in a file\n");
+    fprintf(bcftools_stderr, "    -s, --strict-filter             When merging (-m+), merged site is PASS only if all sites being merged PASS\n");
+    fprintf(bcftools_stderr, "    -t, --targets REGION            Similar to -r but streams rather than index-jumps\n");
+    fprintf(bcftools_stderr, "    -T, --targets-file FILE         Similar to -R but streams rather than index-jumps\n");
+    fprintf(bcftools_stderr, "        --threads INT               Use multithreading with <int> worker threads [0]\n");
+    fprintf(bcftools_stderr, "    -w, --site-win INT              Buffer for sorting lines which changed position during realignment [1000]\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Examples:\n");
     fprintf(bcftools_stderr, "   # normalize and left-align indels\n");
@@ -1978,7 +2103,7 @@ static void usage(void)
     fprintf(bcftools_stderr, "   # split multi-allelic sites\n");
     fprintf(bcftools_stderr, "   bcftools norm -m- in.vcf\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfnorm(int argc, char *argv[])
@@ -1997,11 +2122,16 @@ int main_vcfnorm(int argc, char *argv[])
     args->do_indels = 1;
     int region_is_file  = 0;
     int targets_is_file = 0;
+    args->use_star_allele = 1;
 
     static struct option loptions[] =
     {
         {"help",no_argument,NULL,'h'},
         {"force",no_argument,NULL,7},
+        {"atomize",no_argument,NULL,'a'},
+        {"atom-overlaps",required_argument,NULL,11},
+        {"old-rec-tag",required_argument,NULL,12},
+        {"keep-sum",required_argument,NULL,10},
         {"fasta-ref",required_argument,NULL,'f'},
         {"do-not-normalize",no_argument,NULL,'N'},
         {"multiallelics",required_argument,NULL,'m'},
@@ -2021,8 +2151,21 @@ int main_vcfnorm(int argc, char *argv[])
         {NULL,0,NULL,0}
     };
     char *tmp;
-    while ((c = getopt_long(argc, argv, "hr:R:f:w:Dd:o:O:c:m:t:T:sN",loptions,NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "hr:R:f:w:Dd:o:O:c:m:t:T:sNa",loptions,NULL)) >= 0) {
         switch (c) {
+            case  10:
+                // possibly generalize this also to INFO/AD and other tags
+                if ( strcasecmp("ad",optarg) )
+                    error("Error: only --keep-sum AD is currently supported. See https://github.com/samtools/bcftools/issues/360 for more.\n");
+                args->keep_sum_ad = 1;  // this will be set to the header id or -1 in init_data
+                break;
+            case 'a': args->atomize = SPLIT; break;
+            case 11 :
+                if ( optarg[0]=='*' ) args->use_star_allele = 1;
+                else if ( optarg[0]=='.' ) args->use_star_allele = 0;
+                else error("Invalid argument to --atom-overlaps. Perhaps you wanted: \"--atom-overlaps '*'\"?\n");
+                break;
+            case 12 : args->old_rec_tag = optarg; break;
             case 'N': args->do_indels = 0; break;
             case 'd':
                 if ( !strcmp("snps",optarg) ) args->rmdup = BCF_SR_PAIR_SNPS;
@@ -2094,7 +2237,7 @@ int main_vcfnorm(int argc, char *argv[])
     }
     else fname = argv[optind];
 
-    if ( !args->ref_fname && !args->mrows_op && !args->rmdup ) error("Expected -f, -m, -D or -d option\n");
+    if ( !args->ref_fname && !args->mrows_op && !args->rmdup && args->atomize==NONE ) error("Expected -a, -f, -m, -D or -d option\n");
     if ( !args->check_ref && args->ref_fname ) args->check_ref = CHECK_REF_EXIT;
     if ( args->check_ref && !args->ref_fname ) error("Expected --fasta-ref with --check-ref\n");
 
diff --git a/bcftools/vcfplugin.c b/bcftools/vcfplugin.c
index a161529..c4ea52d 100644
--- a/bcftools/vcfplugin.c
+++ b/bcftools/vcfplugin.c
@@ -1,6 +1,6 @@
 /*  vcfplugin.c -- plugin modules for operating on VCF/BCF files.
 
-    Copyright (C) 2013-2017 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -148,7 +148,7 @@ typedef struct _args_t
     char **plugin_paths;
 
     char **argv, *output_fname, *regions_list, *targets_list;
-    int argc, drop_header, verbose, record_cmd_line;
+    int argc, drop_header, verbose, record_cmd_line, plist_only;
 }
 args_t;
 
@@ -178,7 +178,7 @@ static void add_plugin_paths(args_t *args, const char *path)
                 args->plugin_paths = (char**) realloc(args->plugin_paths,sizeof(char*)*(args->nplugin_paths+1));
                 args->plugin_paths[args->nplugin_paths] = dir;
                 args->nplugin_paths++;
-                if ( args->verbose > 1 ) fprintf(stderr, "plugin directory %s .. ok\n", dir);
+                if ( args->verbose > 1 && strcmp(".",dir) ) fprintf(stderr, "plugin directory %s .. ok\n", dir);
             }
             else
             {
@@ -220,6 +220,8 @@ static void *dlopen_plugin(args_t *args, const char *fname)
 #else
     if ( fname[0]=='/' ) is_absolute_path = 1;
 #endif
+
+    kstring_t err = {0,0,0};
     if ( !is_absolute_path )
     {
         int i;
@@ -231,16 +233,14 @@ static void *dlopen_plugin(args_t *args, const char *fname)
 #else
             handle = dlopen(tmp, RTLD_NOW); // valgrind complains about unfreed memory, not our problem though
 #endif
-            if ( args->verbose > 1 )
-            {
-                if ( !handle )
+            if ( !handle )
 #ifdef _WIN32
-                    fprintf(stderr,"%s:\n\tLoadLibraryA   .. %lu\n", tmp, GetLastError());
+                ksprintf(&err,"LoadLibraryA   .. %lu\n", GetLastError());
 #else
-                    fprintf(stderr,"%s:\n\tdlopen   .. %s\n", tmp, dlerror());
+                ksprintf(&err,"%s:\n\tdlopen   .. %s\n", tmp,dlerror());
 #endif
-                else fprintf(stderr,"%s:\n\tplugin open   .. ok\n", tmp);
-            }
+            else if ( args->verbose > 1 )
+                fprintf(stderr,"%s:\n\tplugin open   .. ok\n", tmp);
             free(tmp);
             if ( handle ) return handle;
         }
@@ -251,33 +251,46 @@ static void *dlopen_plugin(args_t *args, const char *fname)
 #else
     handle = dlopen(fname, RTLD_NOW);
 #endif
-    if ( args->verbose > 1 )
-    {
-        if ( !handle )
+    if ( !handle )
 #ifdef _WIN32
-            fprintf(stderr,"%s:\n\tLoadLibraryA   .. %lu\n", fname, GetLastError());
+        ksprintf(&err,"LoadLibraryA   .. %lu\n", GetLastError());
 #else
-            fprintf(stderr,"%s:\n\tdlopen   .. %s\n", fname, dlerror());
+        ksprintf(&err,"%s:\n\tdlopen   .. %s\n", fname,dlerror());
 #endif
-        else fprintf(stderr,"%s:\n\tplugin open   .. ok\n", fname);
-    }
+    else if ( args->verbose > 1 )
+        fprintf(stderr,"%s:\n\tplugin open   .. ok\n", fname);
+
+    if ( !handle && (!args->plist_only || args->verbose>1) )
+        fprintf(stderr,"%s",err.s);
+    free(err.s);
 
     return handle;
 }
 
-static void print_plugin_usage_hint(void)
+static void print_plugin_usage_hint(const char *name)
 {
-    fprintf(stderr, "\nNo functional bcftools plugins were found");
+    if ( name )
+        fprintf(stderr, "\nThe bcftools plugin \"%s\" was not found or is not functional", name);
+    else
+        fprintf(stderr, "\nNo functional bcftools plugins were found");
     if ( !getenv("BCFTOOLS_PLUGINS") )
-        fprintf(stderr,". The environment variable BCFTOOLS_PLUGINS is not set.\n\n");
+    {
+        fprintf(stderr,". The environment variable BCFTOOLS_PLUGINS is not set");
+#ifdef PLUGINPATH
+        fprintf(stderr,"\nand no usable plugins were found in %s", PLUGINPATH);
+#endif
+        fprintf(stderr,".\n\n");
+    }
     else
+    {
         fprintf(stderr,
                 " in\n\tBCFTOOLS_PLUGINS=\"%s\".\n\n"
                 "- Is the plugin path correct?\n\n"
-                "- Run \"bcftools plugin -lv\" for more detailed error output.\n"
+                "- Run \"bcftools plugin -l\" or \"bcftools plugin -lvv\" for a list of available plugins.\n"
                 "\n",
                 getenv("BCFTOOLS_PLUGINS")
                );
+    }
 }
 
 static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugin_t *plugin)
@@ -289,7 +302,7 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
     {
         if ( exit_on_error )
         {
-            print_plugin_usage_hint();
+            print_plugin_usage_hint(fname);
             error("Could not load \"%s\".\n\n", fname);
         }
         return -1;
@@ -410,12 +423,9 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
     return 0;
 }
 
-static void init_plugin(args_t *args)
+static void check_version(args_t *args)
 {
     static int warned_bcftools = 0, warned_htslib = 0;
-
-    int ret = args->plugin.init(args->plugin.argc,args->plugin.argv,args->hdr,args->hdr_out);
-    if ( ret<0 ) error("The plugin exited with an error.\n");
     const char *bver, *hver;
     args->plugin.version(&bver, &hver);
     if ( strcmp(bver,bcftools_version()) && !warned_bcftools )
@@ -428,6 +438,13 @@ static void init_plugin(args_t *args)
         fprintf(stderr,"WARNING: htslib version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", hts_version(),args->plugin.name,hver);
         warned_htslib = 1;
     }
+}
+
+static void init_plugin(args_t *args)
+{
+    int ret = args->plugin.init(args->plugin.argc,args->plugin.argv,args->hdr,args->hdr_out);
+    if ( ret<0 ) error("The plugin exited with an error.\n");
+    check_version(args);
     args->drop_header += ret;
 }
 
@@ -487,7 +504,7 @@ static int list_plugins(args_t *args)
         if ( args->verbose ) printf("\n");
     }
     else
-        print_plugin_usage_hint();
+        print_plugin_usage_hint(NULL);
     free(str.s);
     return nplugins ? 0 : 1;
 }
@@ -505,7 +522,7 @@ static void init_data(args_t *args)
     if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
     if ( !args->drop_header )
     {
-        args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
+        args->out_fh = hts_open(args->output_fname,hts_bcf_wmode2(args->output_type,args->output_fname));
         if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
         if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
         if ( bcf_hdr_write(args->out_fh, args->hdr_out)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
@@ -592,10 +609,9 @@ int main_plugin(int argc, char *argv[])
     args->n_threads = 0;
     args->record_cmd_line = 1;
     args->nplugin_paths = -1;
-    int regions_is_file = 0, targets_is_file = 0, plist_only = 0, usage_only = 0, version_only = 0;
+    int regions_is_file = 0, targets_is_file = 0, usage_only = 0, version_only = 0;
 
     if ( argc==1 ) usage(args);
-
     char *plugin_name = NULL;
     if ( argv[1][0]!='-' )
     {
@@ -606,6 +622,7 @@ int main_plugin(int argc, char *argv[])
         load_plugin(args, plugin_name, 1, &args->plugin);
         if ( args->plugin.run )
         {
+            check_version(args);
             int ret = args->plugin.run(argc, argv);
             destroy_data(args);
             free(args);
@@ -646,13 +663,17 @@ int main_plugin(int argc, char *argv[])
                     default: error("The output type \"%s\" not recognised\n", optarg);
                 };
                 break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
             case 't': args->targets_list = optarg; break;
             case 'T': args->targets_list = optarg; targets_is_file = 1; break;
-            case 'l': plist_only = 1; break;
+            case 'l': args->plist_only = 1; break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case  8 : args->record_cmd_line = 0; break;
             case '?':
@@ -660,8 +681,8 @@ int main_plugin(int argc, char *argv[])
             default: error("Unknown argument: %s\n", optarg);
         }
     }
-    if ( plist_only )  return list_plugins(args);
-    if ( usage_only && ! plugin_name ) usage(args);
+    if ( args->plist_only )  return list_plugins(args);
+    if ( !plugin_name ) usage(args);
 
     if ( version_only )
     {
@@ -682,7 +703,7 @@ int main_plugin(int argc, char *argv[])
     }
 
     char *fname = NULL;
-    if ( optind>=argc || argv[optind][0]=='-' )
+    if ( optind>=argc || (argv[optind][0]=='-' && argv[optind][1]) )
     {
         args->plugin.argc = argc - optind + 1;
         args->plugin.argv = argv + optind - 1;
diff --git a/bcftools/vcfplugin.c.pysam.c b/bcftools/vcfplugin.c.pysam.c
index 3b63c8c..2143a0a 100644
--- a/bcftools/vcfplugin.c.pysam.c
+++ b/bcftools/vcfplugin.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfplugin.c -- plugin modules for operating on VCF/BCF files.
 
-    Copyright (C) 2013-2017 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -150,7 +150,7 @@ typedef struct _args_t
     char **plugin_paths;
 
     char **argv, *output_fname, *regions_list, *targets_list;
-    int argc, drop_header, verbose, record_cmd_line;
+    int argc, drop_header, verbose, record_cmd_line, plist_only;
 }
 args_t;
 
@@ -180,7 +180,7 @@ static void add_plugin_paths(args_t *args, const char *path)
                 args->plugin_paths = (char**) realloc(args->plugin_paths,sizeof(char*)*(args->nplugin_paths+1));
                 args->plugin_paths[args->nplugin_paths] = dir;
                 args->nplugin_paths++;
-                if ( args->verbose > 1 ) fprintf(bcftools_stderr, "plugin directory %s .. ok\n", dir);
+                if ( args->verbose > 1 && strcmp(".",dir) ) fprintf(bcftools_stderr, "plugin directory %s .. ok\n", dir);
             }
             else
             {
@@ -222,6 +222,8 @@ static void *dlopen_plugin(args_t *args, const char *fname)
 #else
     if ( fname[0]=='/' ) is_absolute_path = 1;
 #endif
+
+    kstring_t err = {0,0,0};
     if ( !is_absolute_path )
     {
         int i;
@@ -233,16 +235,14 @@ static void *dlopen_plugin(args_t *args, const char *fname)
 #else
             handle = dlopen(tmp, RTLD_NOW); // valgrind complains about unfreed memory, not our problem though
 #endif
-            if ( args->verbose > 1 )
-            {
-                if ( !handle )
+            if ( !handle )
 #ifdef _WIN32
-                    fprintf(bcftools_stderr,"%s:\n\tLoadLibraryA   .. %lu\n", tmp, GetLastError());
+                ksprintf(&err,"LoadLibraryA   .. %lu\n", GetLastError());
 #else
-                    fprintf(bcftools_stderr,"%s:\n\tdlopen   .. %s\n", tmp, dlerror());
+                ksprintf(&err,"%s:\n\tdlopen   .. %s\n", tmp,dlerror());
 #endif
-                else fprintf(bcftools_stderr,"%s:\n\tplugin open   .. ok\n", tmp);
-            }
+            else if ( args->verbose > 1 )
+                fprintf(bcftools_stderr,"%s:\n\tplugin open   .. ok\n", tmp);
             free(tmp);
             if ( handle ) return handle;
         }
@@ -253,33 +253,46 @@ static void *dlopen_plugin(args_t *args, const char *fname)
 #else
     handle = dlopen(fname, RTLD_NOW);
 #endif
-    if ( args->verbose > 1 )
-    {
-        if ( !handle )
+    if ( !handle )
 #ifdef _WIN32
-            fprintf(bcftools_stderr,"%s:\n\tLoadLibraryA   .. %lu\n", fname, GetLastError());
+        ksprintf(&err,"LoadLibraryA   .. %lu\n", GetLastError());
 #else
-            fprintf(bcftools_stderr,"%s:\n\tdlopen   .. %s\n", fname, dlerror());
+        ksprintf(&err,"%s:\n\tdlopen   .. %s\n", fname,dlerror());
 #endif
-        else fprintf(bcftools_stderr,"%s:\n\tplugin open   .. ok\n", fname);
-    }
+    else if ( args->verbose > 1 )
+        fprintf(bcftools_stderr,"%s:\n\tplugin open   .. ok\n", fname);
+
+    if ( !handle && (!args->plist_only || args->verbose>1) )
+        fprintf(bcftools_stderr,"%s",err.s);
+    free(err.s);
 
     return handle;
 }
 
-static void print_plugin_usage_hint(void)
+static void print_plugin_usage_hint(const char *name)
 {
-    fprintf(bcftools_stderr, "\nNo functional bcftools plugins were found");
+    if ( name )
+        fprintf(bcftools_stderr, "\nThe bcftools plugin \"%s\" was not found or is not functional", name);
+    else
+        fprintf(bcftools_stderr, "\nNo functional bcftools plugins were found");
     if ( !getenv("BCFTOOLS_PLUGINS") )
-        fprintf(bcftools_stderr,". The environment variable BCFTOOLS_PLUGINS is not set.\n\n");
+    {
+        fprintf(bcftools_stderr,". The environment variable BCFTOOLS_PLUGINS is not set");
+#ifdef PLUGINPATH
+        fprintf(bcftools_stderr,"\nand no usable plugins were found in %s", PLUGINPATH);
+#endif
+        fprintf(bcftools_stderr,".\n\n");
+    }
     else
+    {
         fprintf(bcftools_stderr,
                 " in\n\tBCFTOOLS_PLUGINS=\"%s\".\n\n"
                 "- Is the plugin path correct?\n\n"
-                "- Run \"bcftools plugin -lv\" for more detailed error output.\n"
+                "- Run \"bcftools plugin -l\" or \"bcftools plugin -lvv\" for a list of available plugins.\n"
                 "\n",
                 getenv("BCFTOOLS_PLUGINS")
                );
+    }
 }
 
 static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugin_t *plugin)
@@ -291,7 +304,7 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
     {
         if ( exit_on_error )
         {
-            print_plugin_usage_hint();
+            print_plugin_usage_hint(fname);
             error("Could not load \"%s\".\n\n", fname);
         }
         return -1;
@@ -412,12 +425,9 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
     return 0;
 }
 
-static void init_plugin(args_t *args)
+static void check_version(args_t *args)
 {
     static int warned_bcftools = 0, warned_htslib = 0;
-
-    int ret = args->plugin.init(args->plugin.argc,args->plugin.argv,args->hdr,args->hdr_out);
-    if ( ret<0 ) error("The plugin exited with an error.\n");
     const char *bver, *hver;
     args->plugin.version(&bver, &hver);
     if ( strcmp(bver,bcftools_version()) && !warned_bcftools )
@@ -430,6 +440,13 @@ static void init_plugin(args_t *args)
         fprintf(bcftools_stderr,"WARNING: htslib version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", hts_version(),args->plugin.name,hver);
         warned_htslib = 1;
     }
+}
+
+static void init_plugin(args_t *args)
+{
+    int ret = args->plugin.init(args->plugin.argc,args->plugin.argv,args->hdr,args->hdr_out);
+    if ( ret<0 ) error("The plugin exited with an error.\n");
+    check_version(args);
     args->drop_header += ret;
 }
 
@@ -489,7 +506,7 @@ static int list_plugins(args_t *args)
         if ( args->verbose ) fprintf(bcftools_stdout, "\n");
     }
     else
-        print_plugin_usage_hint();
+        print_plugin_usage_hint(NULL);
     free(str.s);
     return nplugins ? 0 : 1;
 }
@@ -507,7 +524,7 @@ static void init_data(args_t *args)
     if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
     if ( !args->drop_header )
     {
-        args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
+        args->out_fh = hts_open(args->output_fname,hts_bcf_wmode2(args->output_type,args->output_fname));
         if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
         if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
         if ( bcf_hdr_write(args->out_fh, args->hdr_out)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
@@ -560,7 +577,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "   -v, --verbose               print verbose information, -vv increases verbosity\n");
     fprintf(bcftools_stderr, "   -V, --version               print version string and exit\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 static int is_verbose(int argc, char *argv[])
@@ -594,10 +611,9 @@ int main_plugin(int argc, char *argv[])
     args->n_threads = 0;
     args->record_cmd_line = 1;
     args->nplugin_paths = -1;
-    int regions_is_file = 0, targets_is_file = 0, plist_only = 0, usage_only = 0, version_only = 0;
+    int regions_is_file = 0, targets_is_file = 0, usage_only = 0, version_only = 0;
 
     if ( argc==1 ) usage(args);
-
     char *plugin_name = NULL;
     if ( argv[1][0]!='-' )
     {
@@ -608,6 +624,7 @@ int main_plugin(int argc, char *argv[])
         load_plugin(args, plugin_name, 1, &args->plugin);
         if ( args->plugin.run )
         {
+            check_version(args);
             int ret = args->plugin.run(argc, argv);
             destroy_data(args);
             free(args);
@@ -648,13 +665,17 @@ int main_plugin(int argc, char *argv[])
                     default: error("The output type \"%s\" not recognised\n", optarg);
                 };
                 break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
             case 't': args->targets_list = optarg; break;
             case 'T': args->targets_list = optarg; targets_is_file = 1; break;
-            case 'l': plist_only = 1; break;
+            case 'l': args->plist_only = 1; break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case  8 : args->record_cmd_line = 0; break;
             case '?':
@@ -662,8 +683,8 @@ int main_plugin(int argc, char *argv[])
             default: error("Unknown argument: %s\n", optarg);
         }
     }
-    if ( plist_only )  return list_plugins(args);
-    if ( usage_only && ! plugin_name ) usage(args);
+    if ( args->plist_only )  return list_plugins(args);
+    if ( !plugin_name ) usage(args);
 
     if ( version_only )
     {
@@ -684,7 +705,7 @@ int main_plugin(int argc, char *argv[])
     }
 
     char *fname = NULL;
-    if ( optind>=argc || argv[optind][0]=='-' )
+    if ( optind>=argc || (argv[optind][0]=='-' && argv[optind][1]) )
     {
         args->plugin.argc = argc - optind + 1;
         args->plugin.argv = argv + optind - 1;
diff --git a/bcftools/vcfquery.c b/bcftools/vcfquery.c
index 806ecf1..6568c82 100644
--- a/bcftools/vcfquery.c
+++ b/bcftools/vcfquery.c
@@ -1,6 +1,6 @@
 /*  vcfquery.c -- Extracts fields from VCF/BCF file.
 
-    Copyright (C) 2013-2017 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -226,7 +226,7 @@ static void usage(void)
     fprintf(stderr, "    -H, --print-header                print header\n");
     fprintf(stderr, "    -i, --include <expr>              select sites for which the expression is true (see man page for details)\n");
     fprintf(stderr, "    -l, --list-samples                print the list of samples and exit\n");
-    fprintf(stderr, "    -o, --output-file <file>          output file name [stdout]\n");
+    fprintf(stderr, "    -o, --output <file>               output file name [stdout]\n");
     fprintf(stderr, "    -r, --regions <region>            restrict to comma-separated list of regions\n");
     fprintf(stderr, "    -R, --regions-file <file>         restrict to regions listed in a file\n");
     fprintf(stderr, "    -s, --samples <list>              list of samples to include\n");
@@ -257,6 +257,7 @@ int main_vcfquery(int argc, char *argv[])
         {"exclude",1,0,'e'},
         {"format",1,0,'f'},
         {"output-file",1,0,'o'},
+        {"output",1,0,'o'},
         {"regions",1,0,'r'},
         {"regions-file",1,0,'R'},
         {"targets",1,0,'t'},
@@ -296,8 +297,12 @@ int main_vcfquery(int argc, char *argv[])
                     args->format_str = str.s;
                     break;
                 }
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
             case 't': args->targets_list = optarg; break;
diff --git a/bcftools/vcfquery.c.pysam.c b/bcftools/vcfquery.c.pysam.c
index 66afb08..fc264b7 100644
--- a/bcftools/vcfquery.c.pysam.c
+++ b/bcftools/vcfquery.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfquery.c -- Extracts fields from VCF/BCF file.
 
-    Copyright (C) 2013-2017 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -228,7 +228,7 @@ static void usage(void)
     fprintf(bcftools_stderr, "    -H, --print-header                print header\n");
     fprintf(bcftools_stderr, "    -i, --include <expr>              select sites for which the expression is true (see man page for details)\n");
     fprintf(bcftools_stderr, "    -l, --list-samples                print the list of samples and exit\n");
-    fprintf(bcftools_stderr, "    -o, --output-file <file>          output file name [bcftools_stdout]\n");
+    fprintf(bcftools_stderr, "    -o, --output <file>               output file name [bcftools_stdout]\n");
     fprintf(bcftools_stderr, "    -r, --regions <region>            restrict to comma-separated list of regions\n");
     fprintf(bcftools_stderr, "    -R, --regions-file <file>         restrict to regions listed in a file\n");
     fprintf(bcftools_stderr, "    -s, --samples <list>              list of samples to include\n");
@@ -241,7 +241,7 @@ static void usage(void)
     fprintf(bcftools_stderr, "Examples:\n");
     fprintf(bcftools_stderr, "\tbcftools query -f '%%CHROM\\t%%POS\\t%%REF\\t%%ALT[\\t%%SAMPLE=%%GT]\\n' file.vcf.gz\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfquery(int argc, char *argv[])
@@ -259,6 +259,7 @@ int main_vcfquery(int argc, char *argv[])
         {"exclude",1,0,'e'},
         {"format",1,0,'f'},
         {"output-file",1,0,'o'},
+        {"output",1,0,'o'},
         {"regions",1,0,'r'},
         {"regions-file",1,0,'R'},
         {"targets",1,0,'t'},
@@ -298,8 +299,12 @@ int main_vcfquery(int argc, char *argv[])
                     args->format_str = str.s;
                     break;
                 }
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'r': args->regions_list = optarg; break;
             case 'R': args->regions_list = optarg; regions_is_file = 1; break;
             case 't': args->targets_list = optarg; break;
diff --git a/bcftools/vcfroh.c b/bcftools/vcfroh.c
index 1c822cb..8e95c9a 100644
--- a/bcftools/vcfroh.c
+++ b/bcftools/vcfroh.c
@@ -1,6 +1,6 @@
 /*  vcfroh.c -- HMM model for detecting runs of autozygosity.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -103,7 +103,7 @@ typedef struct _args_t
     int ntot;                   // some stats to detect if things didn't go wrong
     int nno_af;                 // number of sites rejected because AF could not be determined
     int nfiltered;              // .. because of filters
-    int nnot_biallelic, ndup;
+    int nno_alt, nmultiallelic, ndup;
     smpl_t *smpl;               // HMM data for each sample
     smpl_ilist_t *af_smpl;      // list of samples to estimate AF from (--estimate-AF)
     smpl_ilist_t *roh_smpl;     // list of samples to analyze (--samples, --samples-file)
@@ -111,6 +111,7 @@ typedef struct _args_t
     int af_from_PL;             // estimate AF from FMT/PL rather than FMT/GT
     char **argv, *targets_list, *regions_list, *af_fname, *af_tag, *samples, *buffer_size, *output_fname;
     int argc, fake_PLs, snps_only, vi_training, samples_is_file, output_type, skip_homref, n_threads;
+    int include_noalt_sites;
     BGZF *out;
     kstring_t str;
 
@@ -548,6 +549,7 @@ static void flush_viterbi(args_t *args, int ismpl)
                     {
                         smpl->rg.state = 1;
                         smpl->rg.beg = smpl->sites[i];
+                        smpl->rg.end = smpl->sites[i];
                         smpl->rg.rid = args->prev_rid;
                         smpl->rg.qual  = qual;
                         smpl->rg.nqual = 1;
@@ -656,8 +658,10 @@ static void flush_viterbi(args_t *args, int ismpl)
     }
 }
 
-int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq)
+int read_AF(args_t *args, bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq)
 {
+    if ( tgt->nals < 2 )
+        error("Expected two comma-separated alleles (REF,ALT) in the third column of %s, found:\n\t%s\n", args->af_fname,tgt->line.s);
     if ( tgt->nals != line->n_allele ) return -1;    // number of alleles does not match
 
     int i;
@@ -837,7 +841,7 @@ int process_line(args_t *args, bcf1_t *line, int ial)
     else if ( args->af_fname ) 
     {
         // Read AF from a file
-        ret = read_AF(args->files->targets, line, &alt_freq);
+        ret = read_AF(args, args->files->targets, line, &alt_freq);
     }
     else if ( args->dflt_AF > 0 )
     {
@@ -997,33 +1001,32 @@ static void vcfroh(args_t *args, bcf1_t *line)
 
     // Skip unwanted lines, for simplicity we consider only biallelic sites 
     if ( line->rid == args->skip_rid ) return;
-    if ( line->n_allele==1 ) { args->nnot_biallelic++; return; }   // no ALT allele
-    if ( line->n_allele > 3 ) { args->nnot_biallelic++; return; }   // cannot be bi-allelic, even with <*>
 
     // This can be raw callable VCF with the symbolic unseen allele <*>
-    int ial = 0;
+    int ial = 0, nalt = line->n_allele - 1;
     for (i=1; i<line->n_allele; i++)
-        if ( !strcmp("<*>",line->d.allele[i]) ) { ial = i; break; }
-    if ( ial==0 )    // normal VCF, the symbolic allele is not present
     {
-        if ( line->n_allele!=2 ) { args->nnot_biallelic++; return; }   // not biallelic
-        ial = 1;
+        if ( !strcmp("<*>",line->d.allele[i]) || !strcmp("<NON_REF>",line->d.allele[i]) ) nalt--;
+        else if ( !ial ) ial = i;
     }
-    else
+
+    if ( !nalt ) // no ALT allele
     {
-        if ( line->n_allele!=3 ) return;    // not biallelic
-        ial = ial==1 ? 2 : 1;               // <*> can come in any order
+        args->nno_alt++;
+        if ( !args->include_noalt_sites ) return;
+    }
+    else if ( nalt>1 )
+    {
+        args->nmultiallelic++;
+        return;
     }
+
     if ( args->snps_only && !bcf_is_snp(line) ) return;
 
     // Initialize genetic map
     int skip_rid = 0;
     if ( args->prev_rid<0 )
-    {
-        args->prev_rid = line->rid;
-        args->prev_pos = line->pos;
         skip_rid = load_genmap(args, bcf_seqname(args->hdr,line));
-    }
 
     // New chromosome?
     if ( args->prev_rid!=line->rid )
@@ -1071,7 +1074,7 @@ static void usage(args_t *args)
     fprintf(stderr, "General Options:\n");
     fprintf(stderr, "        --AF-dflt <float>              if AF is not known, use this allele frequency [skip]\n");
     fprintf(stderr, "        --AF-tag <TAG>                 use TAG for allele frequency\n");
-    fprintf(stderr, "        --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF\\tALT\\tAF)\n");
+    fprintf(stderr, "        --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
     fprintf(stderr, "    -b  --buffer-size <int[,int]>      buffer size and the number of overlapping sites, 0 for unlimited [0]\n");
     fprintf(stderr, "                                           If the first number is negative, it is interpreted as the maximum memory to\n");
     fprintf(stderr, "                                           use, in MB. The default overlap is set to roughly 1%% of the buffer size.\n");
@@ -1082,6 +1085,7 @@ static void usage(args_t *args)
     fprintf(stderr, "                                           Safe value to use is 30 to account for GT errors.\n");
     fprintf(stderr, "        --include <expr>               select sites for which the expression is true\n");
     fprintf(stderr, "    -i, --ignore-homref                skip hom-ref genotypes (0/0)\n");
+    fprintf(stderr, "        --include-noalt                include sites with no ALT allele (ignored by default)\n");
     fprintf(stderr, "    -I, --skip-indels                  skip indels as their genotypes are enriched for errors\n");
     fprintf(stderr, "    -m, --genetic-map <file>           genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\"\n");
     fprintf(stderr, "                                           is replaced with chromosome name\n");
@@ -1122,6 +1126,7 @@ int main_vcfroh(int argc, char *argv[])
         {"AF-dflt",1,0,2},
         {"include",1,0,3},
         {"exclude",1,0,4},
+        {"include-noalt",0,0,5},
         {"buffer-size",1,0,'b'},
         {"ignore-homref",0,0,'i'},
         {"estimate-AF",1,0,'e'},
@@ -1154,8 +1159,13 @@ int main_vcfroh(int argc, char *argv[])
                 args->dflt_AF = strtod(optarg,&tmp);
                 if ( *tmp ) error("Could not parse: --AF-dflt %s\n", optarg);
                 break;
-            case 3: args->filter_str = optarg; args->filter_logic = FLT_INCLUDE; break;
-            case 4: args->filter_str = optarg; args->filter_logic = FLT_EXCLUDE; break;
+            case  3 :
+                if ( args->filter_str ) error("Error: only one --include or --exclude expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case  4 :
+                if ( args->filter_str ) error("Error: only one --include or --exclude expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 5: args->include_noalt_sites = 1; break;
             case 'o': args->output_fname = optarg; break;
             case 'O': 
                 if ( strchr(optarg,'s') || strchr(optarg,'S') ) args->output_type |= OUTPUT_ST;
@@ -1257,7 +1267,7 @@ int main_vcfroh(int argc, char *argv[])
         fprintf(stderr,"Number of lines overlapping with --AF-file/processed: %d/%d\n", args->ntot,nmin);
     else
         fprintf(stderr,"Number of lines total/processed: %d/%d\n", args->ntot,nmin);
-    fprintf(stderr,"Number of lines filtered/no AF/not biallelic/dup: %d/%d/%d/%d\n", args->nfiltered,args->nno_af,args->nnot_biallelic,args->ndup);
+    fprintf(stderr,"Number of lines filtered/no AF/no alt/multiallelic/dup: %d/%d/%d/%d/%d\n", args->nfiltered,args->nno_af,args->nno_alt,args->nmultiallelic,args->ndup);
     if ( nmin==0 )
     {
         fprintf(stderr,"No usable sites were found.\n");
diff --git a/bcftools/vcfroh.c.pysam.c b/bcftools/vcfroh.c.pysam.c
index 33defa4..b742faa 100644
--- a/bcftools/vcfroh.c.pysam.c
+++ b/bcftools/vcfroh.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfroh.c -- HMM model for detecting runs of autozygosity.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -105,7 +105,7 @@ typedef struct _args_t
     int ntot;                   // some stats to detect if things didn't go wrong
     int nno_af;                 // number of sites rejected because AF could not be determined
     int nfiltered;              // .. because of filters
-    int nnot_biallelic, ndup;
+    int nno_alt, nmultiallelic, ndup;
     smpl_t *smpl;               // HMM data for each sample
     smpl_ilist_t *af_smpl;      // list of samples to estimate AF from (--estimate-AF)
     smpl_ilist_t *roh_smpl;     // list of samples to analyze (--samples, --samples-file)
@@ -113,6 +113,7 @@ typedef struct _args_t
     int af_from_PL;             // estimate AF from FMT/PL rather than FMT/GT
     char **argv, *targets_list, *regions_list, *af_fname, *af_tag, *samples, *buffer_size, *output_fname;
     int argc, fake_PLs, snps_only, vi_training, samples_is_file, output_type, skip_homref, n_threads;
+    int include_noalt_sites;
     BGZF *out;
     kstring_t str;
 
@@ -550,6 +551,7 @@ static void flush_viterbi(args_t *args, int ismpl)
                     {
                         smpl->rg.state = 1;
                         smpl->rg.beg = smpl->sites[i];
+                        smpl->rg.end = smpl->sites[i];
                         smpl->rg.rid = args->prev_rid;
                         smpl->rg.qual  = qual;
                         smpl->rg.nqual = 1;
@@ -658,8 +660,10 @@ static void flush_viterbi(args_t *args, int ismpl)
     }
 }
 
-int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq)
+int read_AF(args_t *args, bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq)
 {
+    if ( tgt->nals < 2 )
+        error("Expected two comma-separated alleles (REF,ALT) in the third column of %s, found:\n\t%s\n", args->af_fname,tgt->line.s);
     if ( tgt->nals != line->n_allele ) return -1;    // number of alleles does not match
 
     int i;
@@ -769,7 +773,7 @@ int estimate_AF_from_PL(args_t *args, bcf_fmt_t *fmt_pl, int ial, double *alt_fr
             case BCF_BT_INT8:  BRANCH(int8_t); break;
             case BCF_BT_INT16: BRANCH(int16_t); break;
             case BCF_BT_INT32: BRANCH(int32_t); break;
-            default: fprintf(bcftools_stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); exit(1);
+            default: fprintf(bcftools_stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); bcftools_exit(1);
         }
         #undef BRANCH
     }
@@ -799,7 +803,7 @@ int estimate_AF_from_PL(args_t *args, bcf_fmt_t *fmt_pl, int ial, double *alt_fr
             case BCF_BT_INT8:  BRANCH(int8_t); break;
             case BCF_BT_INT16: BRANCH(int16_t); break;
             case BCF_BT_INT32: BRANCH(int32_t); break;
-            default: fprintf(bcftools_stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); exit(1);
+            default: fprintf(bcftools_stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); bcftools_exit(1);
         }
         #undef BRANCH
     }
@@ -839,7 +843,7 @@ int process_line(args_t *args, bcf1_t *line, int ial)
     else if ( args->af_fname ) 
     {
         // Read AF from a file
-        ret = read_AF(args->files->targets, line, &alt_freq);
+        ret = read_AF(args, args->files->targets, line, &alt_freq);
     }
     else if ( args->dflt_AF > 0 )
     {
@@ -941,7 +945,7 @@ int process_line(args_t *args, bcf1_t *line, int ial)
                 case BCF_BT_INT8:  BRANCH(int8_t); break;
                 case BCF_BT_INT16: BRANCH(int16_t); break;
                 case BCF_BT_INT32: BRANCH(int32_t); break;
-                default: fprintf(bcftools_stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); exit(1);
+                default: fprintf(bcftools_stderr,"Unknown format type for PL: %s:%d .. fmt->type=%d\n", __FILE__,__LINE__, fmt_pl->type); bcftools_exit(1);
             }
             #undef BRANCH
         }
@@ -999,33 +1003,32 @@ static void vcfroh(args_t *args, bcf1_t *line)
 
     // Skip unwanted lines, for simplicity we consider only biallelic sites 
     if ( line->rid == args->skip_rid ) return;
-    if ( line->n_allele==1 ) { args->nnot_biallelic++; return; }   // no ALT allele
-    if ( line->n_allele > 3 ) { args->nnot_biallelic++; return; }   // cannot be bi-allelic, even with <*>
 
     // This can be raw callable VCF with the symbolic unseen allele <*>
-    int ial = 0;
+    int ial = 0, nalt = line->n_allele - 1;
     for (i=1; i<line->n_allele; i++)
-        if ( !strcmp("<*>",line->d.allele[i]) ) { ial = i; break; }
-    if ( ial==0 )    // normal VCF, the symbolic allele is not present
     {
-        if ( line->n_allele!=2 ) { args->nnot_biallelic++; return; }   // not biallelic
-        ial = 1;
+        if ( !strcmp("<*>",line->d.allele[i]) || !strcmp("<NON_REF>",line->d.allele[i]) ) nalt--;
+        else if ( !ial ) ial = i;
     }
-    else
+
+    if ( !nalt ) // no ALT allele
     {
-        if ( line->n_allele!=3 ) return;    // not biallelic
-        ial = ial==1 ? 2 : 1;               // <*> can come in any order
+        args->nno_alt++;
+        if ( !args->include_noalt_sites ) return;
+    }
+    else if ( nalt>1 )
+    {
+        args->nmultiallelic++;
+        return;
     }
+
     if ( args->snps_only && !bcf_is_snp(line) ) return;
 
     // Initialize genetic map
     int skip_rid = 0;
     if ( args->prev_rid<0 )
-    {
-        args->prev_rid = line->rid;
-        args->prev_pos = line->pos;
         skip_rid = load_genmap(args, bcf_seqname(args->hdr,line));
-    }
 
     // New chromosome?
     if ( args->prev_rid!=line->rid )
@@ -1073,7 +1076,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "General Options:\n");
     fprintf(bcftools_stderr, "        --AF-dflt <float>              if AF is not known, use this allele frequency [skip]\n");
     fprintf(bcftools_stderr, "        --AF-tag <TAG>                 use TAG for allele frequency\n");
-    fprintf(bcftools_stderr, "        --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF\\tALT\\tAF)\n");
+    fprintf(bcftools_stderr, "        --AF-file <file>               read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
     fprintf(bcftools_stderr, "    -b  --buffer-size <int[,int]>      buffer size and the number of overlapping sites, 0 for unlimited [0]\n");
     fprintf(bcftools_stderr, "                                           If the first number is negative, it is interpreted as the maximum memory to\n");
     fprintf(bcftools_stderr, "                                           use, in MB. The default overlap is set to roughly 1%% of the buffer size.\n");
@@ -1084,6 +1087,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "                                           Safe value to use is 30 to account for GT errors.\n");
     fprintf(bcftools_stderr, "        --include <expr>               select sites for which the expression is true\n");
     fprintf(bcftools_stderr, "    -i, --ignore-homref                skip hom-ref genotypes (0/0)\n");
+    fprintf(bcftools_stderr, "        --include-noalt                include sites with no ALT allele (ignored by default)\n");
     fprintf(bcftools_stderr, "    -I, --skip-indels                  skip indels as their genotypes are enriched for errors\n");
     fprintf(bcftools_stderr, "    -m, --genetic-map <file>           genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\"\n");
     fprintf(bcftools_stderr, "                                           is replaced with chromosome name\n");
@@ -1103,7 +1107,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "    -H, --az-to-hw <float>             P(HW|AZ) transition probability from AZ to HW state [5e-9]\n");
     fprintf(bcftools_stderr, "    -V, --viterbi-training <float>     estimate HMM parameters, <float> is the convergence threshold, e.g. 1e-10 (experimental)\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfroh(int argc, char *argv[])
@@ -1124,6 +1128,7 @@ int main_vcfroh(int argc, char *argv[])
         {"AF-dflt",1,0,2},
         {"include",1,0,3},
         {"exclude",1,0,4},
+        {"include-noalt",0,0,5},
         {"buffer-size",1,0,'b'},
         {"ignore-homref",0,0,'i'},
         {"estimate-AF",1,0,'e'},
@@ -1156,8 +1161,13 @@ int main_vcfroh(int argc, char *argv[])
                 args->dflt_AF = strtod(optarg,&tmp);
                 if ( *tmp ) error("Could not parse: --AF-dflt %s\n", optarg);
                 break;
-            case 3: args->filter_str = optarg; args->filter_logic = FLT_INCLUDE; break;
-            case 4: args->filter_str = optarg; args->filter_logic = FLT_EXCLUDE; break;
+            case  3 :
+                if ( args->filter_str ) error("Error: only one --include or --exclude expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case  4 :
+                if ( args->filter_str ) error("Error: only one --include or --exclude expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 5: args->include_noalt_sites = 1; break;
             case 'o': args->output_fname = optarg; break;
             case 'O': 
                 if ( strchr(optarg,'s') || strchr(optarg,'S') ) args->output_type |= OUTPUT_ST;
@@ -1259,7 +1269,7 @@ int main_vcfroh(int argc, char *argv[])
         fprintf(bcftools_stderr,"Number of lines overlapping with --AF-file/processed: %d/%d\n", args->ntot,nmin);
     else
         fprintf(bcftools_stderr,"Number of lines total/processed: %d/%d\n", args->ntot,nmin);
-    fprintf(bcftools_stderr,"Number of lines filtered/no AF/not biallelic/dup: %d/%d/%d/%d\n", args->nfiltered,args->nno_af,args->nnot_biallelic,args->ndup);
+    fprintf(bcftools_stderr,"Number of lines filtered/no AF/no alt/multiallelic/dup: %d/%d/%d/%d/%d\n", args->nfiltered,args->nno_af,args->nno_alt,args->nmultiallelic,args->ndup);
     if ( nmin==0 )
     {
         fprintf(bcftools_stderr,"No usable sites were found.\n");
diff --git a/bcftools/vcfsom.c b/bcftools/vcfsom.c
index ed86422..db01d24 100644
--- a/bcftools/vcfsom.c
+++ b/bcftools/vcfsom.c
@@ -1,6 +1,6 @@
 /*  vcfsom.c -- SOM (Self-Organizing Map) filtering.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2014, 2020 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -25,6 +25,7 @@ THE SOFTWARE.  */
 #include <stdio.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
 #include <errno.h>
@@ -165,15 +166,16 @@ void annots_reader_close(args_t *args)
 static void som_write_map(char *prefix, som_t **som, int nsom)
 {
     FILE *fp = open_file(NULL,"w","%s.som",prefix);
-    fwrite("SOMv1",5,1,fp);
-    fwrite(&nsom,sizeof(int),1,fp);
+    size_t nw;  // fwrite() returns the number of ITEMS written; items are 1 byte below so nw is a byte count
+    if ( (nw=fwrite("SOMv1",1,5,fp))!=5 ) error("Failed to write 5 bytes\n");   // file magic
+    if ( (nw=fwrite(&nsom,1,sizeof(int),fp))!=sizeof(int) ) error("Failed to write %zu bytes\n",sizeof(int));   // number of maps that follow
     int i;
     for (i=0; i<nsom; i++)
     {
-        fwrite(&som[i]->size,sizeof(int),1,fp);
-        fwrite(&som[i]->kdim,sizeof(int),1,fp);
-        fwrite(som[i]->w,sizeof(double),som[i]->size*som[i]->kdim,fp);
-        fwrite(som[i]->c,sizeof(double),som[i]->size,fp);
+        if ( (nw=fwrite(&som[i]->size,1,sizeof(int),fp))!=sizeof(int) ) error("Failed to write %zu bytes\n",sizeof(int));
+        if ( (nw=fwrite(&som[i]->kdim,1,sizeof(int),fp))!=sizeof(int) ) error("Failed to write %zu bytes\n",sizeof(int));
+        if ( (nw=fwrite(som[i]->w,1,sizeof(double)*som[i]->size*som[i]->kdim,fp))!=sizeof(double)*som[i]->size*som[i]->kdim ) error("Failed to write %zu bytes\n",sizeof(double)*som[i]->size*som[i]->kdim);   // size*kdim doubles
+        if ( (nw=fwrite(som[i]->c,1,sizeof(double)*som[i]->size,fp))!=sizeof(double)*som[i]->size ) error("Failed to write %zu bytes\n",sizeof(double)*som[i]->size);   // size doubles
     }
     if ( fclose(fp) ) error("%s.som: fclose failed\n",prefix);
 }
diff --git a/bcftools/vcfsom.c.pysam.c b/bcftools/vcfsom.c.pysam.c
index b8368f6..effd352 100644
--- a/bcftools/vcfsom.c.pysam.c
+++ b/bcftools/vcfsom.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfsom.c -- SOM (Self-Organizing Map) filtering.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2014, 2020 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -27,6 +27,7 @@ THE SOFTWARE.  */
 #include <stdio.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <ctype.h>
 #include <string.h>
 #include <errno.h>
@@ -167,15 +168,16 @@ void annots_reader_close(args_t *args)
 static void som_write_map(char *prefix, som_t **som, int nsom)
 {
     FILE *fp = open_file(NULL,"w","%s.som",prefix);
-    fwrite("SOMv1",5,1,fp);
-    fwrite(&nsom,sizeof(int),1,fp);
+    size_t nw;  // fwrite() returns the number of ITEMS written; items are 1 byte below so nw is a byte count
+    if ( (nw=fwrite("SOMv1",1,5,fp))!=5 ) error("Failed to write 5 bytes\n");   // file magic
+    if ( (nw=fwrite(&nsom,1,sizeof(int),fp))!=sizeof(int) ) error("Failed to write %zu bytes\n",sizeof(int));   // number of maps that follow
     int i;
     for (i=0; i<nsom; i++)
     {
-        fwrite(&som[i]->size,sizeof(int),1,fp);
-        fwrite(&som[i]->kdim,sizeof(int),1,fp);
-        fwrite(som[i]->w,sizeof(double),som[i]->size*som[i]->kdim,fp);
-        fwrite(som[i]->c,sizeof(double),som[i]->size,fp);
+        if ( (nw=fwrite(&som[i]->size,1,sizeof(int),fp))!=sizeof(int) ) error("Failed to write %zu bytes\n",sizeof(int));
+        if ( (nw=fwrite(&som[i]->kdim,1,sizeof(int),fp))!=sizeof(int) ) error("Failed to write %zu bytes\n",sizeof(int));
+        if ( (nw=fwrite(som[i]->w,1,sizeof(double)*som[i]->size*som[i]->kdim,fp))!=sizeof(double)*som[i]->size*som[i]->kdim ) error("Failed to write %zu bytes\n",sizeof(double)*som[i]->size*som[i]->kdim);   // size*kdim doubles
+        if ( (nw=fwrite(som[i]->c,1,sizeof(double)*som[i]->size,fp))!=sizeof(double)*som[i]->size ) error("Failed to write %zu bytes\n",sizeof(double)*som[i]->size);   // size doubles
     }
     if ( fclose(fp) ) error("%s.som: fclose failed\n",prefix);
 }
@@ -638,7 +640,7 @@ static void usage(void)
     fprintf(bcftools_stderr, "    -n, --ntrain-sites <int>           effective number of training sites [number of good sites]\n");
     fprintf(bcftools_stderr, "    -r, --random-seed <int>            random seed, 0 for time() [1]\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfsom(int argc, char *argv[])
diff --git a/bcftools/vcfsort.c b/bcftools/vcfsort.c
index 99aa598..7ec13fb 100644
--- a/bcftools/vcfsort.c
+++ b/bcftools/vcfsort.c
@@ -1,6 +1,6 @@
 /*  vcfsort.c -- sort subcommand
 
-   Copyright (C) 2017 Genome Research Ltd.
+   Copyright (C) 2017-2021 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
    
@@ -227,7 +227,7 @@ void merge_blocks(args_t *args)
         blk_read(args, bhp, args->hdr, blk);
     }
 
-    htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
+    htsFile *out = hts_open(args->output_fname, hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( bcf_hdr_write(out, args->hdr)!=0 ) clean_files_and_throw(args, "[%s] Error: cannot write to %s\n", __func__,args->output_fname);
     while ( bhp->ndat )
     {
@@ -252,19 +252,23 @@ static void usage(args_t *args)
     fprintf(stderr, "Usage:   bcftools sort [OPTIONS] <FILE.vcf>\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
-    fprintf(stderr, "    -m, --max-mem <float>[kMG]    maximum memory to use [768M]\n");    // using metric units, 1M=1e6
-    fprintf(stderr, "    -o, --output-file <file>      output file name [stdout]\n");
-    fprintf(stderr, "    -O, --output-type <b|u|z|v>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(stderr, "    -T, --temp-dir <dir>          temporary files [/tmp/bcftools-sort.XXXXXX]\n");
+    fprintf(stderr, "    -m, --max-mem FLOAT[kMG]    maximum memory to use [768M]\n");    // using metric units, 1M=1e6
+    fprintf(stderr, "    -o, --output FILE           output file name [stdout]\n");
+    fprintf(stderr, "    -O, --output-type b|u|z|v   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+#ifdef _WIN32
+    fprintf(stderr, "    -T, --temp-dir DIR          temporary files [/bcftools.XXXXXX]\n");
+#else
+    fprintf(stderr, "    -T, --temp-dir DIR          temporary files [/tmp/bcftools.XXXXXX]\n");
+#endif
     fprintf(stderr, "\n");
     exit(1);
 }
 
-size_t parse_mem_string(char *str) 
+size_t parse_mem_string(const char *str) 
 {
     char *tmp;
     double mem = strtod(str, &tmp);
-    if ( tmp==str ) error("Could not parse: --max-mem %s\n", str);
+    if ( tmp==str ) error("Could not parse the memory string: \"%s\"\n", str);
     if ( !strcasecmp("k",tmp) ) mem *= 1000;
     else if ( !strcasecmp("m",tmp) ) mem *= 1000*1000;
     else if ( !strcasecmp("g",tmp) ) mem *= 1000*1000*1000;
@@ -274,21 +278,8 @@ size_t parse_mem_string(char *str)
 void mkdir_p(const char *fmt, ...);
 static void init(args_t *args)
 {
-#ifdef _WIN32
-    char tmp_path[MAX_PATH];
-    int ret = GetTempPath(MAX_PATH, tmp_path);
-    if (!ret || ret > MAX_PATH)
-        error("Could not get the path to the temporary folder\n");
-    if (strlen(tmp_path) + strlen("/bcftools-sort.XXXXXX") >= MAX_PATH)
-        error("Full path to the temporary folder is too long\n");
-    strcat(tmp_path, "/bcftools-sort.XXXXXX");
-    args->tmp_dir = strdup(tmp_path);
-#else
-    args->tmp_dir = args->tmp_dir ? strdup(args->tmp_dir) : strdup("/tmp/bcftools-sort.XXXXXX");
-#endif
-    size_t len = strlen(args->tmp_dir);
-    if ( !strcmp("XXXXXX",args->tmp_dir+len-6) )
-    {
+    args->tmp_dir = init_tmp_prefix(args->tmp_dir);
+
 #ifdef _WIN32
         int ret = mkdir(mktemp(args->tmp_dir), 0700);
         if ( ret ) error("mkdir(%s) failed: %s\n", args->tmp_dir,strerror(errno));
@@ -298,10 +289,6 @@ static void init(args_t *args)
         int ret = chmod(tmp, S_IRUSR|S_IWUSR|S_IXUSR);
         if ( ret ) error("chmod(%s,S_IRUSR|S_IWUSR|S_IXUSR) failed: %s\n", args->tmp_dir,strerror(errno));
 #endif
-    }
-    else {
-        mkdir_p("%s/",args->tmp_dir);
-    }
 
     fprintf(stderr,"Writing to %s\n", args->tmp_dir);
 }
@@ -326,6 +313,7 @@ int main_sort(int argc, char *argv[])
         {"temp-dir",required_argument,NULL,'T'},
         {"output-type",required_argument,NULL,'O'},
         {"output-file",required_argument,NULL,'o'},
+        {"output",required_argument,NULL,'o'},
         {"help",no_argument,NULL,'h'},
         {0,0,0,0}
     };
diff --git a/bcftools/vcfsort.c.pysam.c b/bcftools/vcfsort.c.pysam.c
index 542fc28..1fd74d3 100644
--- a/bcftools/vcfsort.c.pysam.c
+++ b/bcftools/vcfsort.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfsort.c -- sort subcommand
 
-   Copyright (C) 2017 Genome Research Ltd.
+   Copyright (C) 2017-2021 Genome Research Ltd.
 
    Author: Petr Danecek <pd3@sanger.ac.uk>
    
@@ -90,7 +90,7 @@ void clean_files_and_throw(args_t *args, const char *format, ...)
     vfprintf(bcftools_stderr, format, ap);
     va_end(ap);
     clean_files(args);
-    exit(-1);
+    bcftools_exit(-1);
 }
 
 int cmp_bcf_pos(const void *aptr, const void *bptr)
@@ -229,7 +229,7 @@ void merge_blocks(args_t *args)
         blk_read(args, bhp, args->hdr, blk);
     }
 
-    htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
+    htsFile *out = hts_open(args->output_fname, hts_bcf_wmode2(args->output_type,args->output_fname));
     if ( bcf_hdr_write(out, args->hdr)!=0 ) clean_files_and_throw(args, "[%s] Error: cannot write to %s\n", __func__,args->output_fname);
     while ( bhp->ndat )
     {
@@ -254,19 +254,23 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "Usage:   bcftools sort [OPTIONS] <FILE.vcf>\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Options:\n");
-    fprintf(bcftools_stderr, "    -m, --max-mem <float>[kMG]    maximum memory to use [768M]\n");    // using metric units, 1M=1e6
-    fprintf(bcftools_stderr, "    -o, --output-file <file>      output file name [bcftools_stdout]\n");
-    fprintf(bcftools_stderr, "    -O, --output-type <b|u|z|v>   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
-    fprintf(bcftools_stderr, "    -T, --temp-dir <dir>          temporary files [/tmp/bcftools-sort.XXXXXX]\n");
+    fprintf(bcftools_stderr, "    -m, --max-mem FLOAT[kMG]    maximum memory to use [768M]\n");    // using metric units, 1M=1e6
+    fprintf(bcftools_stderr, "    -o, --output FILE           output file name [bcftools_stdout]\n");
+    fprintf(bcftools_stderr, "    -O, --output-type b|u|z|v   b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+#ifdef _WIN32
+    fprintf(bcftools_stderr, "    -T, --temp-dir DIR          temporary files [/bcftools.XXXXXX]\n");
+#else
+    fprintf(bcftools_stderr, "    -T, --temp-dir DIR          temporary files [/tmp/bcftools.XXXXXX]\n");
+#endif
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
-size_t parse_mem_string(char *str) 
+size_t parse_mem_string(const char *str) 
 {
     char *tmp;
     double mem = strtod(str, &tmp);
-    if ( tmp==str ) error("Could not parse: --max-mem %s\n", str);
+    if ( tmp==str ) error("Could not parse the memory string: \"%s\"\n", str);
     if ( !strcasecmp("k",tmp) ) mem *= 1000;
     else if ( !strcasecmp("m",tmp) ) mem *= 1000*1000;
     else if ( !strcasecmp("g",tmp) ) mem *= 1000*1000*1000;
@@ -276,21 +280,8 @@ size_t parse_mem_string(char *str)
 void mkdir_p(const char *fmt, ...);
 static void init(args_t *args)
 {
-#ifdef _WIN32
-    char tmp_path[MAX_PATH];
-    int ret = GetTempPath(MAX_PATH, tmp_path);
-    if (!ret || ret > MAX_PATH)
-        error("Could not get the path to the temporary folder\n");
-    if (strlen(tmp_path) + strlen("/bcftools-sort.XXXXXX") >= MAX_PATH)
-        error("Full path to the temporary folder is too long\n");
-    strcat(tmp_path, "/bcftools-sort.XXXXXX");
-    args->tmp_dir = strdup(tmp_path);
-#else
-    args->tmp_dir = args->tmp_dir ? strdup(args->tmp_dir) : strdup("/tmp/bcftools-sort.XXXXXX");
-#endif
-    size_t len = strlen(args->tmp_dir);
-    if ( !strcmp("XXXXXX",args->tmp_dir+len-6) )
-    {
+    args->tmp_dir = init_tmp_prefix(args->tmp_dir);
+
 #ifdef _WIN32
         int ret = mkdir(mktemp(args->tmp_dir), 0700);
         if ( ret ) error("mkdir(%s) failed: %s\n", args->tmp_dir,strerror(errno));
@@ -300,10 +291,6 @@ static void init(args_t *args)
         int ret = chmod(tmp, S_IRUSR|S_IWUSR|S_IXUSR);
         if ( ret ) error("chmod(%s,S_IRUSR|S_IWUSR|S_IXUSR) failed: %s\n", args->tmp_dir,strerror(errno));
 #endif
-    }
-    else {
-        mkdir_p("%s/",args->tmp_dir);
-    }
 
     fprintf(bcftools_stderr,"Writing to %s\n", args->tmp_dir);
 }
@@ -328,6 +315,7 @@ int main_sort(int argc, char *argv[])
         {"temp-dir",required_argument,NULL,'T'},
         {"output-type",required_argument,NULL,'O'},
         {"output-file",required_argument,NULL,'o'},
+        {"output",required_argument,NULL,'o'},
         {"help",no_argument,NULL,'h'},
         {0,0,0,0}
     };
diff --git a/bcftools/vcfstats.c b/bcftools/vcfstats.c
index ffa367d..601c557 100644
--- a/bcftools/vcfstats.c
+++ b/bcftools/vcfstats.c
@@ -1,6 +1,6 @@
 /*  vcfstats.c -- Produces stats which can be plotted using plot-vcfstats.
 
-    Copyright (C) 2012-2017 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -31,6 +31,7 @@ THE SOFTWARE.  */
 #include <stdarg.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <math.h>
 #include <htslib/vcf.h>
 #include <htslib/synced_bcf_reader.h>
@@ -40,6 +41,7 @@ THE SOFTWARE.  */
 #include "bcftools.h"
 #include "filter.h"
 #include "bin.h"
+#include "dist.h"
 
 // Logic of the filters: include or exclude sites which match the filters?
 #define FLT_INCLUDE 1
@@ -57,7 +59,7 @@ typedef struct
     float min, max;
     uint64_t *vals_ts, *vals_tv;
     void *val;
-    int nbins, type, m_val;
+    int nbins, type, m_val, idx;
 }
 user_stats_t;
 
@@ -81,7 +83,9 @@ typedef struct
     #endif
     int ts_alt1, tv_alt1;
     #if QUAL_STATS
-        int *qual_ts, *qual_tv, *qual_snps, *qual_indels;
+        // Values are truncated to one decimal place and stored as the integer 1+(int)(Q*10); missing and negative values go in the first bin
+        // Only SNPs that are the 1st alternate allele are counted
+        dist_t *qual_ts, *qual_tv, *qual_indels;
     #endif
     int *insertions, *deletions, m_indel;   // maximum indel length
     int in_frame, out_frame, na_frame, in_frame_alt1, out_frame_alt1, na_frame_alt1;
@@ -186,13 +190,6 @@ static inline int idist_i2bin(idist_t *d, int i)
     return i-1+d->min;
 }
 
-static inline int clip_nonnegative(float x, int limit)
-{
-    if (x >= limit || isnan(x)) return limit - 1;
-    else if (x <= 0.0) return 0;
-    else return (int) x;
-}
-
 #define IC_DBG 0
 #if IC_DBG
 static void _indel_ctx_print1(_idc1_t *idc)
@@ -349,12 +346,29 @@ static void add_user_stats(args_t *args, char *str)
     args->usr = (user_stats_t*) realloc(args->usr,sizeof(user_stats_t)*args->nusr);
     user_stats_t *usr = &args->usr[args->nusr-1];
     memset(usr,0,sizeof(*usr));
-    usr->min  = 0;
-    usr->max  = 1;
+    usr->min   = 0;
+    usr->max   = 1;
     usr->nbins = 100;
+    usr->idx   = 0;
 
     char *tmp = str;
     while ( *tmp && *tmp!=':' ) tmp++;
+
+    // Tag with an index or just tag? (e.g. PV4[1] vs DP)
+    if ( tmp > str && tmp[-1]==']' )
+    {
+        char *ptr = tmp;
+        while ( ptr>str && *ptr!='[' ) ptr--;
+        if ( *ptr=='[' )
+        {
+            char *ptr2;
+            usr->idx = strtol(ptr+1, &ptr2, 10);
+            if ( ptr+1==ptr2 || ptr2 != tmp-1 ) error("Could not parse the index in \"%s\" (ptr=%s;ptr2=%s(%p),tmp=%s(%p),idx=%d)\n", str,ptr,ptr2,ptr2,tmp,tmp,usr->idx);
+            if ( usr->idx<0 ) error("Error: negative index is not allowed: \"%s\"\n", str);
+            *ptr = 0;
+        }
+    }
+
     usr->tag = (char*)calloc(tmp-str+2,sizeof(char));
     memcpy(usr->tag,str,tmp-str);
 
@@ -465,10 +479,9 @@ static void init_stats(args_t *args)
         int j;
         for (j=0; j<3; j++) stats->af_repeats[j] = (int*) calloc(args->m_af,sizeof(int));
         #if QUAL_STATS
-            stats->qual_ts     = (int*) calloc(args->m_qual,sizeof(int));
-            stats->qual_tv     = (int*) calloc(args->m_qual,sizeof(int));
-            stats->qual_snps   = (int*) calloc(args->m_qual,sizeof(int));
-            stats->qual_indels = (int*) calloc(args->m_qual,sizeof(int));
+            stats->qual_ts     = dist_init(5);
+            stats->qual_tv     = dist_init(5);
+            stats->qual_indels = dist_init(5);
         #endif
         if ( args->files->n_smpl )
         {
@@ -548,10 +561,9 @@ static void destroy_stats(args_t *args)
         for (j=0; j<3; j++)
             if (stats->af_repeats[j]) free(stats->af_repeats[j]);
         #if QUAL_STATS
-            if (stats->qual_ts) free(stats->qual_ts);
-            if (stats->qual_tv) free(stats->qual_tv);
-            if (stats->qual_snps) free(stats->qual_snps);
-            if (stats->qual_indels) free(stats->qual_indels);
+            if (stats->qual_ts) dist_destroy(stats->qual_ts);
+            if (stats->qual_tv) dist_destroy(stats->qual_tv);
+            if (stats->qual_indels) dist_destroy(stats->qual_indels);
         #endif
         #if HWE_STATS
             free(stats->af_hwe);
@@ -678,8 +690,8 @@ static void do_indel_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
     bcf1_t *line = reader->buffer[0];
 
     #if QUAL_STATS
-        int iqual = clip_nonnegative(line->qual, args->m_qual);
-        stats->qual_indels[iqual]++;
+        int iqual = (isnan(line->qual) || line->qual<0) ? 0 : 1 + (int)(line->qual*10);
+        dist_insert(stats->qual_indels, iqual);
     #endif
 
     // Check if the indel is near an exon for the frameshift statistics
@@ -780,7 +792,7 @@ static void do_indel_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
 
 static void do_user_stats(stats_t *stats, bcf_sr_t *reader, int is_ts)
 {
-    int i;
+    int i, nval;
     for (i=0; i<stats->nusr; i++)
     {
         user_stats_t *usr = &stats->usr[i];
@@ -788,13 +800,15 @@ static void do_user_stats(stats_t *stats, bcf_sr_t *reader, int is_ts)
         float val;
         if ( usr->type==BCF_HT_REAL )
         {
-            if ( bcf_get_info_float(reader->header,reader->buffer[0],usr->tag,&usr->val,&usr->m_val)<=0 ) continue;
-            val = ((float*)usr->val)[0];
+            if ( (nval=bcf_get_info_float(reader->header,reader->buffer[0],usr->tag,&usr->val,&usr->m_val))<=0 ) continue;
+            if ( usr->idx >= nval ) continue;
+            val = ((float*)usr->val)[usr->idx];
         }
         else
         {
-            if ( bcf_get_info_int32(reader->header,reader->buffer[0],usr->tag,&usr->val,&usr->m_val)<=0 ) continue;
-            val = ((int32_t*)usr->val)[0];
+            if ( (nval=bcf_get_info_int32(reader->header,reader->buffer[0],usr->tag,&usr->val,&usr->m_val))<=0 ) continue;
+            if ( usr->idx >= nval ) continue;
+            val = ((int32_t*)usr->val)[usr->idx];
         }
         int idx;
         if ( val<=usr->min ) idx = 0;
@@ -813,8 +827,7 @@ static void do_snp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
     if ( ref<0 ) return;
 
     #if QUAL_STATS
-        int iqual = clip_nonnegative(line->qual, args->m_qual);
-        stats->qual_snps[iqual]++;
+        int iqual = (isnan(line->qual) || line->qual<0) ? 0 : 1 + (int)(line->qual*10);
     #endif
 
     int i;
@@ -833,7 +846,7 @@ static void do_snp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
             {
                 stats->ts_alt1++;
                 #if QUAL_STATS
-                    stats->qual_ts[iqual]++;
+                    dist_insert(stats->qual_ts,iqual);
                 #endif
                 do_user_stats(stats, reader, 1);
             }
@@ -845,7 +858,7 @@ static void do_snp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
             {
                 stats->tv_alt1++;
                 #if QUAL_STATS
-                    stats->qual_tv[iqual]++;
+                    dist_insert(stats->qual_tv,iqual);
                 #endif
                 do_user_stats(stats, reader, 0);
             }
@@ -1354,21 +1367,50 @@ static void print_stats(args_t *args)
         }
     }
     #if QUAL_STATS
-        printf("# QUAL, Stats by quality:\n# QUAL\t[2]id\t[3]Quality\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\t[7]number of indels\n");
+        printf("# QUAL, Stats by quality\n# QUAL\t[2]id\t[3]Quality\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\t[7]number of indels\n");
         for (id=0; id<args->nstats; id++)
         {
             stats_t *stats = &args->stats[id];
-            for (i=0; i<args->m_qual; i++)
+            int ndist_ts = dist_nbins(stats->qual_ts);
+            int ndist_tv = dist_nbins(stats->qual_tv);
+            int ndist_in = dist_nbins(stats->qual_indels);
+            int ndist_max = ndist_ts;
+            if ( ndist_max < ndist_tv ) ndist_max = ndist_tv;
+            if ( ndist_max < ndist_in ) ndist_max = ndist_in;
+            uint32_t beg, end;
+            uint32_t nts, ntv, nin;
+            for (i=0; i<ndist_max; i++)
             {
-                if ( stats->qual_snps[i]+stats->qual_ts[i]+stats->qual_tv[i]+stats->qual_indels[i] == 0  ) continue;
-                printf("QUAL\t%d\t%d\t%d\t%d\t%d\t%d\n", id,i,stats->qual_snps[i],stats->qual_ts[i],stats->qual_tv[i],stats->qual_indels[i]);
+                nts = ntv = nin = 0;
+                float qval = -1;
+                if ( i < ndist_ts )
+                {
+                    nts = dist_get(stats->qual_ts, i, &beg, &end);
+                    qval = beg>0 ? 0.1*(beg - 1) : -1;
+                }
+                if ( i < ndist_tv )
+                {
+                    ntv = dist_get(stats->qual_tv, i, &beg, &end);
+                    if ( qval==-1 ) qval = beg > 0 ? 0.1*(beg - 1) : -1;
+                }
+                if ( i < ndist_in )
+                {
+                    nin = dist_get(stats->qual_indels, i, &beg, &end);
+                    if ( qval==-1 ) qval = beg > 0 ? 0.1*(beg - 1) : -1;
+                }
+                if ( nts+ntv+nin==0 ) continue;
+
+                printf("QUAL\t%d\t",id);
+                if ( qval==-1 ) printf(".");
+                else printf("%.1f",qval);
+                printf("\t%d\t%d\t%d\t%d\n",nts+ntv,nts,ntv,nin);
             }
         }
     #endif
     for (i=0; i<args->nusr; i++)
     {
-        printf("# USR:%s, Stats by %s:\n# USR:%s\t[2]id\t[3]%s\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\n",
-            args->usr[i].tag,args->usr[i].tag,args->usr[i].tag,args->usr[i].tag);
+        printf("# USR:%s/%d\t[2]id\t[3]%s/%d\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\n",
+            args->usr[i].tag,args->usr[i].idx,args->usr[i].tag,args->usr[i].idx);
         for (id=0; id<args->nstats; id++)
         {
             user_stats_t *usr = &args->stats[id].usr[i];
@@ -1377,8 +1419,8 @@ static void print_stats(args_t *args)
             {
                 if ( usr->vals_ts[j]+usr->vals_tv[j] == 0 ) continue;   // skip empty bins
                 float val = usr->min + (usr->max - usr->min)*j/(usr->nbins-1);
-                const char *fmt = usr->type==BCF_HT_REAL ? "USR:%s\t%d\t%e\t%d\t%d\t%d\n" : "USR:%s\t%d\t%.0f\t%d\t%d\t%d\n";
-                printf(fmt,usr->tag,id,val,usr->vals_ts[j]+usr->vals_tv[j],usr->vals_ts[j],usr->vals_tv[j]);
+                const char *fmt = usr->type==BCF_HT_REAL ? "USR:%s/%d\t%d\t%e\t%d\t%d\t%d\n" : "USR:%s/%d\t%d\t%.0f\t%d\t%d\t%d\n";
+                printf(fmt,usr->tag,usr->idx,id,val,usr->vals_ts[j]+usr->vals_tv[j],usr->vals_ts[j],usr->vals_tv[j]);
             }
         }
     }
@@ -1482,10 +1524,10 @@ static void print_stats(args_t *args)
                 printf("# NRD and discordance is calculated as follows:\n");
                 printf("#   m .. number of matches\n");
                 printf("#   x .. number of mismatches\n");
-                printf("#   NRD = (xRR + xRA + xAA) / (xRR + xRA + xAA + mRA + mAA)\n");
-                printf("#   RR discordance = xRR / (xRR + mRR)\n");
-                printf("#   RA discordance = xRA / (xRA + mRA)\n");
-                printf("#   AA discordance = xAA / (xAA + mAA)\n");
+                printf("#   NRD = 100 * (xRR + xRA + xAA) / (xRR + xRA + xAA + mRA + mAA)\n");
+                printf("#   RR discordance = 100 * xRR / (xRR + mRR)\n");
+                printf("#   RA discordance = 100 * xRA / (xRA + mRA)\n");
+                printf("#   AA discordance = 100 * xAA / (xAA + mAA)\n");
                 printf("# Non-Reference Discordance (NRD), SNPs\n# NRDs\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
             }
             else
@@ -1704,26 +1746,27 @@ static void usage(void)
     fprintf(stderr, "Usage:   bcftools stats [options] <A.vcf.gz> [<B.vcf.gz>]\n");
     fprintf(stderr, "\n");
     fprintf(stderr, "Options:\n");
-    fprintf(stderr, "        --af-bins <list>               allele frequency bins, a list (0.1,0.5,1) or a file (0.1\\n0.5\\n1)\n");
-    fprintf(stderr, "        --af-tag <string>              allele frequency tag to use, by default estimated from AN,AC or GT\n");
-    fprintf(stderr, "    -1, --1st-allele-only              include only 1st allele at multiallelic sites\n");
-    fprintf(stderr, "    -c, --collapse <string>            treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
-    fprintf(stderr, "    -d, --depth <int,int,int>          depth distribution: min,max,bin size [0,500,1]\n");
-    fprintf(stderr, "    -e, --exclude <expr>               exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(stderr, "    -E, --exons <file.gz>              tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)\n");
-    fprintf(stderr, "    -f, --apply-filters <list>         require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
-    fprintf(stderr, "    -F, --fasta-ref <file>             faidx indexed reference sequence file to determine INDEL context\n");
-    fprintf(stderr, "    -i, --include <expr>               select sites for which the expression is true (see man page for details)\n");
-    fprintf(stderr, "    -I, --split-by-ID                  collect stats for sites with ID separately (known vs novel)\n");
-    fprintf(stderr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
-    fprintf(stderr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
-    fprintf(stderr, "    -s, --samples <list>               list of samples for sample stats, \"-\" to include all samples\n");
-    fprintf(stderr, "    -S, --samples-file <file>          file of samples to include\n");
-    fprintf(stderr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
-    fprintf(stderr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
-    fprintf(stderr, "    -u, --user-tstv <TAG[:min:max:n]>  collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
-    fprintf(stderr, "        --threads <int>                use multithreading with <int> worker threads [0]\n");
-    fprintf(stderr, "    -v, --verbose                      produce verbose per-site and per-sample output\n");
+    fprintf(stderr, "        --af-bins LIST               Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\\n0.5\\n1)\n");
+    fprintf(stderr, "        --af-tag STRING              Allele frequency tag to use, by default estimated from AN,AC or GT\n");
+    fprintf(stderr, "    -1, --1st-allele-only            Include only 1st allele at multiallelic sites\n");
+    fprintf(stderr, "    -c, --collapse STRING            Treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
+    fprintf(stderr, "    -d, --depth INT,INT,INT          Depth distribution: min,max,bin size [0,500,1]\n");
+    fprintf(stderr, "    -e, --exclude EXPR               Exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(stderr, "    -E, --exons FILE.gz              Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, bgzip compressed)\n");
+    fprintf(stderr, "    -f, --apply-filters LIST         Require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+    fprintf(stderr, "    -F, --fasta-ref FILE             Faidx indexed reference sequence file to determine INDEL context\n");
+    fprintf(stderr, "    -i, --include EXPR               Select sites for which the expression is true (see man page for details)\n");
+    fprintf(stderr, "    -I, --split-by-ID                Collect stats for sites with ID separately (known vs novel)\n");
+    fprintf(stderr, "    -r, --regions REGION             Restrict to comma-separated list of regions\n");
+    fprintf(stderr, "    -R, --regions-file FILE          Restrict to regions listed in a file\n");
+    fprintf(stderr, "    -s, --samples LIST               List of samples for sample stats, \"-\" to include all samples\n");
+    fprintf(stderr, "    -S, --samples-file FILE          File of samples to include\n");
+    fprintf(stderr, "    -t, --targets REGION             Similar to -r but streams rather than index-jumps\n");
+    fprintf(stderr, "    -T, --targets-file FILE          Similar to -R but streams rather than index-jumps\n");
+    fprintf(stderr, "    -u, --user-tstv TAG[:min:max:n]  Collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
+    fprintf(stderr, "                                       A subfield can be selected as e.g. 'PV4[0]', here the first value of the PV4 tag\n");
+    fprintf(stderr, "        --threads INT                Use multithreading with <int> worker threads [0]\n");
+    fprintf(stderr, "    -v, --verbose                    Produce verbose per-site and per-sample output\n");
     fprintf(stderr, "\n");
     exit(1);
 }
@@ -1795,8 +1838,12 @@ int main_vcfstats(int argc, char *argv[])
             case 's': args->samples_list = optarg; break;
             case 'S': args->samples_list = optarg; args->samples_is_file = 1; break;
             case 'I': args->split_by_id = 1; break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case 'h':
             case '?': usage(); break;
diff --git a/bcftools/vcfstats.c.pysam.c b/bcftools/vcfstats.c.pysam.c
index c52d016..050a68a 100644
--- a/bcftools/vcfstats.c.pysam.c
+++ b/bcftools/vcfstats.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfstats.c -- Produces stats which can be plotted using plot-vcfstats.
 
-    Copyright (C) 2012-2017 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -33,6 +33,7 @@ THE SOFTWARE.  */
 #include <stdarg.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <assert.h>
 #include <math.h>
 #include <htslib/vcf.h>
 #include <htslib/synced_bcf_reader.h>
@@ -42,6 +43,7 @@ THE SOFTWARE.  */
 #include "bcftools.h"
 #include "filter.h"
 #include "bin.h"
+#include "dist.h"
 
 // Logic of the filters: include or exclude sites which match the filters?
 #define FLT_INCLUDE 1
@@ -59,7 +61,7 @@ typedef struct
     float min, max;
     uint64_t *vals_ts, *vals_tv;
     void *val;
-    int nbins, type, m_val;
+    int nbins, type, m_val, idx;
 }
 user_stats_t;
 
@@ -83,7 +85,9 @@ typedef struct
     #endif
     int ts_alt1, tv_alt1;
     #if QUAL_STATS
-        int *qual_ts, *qual_tv, *qual_snps, *qual_indels;
+        // Values are truncated to one decimal place and stored as the integer 1+(int)(Q*10); missing and negative values go in the first bin
+        // Only SNPs that are the 1st alternate allele are counted
+        dist_t *qual_ts, *qual_tv, *qual_indels;
     #endif
     int *insertions, *deletions, m_indel;   // maximum indel length
     int in_frame, out_frame, na_frame, in_frame_alt1, out_frame_alt1, na_frame_alt1;
@@ -188,13 +192,6 @@ static inline int idist_i2bin(idist_t *d, int i)
     return i-1+d->min;
 }
 
-static inline int clip_nonnegative(float x, int limit)
-{
-    if (x >= limit || isnan(x)) return limit - 1;
-    else if (x <= 0.0) return 0;
-    else return (int) x;
-}
-
 #define IC_DBG 0
 #if IC_DBG
 static void _indel_ctx_print1(_idc1_t *idc)
@@ -351,12 +348,29 @@ static void add_user_stats(args_t *args, char *str)
     args->usr = (user_stats_t*) realloc(args->usr,sizeof(user_stats_t)*args->nusr);
     user_stats_t *usr = &args->usr[args->nusr-1];
     memset(usr,0,sizeof(*usr));
-    usr->min  = 0;
-    usr->max  = 1;
+    usr->min   = 0;
+    usr->max   = 1;
     usr->nbins = 100;
+    usr->idx   = 0;
 
     char *tmp = str;
     while ( *tmp && *tmp!=':' ) tmp++;
+
+    // Tag with an index or just tag? (e.g. PV4[1] vs DP)
+    if ( tmp > str && tmp[-1]==']' )
+    {
+        char *ptr = tmp;
+        while ( ptr>str && *ptr!='[' ) ptr--;
+        if ( *ptr=='[' )
+        {
+            char *ptr2;
+            usr->idx = strtol(ptr+1, &ptr2, 10);
+            if ( ptr+1==ptr2 || ptr2 != tmp-1 ) error("Could not parse the index in \"%s\" (ptr=%s;ptr2=%s(%p),tmp=%s(%p),idx=%d)\n", str,ptr,ptr2,ptr2,tmp,tmp,usr->idx);
+            if ( usr->idx<0 ) error("Error: negative index is not allowed: \"%s\"\n", str);
+            *ptr = 0;
+        }
+    }
+
     usr->tag = (char*)calloc(tmp-str+2,sizeof(char));
     memcpy(usr->tag,str,tmp-str);
 
@@ -467,10 +481,9 @@ static void init_stats(args_t *args)
         int j;
         for (j=0; j<3; j++) stats->af_repeats[j] = (int*) calloc(args->m_af,sizeof(int));
         #if QUAL_STATS
-            stats->qual_ts     = (int*) calloc(args->m_qual,sizeof(int));
-            stats->qual_tv     = (int*) calloc(args->m_qual,sizeof(int));
-            stats->qual_snps   = (int*) calloc(args->m_qual,sizeof(int));
-            stats->qual_indels = (int*) calloc(args->m_qual,sizeof(int));
+            stats->qual_ts     = dist_init(5);
+            stats->qual_tv     = dist_init(5);
+            stats->qual_indels = dist_init(5);
         #endif
         if ( args->files->n_smpl )
         {
@@ -550,10 +563,9 @@ static void destroy_stats(args_t *args)
         for (j=0; j<3; j++)
             if (stats->af_repeats[j]) free(stats->af_repeats[j]);
         #if QUAL_STATS
-            if (stats->qual_ts) free(stats->qual_ts);
-            if (stats->qual_tv) free(stats->qual_tv);
-            if (stats->qual_snps) free(stats->qual_snps);
-            if (stats->qual_indels) free(stats->qual_indels);
+            if (stats->qual_ts) dist_destroy(stats->qual_ts);
+            if (stats->qual_tv) dist_destroy(stats->qual_tv);
+            if (stats->qual_indels) dist_destroy(stats->qual_indels);
         #endif
         #if HWE_STATS
             free(stats->af_hwe);
@@ -680,8 +692,8 @@ static void do_indel_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
     bcf1_t *line = reader->buffer[0];
 
     #if QUAL_STATS
-        int iqual = clip_nonnegative(line->qual, args->m_qual);
-        stats->qual_indels[iqual]++;
+        int iqual = (isnan(line->qual) || line->qual<0) ? 0 : 1 + (int)(line->qual*10);
+        dist_insert(stats->qual_indels, iqual);
     #endif
 
     // Check if the indel is near an exon for the frameshift statistics
@@ -782,7 +794,7 @@ static void do_indel_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
 
 static void do_user_stats(stats_t *stats, bcf_sr_t *reader, int is_ts)
 {
-    int i;
+    int i, nval;
     for (i=0; i<stats->nusr; i++)
     {
         user_stats_t *usr = &stats->usr[i];
@@ -790,13 +802,15 @@ static void do_user_stats(stats_t *stats, bcf_sr_t *reader, int is_ts)
         float val;
         if ( usr->type==BCF_HT_REAL )
         {
-            if ( bcf_get_info_float(reader->header,reader->buffer[0],usr->tag,&usr->val,&usr->m_val)<=0 ) continue;
-            val = ((float*)usr->val)[0];
+            if ( (nval=bcf_get_info_float(reader->header,reader->buffer[0],usr->tag,&usr->val,&usr->m_val))<=0 ) continue;
+            if ( usr->idx >= nval ) continue;
+            val = ((float*)usr->val)[usr->idx];
         }
         else
         {
-            if ( bcf_get_info_int32(reader->header,reader->buffer[0],usr->tag,&usr->val,&usr->m_val)<=0 ) continue;
-            val = ((int32_t*)usr->val)[0];
+            if ( (nval=bcf_get_info_int32(reader->header,reader->buffer[0],usr->tag,&usr->val,&usr->m_val))<=0 ) continue;
+            if ( usr->idx >= nval ) continue;
+            val = ((int32_t*)usr->val)[usr->idx];
         }
         int idx;
         if ( val<=usr->min ) idx = 0;
@@ -815,8 +829,7 @@ static void do_snp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
     if ( ref<0 ) return;
 
     #if QUAL_STATS
-        int iqual = clip_nonnegative(line->qual, args->m_qual);
-        stats->qual_snps[iqual]++;
+        int iqual = (isnan(line->qual) || line->qual<0) ? 0 : 1 + (int)(line->qual*10);
     #endif
 
     int i;
@@ -835,7 +848,7 @@ static void do_snp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
             {
                 stats->ts_alt1++;
                 #if QUAL_STATS
-                    stats->qual_ts[iqual]++;
+                    dist_insert(stats->qual_ts,iqual);
                 #endif
                 do_user_stats(stats, reader, 1);
             }
@@ -847,7 +860,7 @@ static void do_snp_stats(args_t *args, stats_t *stats, bcf_sr_t *reader)
             {
                 stats->tv_alt1++;
                 #if QUAL_STATS
-                    stats->qual_tv[iqual]++;
+                    dist_insert(stats->qual_tv,iqual);
                 #endif
                 do_user_stats(stats, reader, 0);
             }
@@ -872,7 +885,7 @@ static inline void update_dvaf(stats_t *stats, bcf1_t *line, bcf_fmt_t *fmt, int
         case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_missing, bcf_int8_vector_end); break;
         case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
         case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
-        default: fprintf(bcftools_stderr, "[E::%s] todo: %d\n", __func__, fmt->type); exit(1); break;
+        default: fprintf(bcftools_stderr, "[E::%s] todo: %d\n", __func__, fmt->type); bcftools_exit(1); break;
     }
     #undef BRANCH_INT
 
@@ -1020,7 +1033,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
             case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_missing, bcf_int8_vector_end); break;
             case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
             case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
-            default: fprintf(bcftools_stderr, "[E::%s] todo: %d\n", __func__, fmt_ptr->type); exit(1); break;
+            default: fprintf(bcftools_stderr, "[E::%s] todo: %d\n", __func__, fmt_ptr->type); bcftools_exit(1); break;
         }
         #undef BRANCH_INT
     }
@@ -1051,7 +1064,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
             case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_missing, bcf_int8_vector_end); break;
             case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
             case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
-            default: fprintf(bcftools_stderr, "[E::%s] todo: %d\n", __func__, fmt_ptr->type); exit(1); break;
+            default: fprintf(bcftools_stderr, "[E::%s] todo: %d\n", __func__, fmt_ptr->type); bcftools_exit(1); break;
         }
         #undef BRANCH_INT
     }
@@ -1356,21 +1369,50 @@ static void print_stats(args_t *args)
         }
     }
     #if QUAL_STATS
-        fprintf(bcftools_stdout, "# QUAL, Stats by quality:\n# QUAL\t[2]id\t[3]Quality\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\t[7]number of indels\n");
+        fprintf(bcftools_stdout, "# QUAL, Stats by quality\n# QUAL\t[2]id\t[3]Quality\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\t[7]number of indels\n");
         for (id=0; id<args->nstats; id++)
         {
             stats_t *stats = &args->stats[id];
-            for (i=0; i<args->m_qual; i++)
+            int ndist_ts = dist_nbins(stats->qual_ts);
+            int ndist_tv = dist_nbins(stats->qual_tv);
+            int ndist_in = dist_nbins(stats->qual_indels);
+            int ndist_max = ndist_ts;
+            if ( ndist_max < ndist_tv ) ndist_max = ndist_tv;
+            if ( ndist_max < ndist_in ) ndist_max = ndist_in;
+            uint32_t beg, end;
+            uint32_t nts, ntv, nin;
+            for (i=0; i<ndist_max; i++)
             {
-                if ( stats->qual_snps[i]+stats->qual_ts[i]+stats->qual_tv[i]+stats->qual_indels[i] == 0  ) continue;
-                fprintf(bcftools_stdout, "QUAL\t%d\t%d\t%d\t%d\t%d\t%d\n", id,i,stats->qual_snps[i],stats->qual_ts[i],stats->qual_tv[i],stats->qual_indels[i]);
+                nts = ntv = nin = 0;
+                float qval = -1;
+                if ( i < ndist_ts )
+                {
+                    nts = dist_get(stats->qual_ts, i, &beg, &end);
+                    qval = beg>0 ? 0.1*(beg - 1) : -1;
+                }
+                if ( i < ndist_tv )
+                {
+                    ntv = dist_get(stats->qual_tv, i, &beg, &end);
+                    if ( qval==-1 ) qval = beg > 0 ? 0.1*(beg - 1) : -1;
+                }
+                if ( i < ndist_in )
+                {
+                    nin = dist_get(stats->qual_indels, i, &beg, &end);
+                    if ( qval==-1 ) qval = beg > 0 ? 0.1*(beg - 1) : -1;
+                }
+                if ( nts+ntv+nin==0 ) continue;
+
+                fprintf(bcftools_stdout, "QUAL\t%d\t",id);
+                if ( qval==-1 ) fprintf(bcftools_stdout, ".");
+                else fprintf(bcftools_stdout, "%.1f",qval);
+                fprintf(bcftools_stdout, "\t%d\t%d\t%d\t%d\n",nts+ntv,nts,ntv,nin);
             }
         }
     #endif
     for (i=0; i<args->nusr; i++)
     {
-        fprintf(bcftools_stdout, "# USR:%s, Stats by %s:\n# USR:%s\t[2]id\t[3]%s\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\n",
-            args->usr[i].tag,args->usr[i].tag,args->usr[i].tag,args->usr[i].tag);
+        fprintf(bcftools_stdout, "# USR:%s/%d\t[2]id\t[3]%s/%d\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\n",
+            args->usr[i].tag,args->usr[i].idx,args->usr[i].tag,args->usr[i].idx);
         for (id=0; id<args->nstats; id++)
         {
             user_stats_t *usr = &args->stats[id].usr[i];
@@ -1379,8 +1421,8 @@ static void print_stats(args_t *args)
             {
                 if ( usr->vals_ts[j]+usr->vals_tv[j] == 0 ) continue;   // skip empty bins
                 float val = usr->min + (usr->max - usr->min)*j/(usr->nbins-1);
-                const char *fmt = usr->type==BCF_HT_REAL ? "USR:%s\t%d\t%e\t%d\t%d\t%d\n" : "USR:%s\t%d\t%.0f\t%d\t%d\t%d\n";
-                fprintf(bcftools_stdout, fmt,usr->tag,id,val,usr->vals_ts[j]+usr->vals_tv[j],usr->vals_ts[j],usr->vals_tv[j]);
+                const char *fmt = usr->type==BCF_HT_REAL ? "USR:%s/%d\t%d\t%e\t%d\t%d\t%d\n" : "USR:%s/%d\t%d\t%.0f\t%d\t%d\t%d\n";
+                fprintf(bcftools_stdout, fmt,usr->tag,usr->idx,id,val,usr->vals_ts[j]+usr->vals_tv[j],usr->vals_ts[j],usr->vals_tv[j]);
             }
         }
     }
@@ -1484,10 +1526,10 @@ static void print_stats(args_t *args)
                 fprintf(bcftools_stdout, "# NRD and discordance is calculated as follows:\n");
                 fprintf(bcftools_stdout, "#   m .. number of matches\n");
                 fprintf(bcftools_stdout, "#   x .. number of mismatches\n");
-                fprintf(bcftools_stdout, "#   NRD = (xRR + xRA + xAA) / (xRR + xRA + xAA + mRA + mAA)\n");
-                fprintf(bcftools_stdout, "#   RR discordance = xRR / (xRR + mRR)\n");
-                fprintf(bcftools_stdout, "#   RA discordance = xRA / (xRA + mRA)\n");
-                fprintf(bcftools_stdout, "#   AA discordance = xAA / (xAA + mAA)\n");
+                fprintf(bcftools_stdout, "#   NRD = 100 * (xRR + xRA + xAA) / (xRR + xRA + xAA + mRA + mAA)\n");
+                fprintf(bcftools_stdout, "#   RR discordance = 100 * xRR / (xRR + mRR)\n");
+                fprintf(bcftools_stdout, "#   RA discordance = 100 * xRA / (xRA + mRA)\n");
+                fprintf(bcftools_stdout, "#   AA discordance = 100 * xAA / (xAA + mAA)\n");
                 fprintf(bcftools_stdout, "# Non-Reference Discordance (NRD), SNPs\n# NRDs\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
             }
             else
@@ -1706,28 +1748,29 @@ static void usage(void)
     fprintf(bcftools_stderr, "Usage:   bcftools stats [options] <A.vcf.gz> [<B.vcf.gz>]\n");
     fprintf(bcftools_stderr, "\n");
     fprintf(bcftools_stderr, "Options:\n");
-    fprintf(bcftools_stderr, "        --af-bins <list>               allele frequency bins, a list (0.1,0.5,1) or a file (0.1\\n0.5\\n1)\n");
-    fprintf(bcftools_stderr, "        --af-tag <string>              allele frequency tag to use, by default estimated from AN,AC or GT\n");
-    fprintf(bcftools_stderr, "    -1, --1st-allele-only              include only 1st allele at multiallelic sites\n");
-    fprintf(bcftools_stderr, "    -c, --collapse <string>            treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
-    fprintf(bcftools_stderr, "    -d, --depth <int,int,int>          depth distribution: min,max,bin size [0,500,1]\n");
-    fprintf(bcftools_stderr, "    -e, --exclude <expr>               exclude sites for which the expression is true (see man page for details)\n");
-    fprintf(bcftools_stderr, "    -E, --exons <file.gz>              tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)\n");
-    fprintf(bcftools_stderr, "    -f, --apply-filters <list>         require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
-    fprintf(bcftools_stderr, "    -F, --fasta-ref <file>             faidx indexed reference sequence file to determine INDEL context\n");
-    fprintf(bcftools_stderr, "    -i, --include <expr>               select sites for which the expression is true (see man page for details)\n");
-    fprintf(bcftools_stderr, "    -I, --split-by-ID                  collect stats for sites with ID separately (known vs novel)\n");
-    fprintf(bcftools_stderr, "    -r, --regions <region>             restrict to comma-separated list of regions\n");
-    fprintf(bcftools_stderr, "    -R, --regions-file <file>          restrict to regions listed in a file\n");
-    fprintf(bcftools_stderr, "    -s, --samples <list>               list of samples for sample stats, \"-\" to include all samples\n");
-    fprintf(bcftools_stderr, "    -S, --samples-file <file>          file of samples to include\n");
-    fprintf(bcftools_stderr, "    -t, --targets <region>             similar to -r but streams rather than index-jumps\n");
-    fprintf(bcftools_stderr, "    -T, --targets-file <file>          similar to -R but streams rather than index-jumps\n");
-    fprintf(bcftools_stderr, "    -u, --user-tstv <TAG[:min:max:n]>  collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
-    fprintf(bcftools_stderr, "        --threads <int>                use multithreading with <int> worker threads [0]\n");
-    fprintf(bcftools_stderr, "    -v, --verbose                      produce verbose per-site and per-sample output\n");
+    fprintf(bcftools_stderr, "        --af-bins LIST               Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\\n0.5\\n1)\n");
+    fprintf(bcftools_stderr, "        --af-tag STRING              Allele frequency tag to use, by default estimated from AN,AC or GT\n");
+    fprintf(bcftools_stderr, "    -1, --1st-allele-only            Include only 1st allele at multiallelic sites\n");
+    fprintf(bcftools_stderr, "    -c, --collapse STRING            Treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
+    fprintf(bcftools_stderr, "    -d, --depth INT,INT,INT          Depth distribution: min,max,bin size [0,500,1]\n");
+    fprintf(bcftools_stderr, "    -e, --exclude EXPR               Exclude sites for which the expression is true (see man page for details)\n");
+    fprintf(bcftools_stderr, "    -E, --exons FILE.gz              Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, bgzip compressed)\n");
+    fprintf(bcftools_stderr, "    -f, --apply-filters LIST         Require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+    fprintf(bcftools_stderr, "    -F, --fasta-ref FILE             Faidx indexed reference sequence file to determine INDEL context\n");
+    fprintf(bcftools_stderr, "    -i, --include EXPR               Select sites for which the expression is true (see man page for details)\n");
+    fprintf(bcftools_stderr, "    -I, --split-by-ID                Collect stats for sites with ID separately (known vs novel)\n");
+    fprintf(bcftools_stderr, "    -r, --regions REGION             Restrict to comma-separated list of regions\n");
+    fprintf(bcftools_stderr, "    -R, --regions-file FILE          Restrict to regions listed in a file\n");
+    fprintf(bcftools_stderr, "    -s, --samples LIST               List of samples for sample stats, \"-\" to include all samples\n");
+    fprintf(bcftools_stderr, "    -S, --samples-file FILE          File of samples to include\n");
+    fprintf(bcftools_stderr, "    -t, --targets REGION             Similar to -r but streams rather than index-jumps\n");
+    fprintf(bcftools_stderr, "    -T, --targets-file FILE          Similar to -R but streams rather than index-jumps\n");
+    fprintf(bcftools_stderr, "    -u, --user-tstv TAG[:min:max:n]  Collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
+    fprintf(bcftools_stderr, "                                       A subfield can be selected as e.g. 'PV4[0]', here the first value of the PV4 tag\n");
+    fprintf(bcftools_stderr, "        --threads INT                Use multithreading with <int> worker threads [0]\n");
+    fprintf(bcftools_stderr, "    -v, --verbose                    Produce verbose per-site and per-sample output\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfstats(int argc, char *argv[])
@@ -1797,8 +1840,12 @@ int main_vcfstats(int argc, char *argv[])
             case 's': args->samples_list = optarg; break;
             case 'S': args->samples_list = optarg; args->samples_is_file = 1; break;
             case 'I': args->split_by_id = 1; break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case  9 : args->n_threads = strtol(optarg, 0, 0); break;
             case 'h':
             case '?': usage(); break;
diff --git a/bcftools/vcfview.c b/bcftools/vcfview.c
index 4117d10..ce4c810 100644
--- a/bcftools/vcfview.c
+++ b/bcftools/vcfview.c
@@ -1,6 +1,6 @@
 /*  vcfview.c -- VCF/BCF conversion, view, subset and filter VCF/BCF files.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Shane McCarthy <sm15@sanger.ac.uk>
 
@@ -221,12 +221,10 @@ static void init_data(args_t *args)
     }
 
     // setup output
+    const char *tmp = hts_bcf_wmode2(args->output_type,args->fn_out);
     char modew[8];
-    strcpy(modew, "w");
+    strcpy(modew,tmp);
     if (args->clevel >= 0 && args->clevel <= 9) sprintf(modew + 1, "%d", args->clevel);
-    if (args->output_type==FT_BCF) strcat(modew, "bu");         // uncompressed BCF
-    else if (args->output_type & FT_BCF) strcat(modew, "b");    // compressed BCF
-    else if (args->output_type & FT_GZ) strcat(modew,"z");      // compressed VCF
     args->out = hts_open(args->fn_out ? args->fn_out : "-", modew);
     if ( !args->out ) error("%s: %s\n", args->fn_out,strerror(errno));
     if ( args->n_threads > 0)
@@ -501,7 +499,7 @@ static void usage(args_t *args)
     fprintf(stderr, "    -h/H, --header-only/--no-header     print the header only/suppress the header in VCF output\n");
     fprintf(stderr, "    -l,   --compression-level [0-9]     compression level: 0 uncompressed, 1 best speed, 9 best compression [%d]\n", args->clevel);
     fprintf(stderr, "          --no-version                  do not append version and command line to the header\n");
-    fprintf(stderr, "    -o,   --output-file <file>          output file name [stdout]\n");
+    fprintf(stderr, "    -o,   --output <file>               output file name [stdout]\n");
     fprintf(stderr, "    -O,   --output-type <b|u|z|v>       b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
     fprintf(stderr, "    -r, --regions <region>              restrict to comma-separated list of regions\n");
     fprintf(stderr, "    -R, --regions-file <file>           restrict to regions listed in a file\n");
@@ -575,6 +573,7 @@ int main_vcfview(int argc, char *argv[])
         {"force-samples",no_argument,NULL,1},
         {"output-type",required_argument,NULL,'O'},
         {"output-file",required_argument,NULL,'o'},
+        {"output",required_argument,NULL,'o'},
         {"types",required_argument,NULL,'v'},
         {"exclude-types",required_argument,NULL,'V'},
         {"targets",required_argument,NULL,'t'},
@@ -639,9 +638,12 @@ int main_vcfview(int argc, char *argv[])
                 break;
             case 'v': args->include_types = optarg; break;
             case 'V': args->exclude_types = optarg; break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
-
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'c':
             {
                 args->min_ac_type = ALLELE_NONREF;
diff --git a/bcftools/vcfview.c.pysam.c b/bcftools/vcfview.c.pysam.c
index 77643b7..75b3e64 100644
--- a/bcftools/vcfview.c.pysam.c
+++ b/bcftools/vcfview.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcfview.c -- VCF/BCF conversion, view, subset and filter VCF/BCF files.
 
-    Copyright (C) 2013-2018 Genome Research Ltd.
+    Copyright (C) 2013-2021 Genome Research Ltd.
 
     Author: Shane McCarthy <sm15@sanger.ac.uk>
 
@@ -166,7 +166,7 @@ static void init_data(args_t *args)
     if (args->include_types || args->exclude_types) {
         if (args->include_types && args->exclude_types) {
             fprintf(bcftools_stderr, "Error: only supply one of --include-types, --exclude-types options\n");
-            exit(1);
+            bcftools_exit(1);
         }
         char **type_list = 0;
         int m = 0, n = 0;
@@ -197,7 +197,7 @@ static void init_data(args_t *args)
                 else {
                     fprintf(bcftools_stderr, "[E::%s] unknown type\n", type_list[i]);
                     fprintf(bcftools_stderr, "Accepted types are snps, indels, mnps, other\n");
-                    exit(1);
+                    bcftools_exit(1);
                 }
             }
         }
@@ -213,7 +213,7 @@ static void init_data(args_t *args)
                 else {
                     fprintf(bcftools_stderr, "[E::%s] unknown type\n", type_list[i]);
                     fprintf(bcftools_stderr, "Accepted types are snps, indels, mnps, other\n");
-                    exit(1);
+                    bcftools_exit(1);
                 }
             }
         }
@@ -223,12 +223,10 @@ static void init_data(args_t *args)
     }
 
     // setup output
+    const char *tmp = hts_bcf_wmode2(args->output_type,args->fn_out);
     char modew[8];
-    strcpy(modew, "w");
+    strcpy(modew,tmp);
     if (args->clevel >= 0 && args->clevel <= 9) sprintf(modew + 1, "%d", args->clevel);
-    if (args->output_type==FT_BCF) strcat(modew, "bu");         // uncompressed BCF
-    else if (args->output_type & FT_BCF) strcat(modew, "b");    // compressed BCF
-    else if (args->output_type & FT_GZ) strcat(modew,"z");      // compressed VCF
     args->out = hts_open(args->fn_out ? args->fn_out : "-", modew);
     if ( !args->out ) error("%s: %s\n", args->fn_out,strerror(errno));
     if ( args->n_threads > 0)
@@ -302,7 +300,7 @@ int bcf_all_phased(const bcf_hdr_t *header, bcf1_t *line)
                 case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_vector_end); break;
                 case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_vector_end); break;
                 case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_vector_end); break;
-                default: fprintf(bcftools_stderr, "[E::%s] todo: fmt_type %d\n", __func__, fmt_ptr->type); exit(1); break;
+                default: fprintf(bcftools_stderr, "[E::%s] todo: fmt_type %d\n", __func__, fmt_ptr->type); bcftools_exit(1); break;
             }
             #undef BRANCH_INT
             if (!sample_phased) {
@@ -503,7 +501,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "    -h/H, --header-only/--no-header     print the header only/suppress the header in VCF output\n");
     fprintf(bcftools_stderr, "    -l,   --compression-level [0-9]     compression level: 0 uncompressed, 1 best speed, 9 best compression [%d]\n", args->clevel);
     fprintf(bcftools_stderr, "          --no-version                  do not append version and command line to the header\n");
-    fprintf(bcftools_stderr, "    -o,   --output-file <file>          output file name [bcftools_stdout]\n");
+    fprintf(bcftools_stderr, "    -o,   --output <file>               output file name [bcftools_stdout]\n");
     fprintf(bcftools_stderr, "    -O,   --output-type <b|u|z|v>       b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
     fprintf(bcftools_stderr, "    -r, --regions <region>              restrict to comma-separated list of regions\n");
     fprintf(bcftools_stderr, "    -R, --regions-file <file>           restrict to regions listed in a file\n");
@@ -533,7 +531,7 @@ static void usage(args_t *args)
     fprintf(bcftools_stderr, "    -v/V, --types/--exclude-types <list>        select/exclude comma-separated list of variant types: snps,indels,mnps,ref,bnd,other [null]\n");
     fprintf(bcftools_stderr, "    -x/X, --private/--exclude-private           select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples\n");
     fprintf(bcftools_stderr, "\n");
-    exit(1);
+    bcftools_exit(1);
 }
 
 int main_vcfview(int argc, char *argv[])
@@ -577,6 +575,7 @@ int main_vcfview(int argc, char *argv[])
         {"force-samples",no_argument,NULL,1},
         {"output-type",required_argument,NULL,'O'},
         {"output-file",required_argument,NULL,'o'},
+        {"output",required_argument,NULL,'o'},
         {"types",required_argument,NULL,'v'},
         {"exclude-types",required_argument,NULL,'V'},
         {"targets",required_argument,NULL,'t'},
@@ -641,9 +640,12 @@ int main_vcfview(int argc, char *argv[])
                 break;
             case 'v': args->include_types = optarg; break;
             case 'V': args->exclude_types = optarg; break;
-            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
-            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
-
+            case 'e':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+            case 'i':
+                if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
+                args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
             case 'c':
             {
                 args->min_ac_type = ALLELE_NONREF;
diff --git a/bcftools/vcmp.c b/bcftools/vcmp.c
index 7d3b0f9..dbdc4b7 100644
--- a/bcftools/vcmp.c
+++ b/bcftools/vcmp.c
@@ -1,6 +1,6 @@
 /*  vcmp.c -- reference allele utility functions.
 
-    Copyright (C) 2013 Genome Research Ltd.
+    Copyright (C) 2013-2015, 2018 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/vcmp.c.pysam.c b/bcftools/vcmp.c.pysam.c
index 00435bd..18a6813 100644
--- a/bcftools/vcmp.c.pysam.c
+++ b/bcftools/vcmp.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  vcmp.c -- reference allele utility functions.
 
-    Copyright (C) 2013 Genome Research Ltd.
+    Copyright (C) 2013-2015, 2018 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/vcmp.h b/bcftools/vcmp.h
index 9c6370c..03234b4 100644
--- a/bcftools/vcmp.h
+++ b/bcftools/vcmp.h
@@ -1,6 +1,6 @@
 /*  vcmp.h -- reference allele utility functions.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2015 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
diff --git a/bcftools/version.c b/bcftools/version.c
index 19cec91..d068897 100644
--- a/bcftools/version.c
+++ b/bcftools/version.c
@@ -1,6 +1,6 @@
 /*  version.c -- report version numbers for plugins.
 
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -25,6 +25,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <strings.h>
 #include <errno.h>
 #include <htslib/hts.h>
 #include "bcftools.h"
@@ -60,7 +61,6 @@ void error_errno(const char *format, ...)
     exit(-1);
 }
 
-
 const char *hts_bcf_wmode(int file_type)
 {
     if ( file_type == FT_BCF ) return "wbu";    // uncompressed BCF
@@ -69,4 +69,14 @@ const char *hts_bcf_wmode(int file_type)
     return "w";                                 // uncompressed VCF
 }
 
+const char *hts_bcf_wmode2(int file_type, char *fname)    // write mode chosen from fname's suffix, falling back to file_type
+{
+    if ( !fname ) return hts_bcf_wmode(file_type);    // no file name given: file_type alone decides
+    int len = strlen(fname);
+    if ( len >= 4 && !strcasecmp(".bcf",fname+len-4) ) return hts_bcf_wmode(FT_BCF|FT_GZ);    // suffix comparison is case-insensitive; a recognised suffix overrides file_type
+    if ( len >= 4 && !strcasecmp(".vcf",fname+len-4) ) return hts_bcf_wmode(FT_VCF);
+    if ( len >= 7 && !strcasecmp(".vcf.gz",fname+len-7) ) return hts_bcf_wmode(FT_VCF|FT_GZ);
+    if ( len >= 8 && !strcasecmp(".vcf.bgz",fname+len-8) ) return hts_bcf_wmode(FT_VCF|FT_GZ);    // .vcf.bgz is treated exactly like .vcf.gz
+    return hts_bcf_wmode(file_type);    // unrecognised suffix: fall back to the requested file_type
+}
 
diff --git a/bcftools/version.c.pysam.c b/bcftools/version.c.pysam.c
index 01dad07..37fa828 100644
--- a/bcftools/version.c.pysam.c
+++ b/bcftools/version.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  version.c -- report version numbers for plugins.
 
-    Copyright (C) 2014 Genome Research Ltd.
+    Copyright (C) 2014-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -27,6 +27,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <strings.h>
 #include <errno.h>
 #include <htslib/hts.h>
 #include "bcftools.h"
@@ -44,7 +45,7 @@ void error(const char *format, ...)
     va_start(ap, format);
     vfprintf(bcftools_stderr, format, ap);
     va_end(ap);
-    exit(-1);
+    bcftools_exit(-1);
 }
 
 void error_errno(const char *format, ...)
@@ -59,10 +60,9 @@ void error_errno(const char *format, ...)
     } else {
         fprintf(bcftools_stderr, "\n");
     }
-    exit(-1);
+    bcftools_exit(-1);
 }
 
-
 const char *hts_bcf_wmode(int file_type)
 {
     if ( file_type == FT_BCF ) return "wbu";    // uncompressed BCF
@@ -71,4 +71,14 @@ const char *hts_bcf_wmode(int file_type)
     return "w";                                 // uncompressed VCF
 }
 
+const char *hts_bcf_wmode2(int file_type, char *fname)    // write mode chosen from fname's suffix, falling back to file_type
+{
+    if ( !fname ) return hts_bcf_wmode(file_type);    // no file name given: file_type alone decides
+    int len = strlen(fname);
+    if ( len >= 4 && !strcasecmp(".bcf",fname+len-4) ) return hts_bcf_wmode(FT_BCF|FT_GZ);    // suffix comparison is case-insensitive; a recognised suffix overrides file_type
+    if ( len >= 4 && !strcasecmp(".vcf",fname+len-4) ) return hts_bcf_wmode(FT_VCF);
+    if ( len >= 7 && !strcasecmp(".vcf.gz",fname+len-7) ) return hts_bcf_wmode(FT_VCF|FT_GZ);
+    if ( len >= 8 && !strcasecmp(".vcf.bgz",fname+len-8) ) return hts_bcf_wmode(FT_VCF|FT_GZ);    // .vcf.bgz is treated exactly like .vcf.gz
+    return hts_bcf_wmode(file_type);    // unrecognised suffix: fall back to the requested file_type
+}
 
diff --git a/bcftools/version.sh b/bcftools/version.sh
index 7232440..52b1e08 100755
--- a/bcftools/version.sh
+++ b/bcftools/version.sh
@@ -1,7 +1,30 @@
 #!/bin/sh
+# version.sh 
+#
+#     Author : Petr Danecek <pd3@sanger.ac.uk>
+#
+#     Copyright (C) 2018-2021 Genome Research Ltd.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
 
 # Master version, for use in tarballs or non-git source copies
-VERSION=1.10.2
+VERSION=1.13
 
 # If we have a git clone, then check against the current tag
 if [ -e .git ]
diff --git a/cy_build.py b/cy_build.py
index fae7055..aff41a0 100644
--- a/cy_build.py
+++ b/cy_build.py
@@ -31,6 +31,7 @@ def is_pip_install():
 class CyExtension(Extension):
     def __init__(self, *args, **kwargs):
         self._init_func = kwargs.pop("init_func", None)
+        self._prebuild_func = kwargs.pop("prebuild_func", None)
         Extension.__init__(self, *args, **kwargs)
 
     def extend_includes(self, includes):
@@ -82,5 +83,8 @@ class cy_build_ext(build_ext):
                 ext.extra_link_args = []
 
             ext.extra_link_args += ['-Wl,-rpath,$ORIGIN']
-                                    
+
+        if isinstance(ext, CyExtension) and ext._prebuild_func:
+            ext._prebuild_func(ext, self.force)
+
         build_ext.build_extension(self, ext)
diff --git a/devtools/import.py b/devtools/import.py
index f54138b..ea35792 100644
--- a/devtools/import.py
+++ b/devtools/import.py
@@ -40,7 +40,7 @@ EXCLUDE = {
     "htslib": (
         'htslib/tabix.c', 'htslib/bgzip.c',
         'htslib/htsfile.c',
-        "test"),
+        "test", "tests"),
 }
 
 
@@ -93,6 +93,10 @@ def _update_pysam_files(cf, destdir):
                 else:
                     lines = re.sub(r"int main\(", "int {}_{}_main(".format(
                         basename, subname), lines)
+                if basename == "samtools":
+                    lines = re.sub(r"main_(reheader)\(",
+                                   r"samtools_main_\1(", lines)
+                lines = re.sub(r"\bexit\(", "{}_exit(".format(basename), lines)
                 lines = re.sub("stderr", "{}_stderr".format(basename), lines)
                 lines = re.sub("stdout", "{}_stdout".format(basename), lines)
                 lines = re.sub(r" printf\(", " fprintf({}_stdout, ".format(basename), lines)
@@ -103,9 +107,6 @@ def _update_pysam_files(cf, destdir):
                 fn = os.path.basename(filename)
                 # some specific fixes:
                 SPECIFIC_SUBSTITUTIONS = {
-                    "bamtk.c": (
-                        'else if (strcmp(argv[1], "tview") == 0)',
-                        '//else if (strcmp(argv[1], "tview") == 0)'),
                     "bam_md.c": (
                         'sam_open_format("-", mode_w',
                         'sam_open_format({}_stdout_fn, mode_w'.format(basename)),
@@ -120,6 +121,10 @@ def _update_pysam_files(cf, destdir):
                     lines = lines.replace(
                         SPECIFIC_SUBSTITUTIONS[fn][0],
                         SPECIFIC_SUBSTITUTIONS[fn][1])
+                if fn == "bamtk.c":
+                    lines = re.sub(r'(#include "version.h")', r'\1\n#include "samtools_config_vars.h"', lines)
+                    lines = re.sub(r'(else if.*"tview")', r'//\1', lines)
+
                 outfile.write(lines)
 
     with open(os.path.join("import", "pysam.h")) as inf, \
@@ -224,9 +229,25 @@ if len(sys.argv) >= 1:
                     outf.write(line)
         os.rename(tmpfilename, filename)
 
+    def _update_version_doc_file(dest, value, filename):  # rewrite "<dest>-<old>" as "<dest>-<value>" on lines containing " wraps "
+        tmpfilename = filename + ".tmp"  # output goes to a temporary file that is renamed over the original at the end
+        with open(filename, encoding="utf-8") as inf:
+            with open(tmpfilename, "w", encoding="utf-8") as outf:
+                for line in inf:
+                    if " wraps " in line:  # every other line is copied through unchanged
+                        # hide the sentence's fullstop from the main regexp
+                        line = re.sub(r'\.$', ',DOT', line)
+                        line = re.sub(r'{}-[^*,]*'.format(dest),  # the old version text runs up to the next '*' or ','
+                                      '{}-{}'.format(dest, value), line)
+                        line = re.sub(',DOT', '.', line)  # put back the fullstop hidden above
+                    outf.write(line)
+        os.rename(tmpfilename, filename)
+
     version = _getVersion(srcdir)
     _update_version_file("__{}_version__".format(dest), version, "pysam/version.py")
     _update_version_file(C_VERSION[dest], version + " (pysam)", "pysam/version.h")
+    _update_version_doc_file(dest, version, "README.rst")
+    _update_version_doc_file(dest, version, "doc/index.rst")
 
     sys.exit(0)
 
diff --git a/devtools/install-CGAT-tools.sh b/devtools/install-CGAT-tools.sh
index 27eb481..e45d391 100755
--- a/devtools/install-CGAT-tools.sh
+++ b/devtools/install-CGAT-tools.sh
@@ -80,7 +80,7 @@ else
 fi # if-OS
 } # install_os_packages
 
-# funcion to install Python dependencies
+# function to install Python dependencies
 install_python_deps() {
 
 if [ "$OS" == "ubuntu" -o "$OS" == "sl" ] ; then
@@ -185,12 +185,13 @@ python setup.py install
 # problems in the compilation test.
 cd tests
 
-# create auxilliary data
+# create auxiliary data
 echo
 echo 'building test data'
 echo 
 make -C pysam_data all
 make -C cbcf_data all
+make -C tabix_data all
 
 # run nosetests
 # -s: do not capture stdout, conflicts with pysam.dispatch
diff --git a/devtools/run_tests_travis.sh b/devtools/run_tests_travis.sh
index 9ad41a7..1f14fc3 100755
--- a/devtools/run_tests_travis.sh
+++ b/devtools/run_tests_travis.sh
@@ -37,8 +37,8 @@ conda config --add channels conda-forge
 
 # pin versions, so that tests do not fail when pysam/htslib out of step
 # add htslib dependencies
-# NB: we force conda-forge:ncurses due to bioconda/bioconda-recipes#13488
-conda install -y "samtools=1.9" "bcftools=1.9" "htslib=1.9" xz curl bzip2 conda-forge:ncurses
+# NB: force conda-forge:blas due to conda/conda#7548
+conda install -y "samtools>=1.11" "bcftools>=1.11" "htslib>=1.11" xz curl bzip2 "conda-forge::blas=*=openblas"
 
 # As HTSLIB_MODE is (defaulted to) 'shared', ensure we don't pick up
 # the external headers from the Conda-installed htslib package.
@@ -60,12 +60,13 @@ echo "============ installing via setup.py from repository ============"
 echo
 python setup.py install || exit
 
-# create auxilliary data
+# create auxiliary data
 echo
 echo 'building test data'
 echo
 make -C tests/pysam_data
 make -C tests/cbcf_data
+make -C tests/tabix_data
 
 # echo any limits that are in place
 ulimit -a
diff --git a/doc/api.rst b/doc/api.rst
index 3f2c042..6246c35 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -1,7 +1,4 @@
-======================================================
-pysam - An interface for reading and writing SAM files
-======================================================
-
+============
 Introduction
 ============
 
@@ -24,7 +21,7 @@ iteration returns a :class:`~pysam.AlignedSegment` object which
 represents a single read along with its fields and optional tags::
 
    for read in samfile.fetch('chr1', 100, 120):
-	print read
+       print(read)
 
    samfile.close()
 
@@ -41,8 +38,8 @@ You can also write to a :class:`~pysam.AlignmentFile`::
    samfile = pysam.AlignmentFile("ex1.bam", "rb")
    pairedreads = pysam.AlignmentFile("allpaired.bam", "wb", template=samfile)
    for read in samfile.fetch():
-	if read.is_paired:
-		pairedreads.write(read)
+       if read.is_paired:
+           pairedreads.write(read)
 
    pairedreads.close()
    samfile.close()
@@ -130,11 +127,12 @@ More detailed usage instructions is at :ref:`usage`.
 
        The pysam website containing documentation
 
+===
 API
 ===
 
 SAM/BAM/CRAM files
--------------------
+==================
 
 Objects of type :class:`~pysam.AlignmentFile` allow working with
 BAM/SAM formatted files.
@@ -162,7 +160,7 @@ a SAM/BAM file.
 
 
 Tabix files
------------
+===========
 
 :class:`~pysam.TabixFile` opens tabular files that have been
 indexed with tabix_.
@@ -191,14 +189,14 @@ To iterate over tabix files, use :func:`~pysam.tabix_iterator`:
    :members:
 
 
-Fasta files
------------
+FASTA files
+===========
 
 .. autoclass:: pysam.FastaFile
    :members:
 
-Fastq files
------------
+FASTQ files
+===========
 
 .. autoclass:: pysam.FastxFile
    :members:
@@ -208,8 +206,8 @@ Fastq files
    :members:
 
 
-VCF files
----------
+VCF/BCF files
+=============
 
 .. autoclass:: pysam.VariantFile
    :members:
@@ -224,7 +222,7 @@ VCF files
    :members:
 
 HTSFile
--------
+=======
 
 HTSFile is the base class for :class:`pysam.AlignmentFile` and
 :class:`pysam.VariantFile`.
diff --git a/doc/benchmarking.rst b/doc/benchmarking.rst
index 1ec0d43..8fc054a 100644
--- a/doc/benchmarking.rst
+++ b/doc/benchmarking.rst
@@ -1,3 +1,5 @@
+.. _Benchmarking:
+
 ============
 Benchmarking
 ============
diff --git a/doc/conf.py b/doc/conf.py
index 375aa55..39b6f45 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -11,13 +11,13 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import sys, os, glob
+import sys, os, sysconfig
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-_libdir = "../build/lib.%s-%s-%s.%s" % (os.uname()[0].lower(), os.uname()[4],
-                                        sys.version_info[0], sys.version_info[1])
+_pyversion = sysconfig.get_python_version()
+_libdir = "../build/lib.%s-%s" % (sysconfig.get_platform(), _pyversion)
 if os.path.exists(_libdir):
     sys.path.insert(0, os.path.abspath(_libdir))
 
@@ -32,7 +32,7 @@ extensions = ['sphinx.ext.autodoc',
               'sphinx.ext.intersphinx',
               'sphinx.ext.napoleon']
 
-intersphinx_mapping = {'python': ('http://docs.python.org/3.5', None)}
+intersphinx_mapping = {'python': ('https://docs.python.org/%s' % _pyversion, None)}
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -48,7 +48,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'pysam'
-copyright = u'2009, Andreas Heger, Kevin Jacobs et al.'
+copyright = u'2009–2021, Andreas Heger, Kevin Jacobs, et al'
 
 # Included at the end of each rst file
 rst_epilog = '''
@@ -61,6 +61,8 @@ rst_epilog = '''
 .. _Galaxy: https://main.g2.bx.psu.edu/
 .. _cython: http://cython.org/
 .. _python: http://python.org/
+.. _pypi: https://pypi.org/
+.. _pip: https://pip.pypa.io/
 .. _pyximport: http://www.prescod.net/pyximport/
 .. _conda: https://conda.io/docs/
 .. _bioconda: https://bioconda.github.io/
@@ -201,8 +203,8 @@ htmlhelp_basename = 'samtoolsdoc'
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-    ('index', 'pysam.tex', ur'pysam documentation',
-     ur'Andreas Heger, Kevin Jacobs et al.', 'manual'),
+    ('index', 'pysam.tex', u'pysam documentation',
+     u'Andreas Heger, Kevin Jacobs, et al.', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
diff --git a/doc/developer.rst b/doc/developer.rst
index 09ae832..ca49fdc 100644
--- a/doc/developer.rst
+++ b/doc/developer.rst
@@ -12,7 +12,7 @@ directories:
    Code specific to pysam
 
 :file:`doc`
-   The documentation. To build the latest documention type::
+   The documentation. To build the latest documentation type::
 
        make -C doc html
 
@@ -46,6 +46,17 @@ run::
 
    pytest tests
 
+Most tests use test data from the :file:`tests/*_data` directories.
+Some of these test data files are generated from other files in these
+directories, which is done by running ``make`` in each directory::
+
+   make -C tests/pysam_data
+   # etc
+
+Alternatively, if any :file:`tests/*_data/all.stamp` file is not already
+present, running the unit tests should generate that directory's data
+files automatically.
+
 Benchmarking
 ============
 
diff --git a/doc/faq.rst b/doc/faq.rst
index 62fe11d..fc39b60 100644
--- a/doc/faq.rst
+++ b/doc/faq.rst
@@ -5,17 +5,18 @@ FAQ
 How should I cite pysam
 =======================
 
-Pysam has not been published in print. When refering pysam, please
+Pysam has not been published in print. When referring to pysam, please
 use the github URL: https://github.com/pysam-developers/pysam. 
 As pysam is a wrapper around htslib and the samtools package, I
-suggest cite `Li et al (2009) <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`.
+suggest citing [Li.2009]_, [Bonfield.2021]_, and/or [Danecek.2021]_,
+as appropriate.
 
 Is pysam thread-safe?
 =====================
 
 Pysam is a mix of python and C code. Instructions within python are
 generally made thread-safe through python's `global interpreter lock`_
-(GIL_). This ensures that python data structures will always be in a
+(:dfn:`GIL`). This ensures that python data structures will always be in a
 consistent state. 
 
 If an external function outside python is called, the programmer has a
@@ -28,7 +29,7 @@ Alternatively, the GIL can be released while the external function is
 called. This will allow other threads to run concurrently. This can be
 beneficial if the external function is expected to halt, for example
 when waiting for data to read or write. However, to achieve
-thread-safety, the external function needs to implememented with
+thread-safety, the external function needs to be implemented with
 thread-safety in mind. This means that there can be no shared state
 between threads, or if there is shared, it needs to be controlled to
 prevent any access conflicts.
@@ -38,7 +39,7 @@ I/O intensive tasks. This is generally fine, but thread-safety of all
 parts have not been fully tested. 
 
 A related issue is when different threads read from the same file
-objec - or the same thread uses two iterators over a file. There is
+object - or the same thread uses two iterators over a file. There is
 only a single file-position for each opened file. To prevent this from
 hapeding, use the option ``multiple_iterator=True`` when calling
 a fetch() method. This will return an iterator on a newly opened
@@ -141,7 +142,7 @@ I can't call AlignmentFile.fetch on a file without index
 
 :meth:`~pysam.AlignmentFile.fetch` requires an index when
 iterating over a SAM/BAM file. To iterate over a file without
-index, use the ``until_eof=True`::
+index, use the ``until_eof=True``::
 
     bf = pysam.AlignmentFile(fname, "rb")
     for r in bf.fetch(until_eof=True):
diff --git a/doc/glossary.rst b/doc/glossary.rst
index 4e9fa57..0389270 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -48,6 +48,11 @@ Glossary
        Binary SAM format. BAM files are binary formatted, indexed and
        allow random access.
 
+   CRAM
+       CRAM is a binary format representing the same sequence alignment
+       information as SAM and BAM, but offering significantly better
+       lossless compression than BAM.
+
    TAM
        Text SAM file. TAM files are human readable files of
        tab-separated fields. TAM files do not allow random access.
@@ -106,6 +111,14 @@ Glossary
    BCF
       Binary :term:`VCF`
 
+   FASTA
+      Simple text format containing sequence data, with only the bare
+      minimum of metadata. Typically used for reference sequence data.
+
+   FASTQ
+      Simple text format containing sequence data and associated base
+      qualities.
+
    tabix
       Utility in the htslib package to index :term:`bgzip` compressed
       files.
diff --git a/doc/index.rst b/doc/index.rst
index 4e18b76..15de2ca 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -18,8 +18,7 @@ This module provides a low-level wrapper around the htslib_ C-API as
 using cython and a high-level, pythonic API for convenient access to
 the data within genomic file formats. 
 
-The current version wraps *htslib-1.10.2*, *samtools-1.10* and
-*bcftools-1.10.2*.
+The current version wraps *htslib-1.13*, *samtools-1.13*, and *bcftools-1.13*.
 
 To install the latest release, type::
 
@@ -54,9 +53,21 @@ Contents:
 References
 ----------
 
-.. [Li2009] The Sequence Alignment/Map format and SAMtools. Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup.
-   	    Bioinformatics. 2009 Aug 15;25(16):2078-9. Epub 2009 Jun 8.
-	    `PMID: 19505943 <http://www.ncbi.nlm.nih.gov/pubmed/19505943?dopt=Abstract>`_
+.. [Li.2009] *The Sequence Alignment/Map format and SAMtools.*
+   Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup.
+   Bioinformatics. 2009 Aug 15;25(16):2078-9.
+   Epub 2009 Jun 8 `btp352 <https://doi.org/10.1093/bioinformatics/btp352>`_.
+   PMID: `19505943 <https://pubmed.ncbi.nlm.nih.gov/19505943>`_.
+
+.. [Bonfield.2021] *HTSlib: C library for reading/writing high-throughput sequencing data.*
+   Bonfield JK, Marshall J, Danecek P, Li H, Ohan V, Whitwham A, Keane T, Davies RM.
+   GigaScience (2021) 10(2) `giab007 <https://doi.org/10.1093/gigascience/giab007>`_.
+   PMID: `33594436 <https://pubmed.ncbi.nlm.nih.gov/33594436>`_.
+
+.. [Danecek.2021] *Twelve years of SAMtools and BCFtools.*
+   Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H.
+   GigaScience (2021) 10(2) `giab008 <https://doi.org/10.1093/gigascience/giab008>`_.
+   PMID: `33590861 <https://pubmed.ncbi.nlm.nih.gov/33590861>`_.
 
 .. seealso::
  
diff --git a/doc/installation.rst b/doc/installation.rst
index 535f4bc..a286c27 100644
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -46,7 +46,7 @@ features. If these fail, for example due to missing library
 dependencies (`libcurl`, `libcrypto`), it will fall back to
 conservative defaults.
 
-Options can be passed to the configure script explicitely by
+Options can be passed to the configure script explicitly by
 setting the environment variable `HTSLIB_CONFIGURE_OPTIONS`.
 For example::
 
diff --git a/doc/release.rst b/doc/release.rst
index 06c602b..966ee6a 100644
--- a/doc/release.rst
+++ b/doc/release.rst
@@ -2,6 +2,48 @@
 Release notes
 =============
 
+Release 0.17.0
+==============
+
+This release wraps htslib/samtools/bcftools version 1.13. Corresponding
+to new samtools commands, `pysam.samtools` now has additional functions
+`ampliconclip`, `ampliconstats`, `fqimport`, and `version`.
+
+Bugs fixed:
+
+* [#447] The maximum QNAME length is fully restored to 254
+* [#506, #958, #1000] Don't crash the Python interpreter on ``pysam.bcftools.*()`` errors
+* [#603] count_coverage: ignore reads that have no SEQ field
+* [#928] Fix ``pysam.bcftools.mpileup()`` segmentation fault
+* [#983] Add win32/\*.[ch] to MANIFEST.in
+* [#994] Raise exception in ``get_tid()`` if header could not be parsed
+* [#995] Choose TBI/CSI in ``tabix_index()`` via both min_shift and csi
+* [#996] ``AlignmentFile.fetch()`` now works with large chromosomes longer than 2\ :sup:`29` bases
+* [#1019] Fix Sphinx documentation generation by avoiding Python 2 ``ur'string'`` syntax
+* [#1035] Improved handling of file iteration errors
+* [#1038] ``tabix_index()`` no longer leaks file descriptors
+* [#1040] ``print(aligned_segment)`` now prints the correct TLEN value
+  (it also now prints RNAME/RNEXT more clearly and prints POS/PNEXT 1-based)
+* *setup.py* no longer uses ``setup(use_2to3)`` for compatibility with setuptools >= v58.0.0
+
+New facilities:
+
+* [PR #963] Additional VCF classes are exposed to pysam programmers
+* [#998, PR #1001] Add ``get/set_encoding_error_handler()`` to control UTF-8 conversion
+* [PR #1012] Running ``python setup.py sdist`` now automatically runs cythonize
+* Running tests with ``pytest`` now automatically runs ``make`` to generate test data
+
+Documentation improvements:
+
+* [#726] Clarify get_forward_sequence/get_forward_qualities documentation
+* [#865] Improved example
+* [#968] ``get_index_statistics`` parameters
+* [#986] Clarify ``VariantFile.fetch`` start/stop region parameters are 0-based and half-open.
+* [#990] Corrected ``PileupColumn.get_query_sequences`` documentation
+* [#999] Fix documentation for ``AlignmentFile.get_reference_length()``
+* [#1002] Document the default min_base_quality for ``pileup()``
+
+
 Release 0.16.0
 ==============
 
@@ -149,7 +191,7 @@ Backwards incompatible changes:
 
   The rationale for this change is to have consistency between
   AlignmentFile and VariantFile.
-  	      
+
 * AlignmentFile and FastaFile now raise IOError instead of OSError
 
 Medium term we plan to have a 1.0 release. The pysam
@@ -190,6 +232,7 @@ contains a series of bugfixes.
 * [#473] A new FastxRecord class that can be instantiated from class and
   modified in-place. Replaces PersistentFastqProxy.
 * [#521] In AligmentFile, Simplify file detection logic and allow remote index files
+
   * Removed attempts to guess data and index file names; this is magic left
     to htslib.
   * Removed file existence check prior to opening files with htslib
@@ -200,6 +243,7 @@ contains a series of bugfixes.
   * Allow remote indices (tested using S3 signed URLs).
   * Document filepath_index and make it an alias for index_filename.
   * Added a require_index parameter to AlignmentFile
+
 * [#526] handle unset ref when creating new records
 * [#513] fix bcf_translate to skip deleted FORMAT fields to avoid
   segfaults
@@ -225,7 +269,7 @@ are created will need to change as the constructor requires a header::
     header = pysam.AlignmentHeader(
         reference_names=["chr1", "chr2"],
         reference_lengths=[1000, 1000])
-        
+
     read = pysam.AlignedSegment(header)
 
 This will affect all code that instantiates AlignedSegment objects
@@ -252,7 +296,7 @@ Release 0.11.2
 ==============
 
 This release wraps htslib/samtools/bcfools versions 1.4.1 in response
-to a security fix in these libraries. Additionaly the following
+to a security fix in these libraries. Additionally the following
 issues have been fixed:
 
 * [#452] add GFF3 support for tabix parsers
@@ -373,7 +417,7 @@ Overview
 --------
 
 The 0.9.0 release upgrades htslib to htslib 1.3 and numerous other
-enchancements and bugfixes. See below for a detailed list.
+enhancements and bugfixes. See below for a detailed list.
 
 `Htslib 1.3 <https://github.com/samtools/htslib/releases/tag/1.3>`_
 comes with additional capabilities for remote file access which depend
@@ -416,7 +460,7 @@ Detailed release notes
      and code bloat.
    * run configure for the builtin htslib library in order to detect
      optional libraries such as libcurl. Configure behaviour can be
-     controlled by setting the environmet variable
+     controlled by setting the environment variable
      HTSLIB_CONFIGURE_OPTIONS.
 * get_reference_sequence() now returns the reference sequence and not
   something looking like it. This bug had effects on
@@ -440,15 +484,17 @@ Potential isses when upgrading from v0.8.3:
 
 * renamed several methods for pep8 compatibility, old names still retained for	
   backwards compatibility, but should be considered deprecated.
+
    * gettid() is now get_tid()
    * getrname() is now get_reference_name()
    * parseRegion() is now parse_region()
 
 * some methods have changed for pep8 compatibility without the old
   names being present:
+
    * fromQualityString() is now qualitystring_to_array()
    * toQualityString() is now qualities_to_qualitystring()
-   
+
 * faidx now returns strings and not binary strings in py3.
 
 * The cython components have been broken up into smaller files with
@@ -557,7 +603,7 @@ Release 0.8.2
   with reading and writing capability. However, the interface is still
   incomplete and preliminary and lacks capability to mutate the
   resulting data.
-  
+
 Release 0.8.1
 =============
 
@@ -569,7 +615,7 @@ Release 0.8.1
   * issue #19: multiple iterators can now be made to work on the same tabix file
   * issue #24: All strings returned from/passed to the pysam API are now unicode in python 3
   * issue #5:  type guessing for lists of integers fixed    
-    
+
 * API changes for consistency. The old API is still present,
   but deprecated.
   In particular:
@@ -619,7 +665,7 @@ Other changes:
 
 Backwards incompatible changes
 
-* Empty cigarstring now returns None (intstead of '')
+* Empty cigarstring now returns None (instead of '')
 * Empty cigar now returns None (instead of [])
 * When using the extension classes in cython modules, AlignedRead
   needs to be substituted with AlignedSegment. 
@@ -686,18 +732,18 @@ Release 0.7.5
 
 Release 0.7.4
 =============
-	
+
 * further bugfixes to setup.py and package layout
 
 Release 0.7.3
 =============
-	
+
 * further bugfixes to setup.py
 * upgraded distribute_setup.py to 0.6.34
 
 Release 0.7.2
 =============
-  
+
 * bugfix in installer - failed when cython not present
 * changed installation locations of shared libraries
 
diff --git a/doc/usage.rst b/doc/usage.rst
index f4b7498..fc4f2bb 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -269,7 +269,8 @@ simple variant attributes such as :class:`~pysam.VariantRecord.contig`,
        print (rec.pos)
 
 but also to complex attributes such as the contents to the
-:term:`info`, :term:`format` and :term:`genotype` columns. These
+:class:`~pysam.VariantRecord.info`, :class:`~pysam.VariantRecord.format`
+and :term:`genotype` columns. These
 complex attributes are views on the underlying htslib data structures
 and provide dictionary-like access to the data::
 
diff --git a/import/pysam.c b/import/pysam.c
index 5692622..2a81e4d 100644
--- a/import/pysam.c
+++ b/import/pysam.c
@@ -1,6 +1,7 @@
 #include <ctype.h>
 #include <assert.h>
 #include <unistd.h>
+#include <setjmp.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -55,6 +56,25 @@ int @pysam@_puts(const char *s)
   return putc('\n', @pysam@_stdout);
 }
 
+
+static jmp_buf @pysam@_jmpbuf;
+static int @pysam@_status = 0;
+
+int @pysam@_dispatch(int argc, char *argv[])
+{
+  if (setjmp(@pysam@_jmpbuf) == 0)
+    return @pysam@_main(argc, argv);
+  else
+    return @pysam@_status;
+}
+
+void @pysam@_exit(int status)
+{
+  @pysam@_status = status;
+  longjmp(@pysam@_jmpbuf, 1);
+}
+
+
 void @pysam@_set_optind(int val)
 {
   // setting this in cython via 
diff --git a/import/pysam.h b/import/pysam.h
index 6abb884..8dbb09e 100644
--- a/import/pysam.h
+++ b/import/pysam.h
@@ -3,6 +3,17 @@
 
 #include <stdio.h>
 
+#ifndef __has_attribute
+#define __has_attribute(attribute) 0
+#endif
+#ifndef PYSAM_NORETURN
+#if __has_attribute(__noreturn__) || __GNUC__ >= 3
+#define PYSAM_NORETURN __attribute__((__noreturn__))
+#else
+#define PYSAM_NORETURN
+#endif
+#endif
+
 extern FILE * @pysam@_stderr;
 
 extern FILE * @pysam@_stdout;
@@ -40,6 +51,8 @@ int @pysam@_puts(const char *s);
 
 int @pysam@_dispatch(int argc, char *argv[]);
 
+void PYSAM_NORETURN @pysam@_exit(int status);
+
 void @pysam@_set_optind(int);
 
 extern int @pysam@_main(int argc, char *argv[]);
diff --git a/pysam.py b/pysam.py
deleted file mode 100644
index 0823abd..0000000
--- a/pysam.py
+++ /dev/null
@@ -1 +0,0 @@
-raise ImportError('''calling "import pysam" from the source directory is not supported - please import pysam from somewhere else.''')
diff --git a/pysam/__init__.py b/pysam/__init__.py
index 40877da..a6ff6d7 100644
--- a/pysam/__init__.py
+++ b/pysam/__init__.py
@@ -11,8 +11,8 @@ import pysam.libcfaidx as libcfaidx
 from pysam.libcfaidx import *
 import pysam.libctabix as libctabix
 from pysam.libctabix import *
-# import pysam.libctabixproxies as libctabixproxies
-# from pysam.libctabixproxies import *
+import pysam.libctabixproxies as libctabixproxies
+from pysam.libctabixproxies import *
 import pysam.libcsamfile as libcsamfile
 from pysam.libcsamfile import *
 import pysam.libcalignmentfile as libcalignmentfile
diff --git a/pysam/libcalignedsegment.pxd b/pysam/libcalignedsegment.pxd
index c964160..473c5b1 100644
--- a/pysam/libcalignedsegment.pxd
+++ b/pysam/libcalignedsegment.pxd
@@ -64,7 +64,7 @@ cdef class AlignedSegment:
 
 
 cdef class PileupColumn:
-    cdef bam_pileup1_t ** plp
+    cdef const bam_pileup1_t ** plp
     cdef int tid
     cdef int pos
     cdef int n_pu
@@ -89,7 +89,7 @@ cdef AlignedSegment makeAlignedSegment(
     AlignmentHeader header)
 
 cdef PileupColumn makePileupColumn(
-     bam_pileup1_t ** plp,
+    const bam_pileup1_t ** plp,
     int tid,
     int pos,
     int n_pu,
@@ -97,7 +97,7 @@ cdef PileupColumn makePileupColumn(
     char * reference_sequence,
     AlignmentHeader header)
 
-cdef PileupRead makePileupRead(bam_pileup1_t * src,
+cdef PileupRead makePileupRead(const bam_pileup1_t * src,
 		               AlignmentHeader header)
 
 cdef uint32_t get_alignment_length(bam1_t * src)
diff --git a/pysam/libcalignedsegment.pyx b/pysam/libcalignedsegment.pyx
index 5674b49..da7274c 100644
--- a/pysam/libcalignedsegment.pyx
+++ b/pysam/libcalignedsegment.pyx
@@ -134,7 +134,7 @@ cdef inline uint8_t strand_mark_char(uint8_t ch, bam1_t *b):
             return toupper(ch)
 
 
-cdef inline bint pileup_base_qual_skip(bam_pileup1_t * p, uint32_t threshold):
+cdef inline bint pileup_base_qual_skip(const bam_pileup1_t * p, uint32_t threshold):
     cdef uint32_t c
     if p.qpos < p.b.core.l_qseq:
         c = bam_get_qual(p.b)[p.qpos]
@@ -608,7 +608,7 @@ cdef AlignedSegment makeAlignedSegment(bam1_t *src,
 
 
 cdef class PileupColumn
-cdef PileupColumn makePileupColumn(bam_pileup1_t ** plp,
+cdef PileupColumn makePileupColumn(const bam_pileup1_t ** plp,
                       int tid,
                       int pos,
                       int n_pu,
@@ -635,7 +635,7 @@ cdef PileupColumn makePileupColumn(bam_pileup1_t ** plp,
 
 
 cdef class PileupRead
-cdef PileupRead makePileupRead(bam_pileup1_t *src,
+cdef PileupRead makePileupRead(const bam_pileup1_t *src,
                                AlignmentHeader header):
     '''return a PileupRead object construted from a bam_pileup1_t * object.'''
     # note that the following does not call __init__
@@ -784,7 +784,7 @@ cdef inline bytes build_alignment_sequence(bam1_t * src):
 
     # Check if MD tag is valid by matching CIGAR length to MD tag defined length
     # Insertions would be in addition to what is described by MD, so we calculate
-    # the number of insertions seperately.
+    # the number of insertions separately.
     cdef int insertions = 0
 
     while s[s_idx] != 0:
@@ -978,13 +978,13 @@ cdef class AlignedSegment:
         # requires a valid header.
         return "\t".join(map(str, (self.query_name,
                                    self.flag,
-                                   self.reference_id,
-                                   self.reference_start,
+                                   "#%d" % self.reference_id if self.reference_id >= 0 else "*",
+                                   self.reference_start + 1,
                                    self.mapping_quality,
                                    self.cigarstring,
-                                   self.next_reference_id,
-                                   self.next_reference_start,
-                                   self.query_alignment_length,
+                                   "#%d" % self.next_reference_id if self.next_reference_id >= 0 else "*",
+                                   self.next_reference_start + 1,
+                                   self.template_length,
                                    self.query_sequence,
                                    self.query_qualities,
                                    self.tags)))
@@ -1169,10 +1169,8 @@ cdef class AlignedSegment:
             if qname is None or len(qname) == 0:
                 return
 
-            # See issue #447
-            # (The threshold is 252 chars, but this includes a \0 byte.
-            if len(qname) > 251:
-                raise ValueError("query length out of range {} > 251".format(
+            if len(qname) > 254:
+                raise ValueError("query length out of range {} > 254".format(
                     len(qname)))
 
             qname = force_bytes(qname)
@@ -1392,9 +1390,9 @@ cdef class AlignedSegment:
            read.query_squence = read.query_sequence[5:10]
            read.query_qualities = q[5:10]
 
-        The sequence is returned as it is stored in the BAM file. Some mappers
-        might have stored a reverse complement of the original read
-        sequence.
+        The sequence is returned as it is stored in the BAM file. (This will
+        be the reverse complement of the original read sequence if the mapper
+        has aligned the read to the reverse strand.)
         """
         def __get__(self):
             if self.cache_query_sequence:
@@ -1570,7 +1568,7 @@ cdef class AlignedSegment:
         def __set__(self, val):
             pysam_update_flag(self._delegate, val, BAM_FUNMAP)
             # setting the unmapped flag requires recalculation of
-            # bin as alignment length is now implicitely 1
+            # bin as alignment length is now implicitly 1
             update_bin(self._delegate)
 
     property mate_is_unmapped:
@@ -1843,8 +1841,9 @@ cdef class AlignedSegment:
     def get_forward_sequence(self):
         """return the original read sequence.
 
-        Reads mapping to the reverse strand will be reverse
-        complemented.
+        Reads mapped to the reverse strand are stored reverse complemented in
+        the BAM file. This method returns such reads reverse complemented back
+        to their original orientation.
 
         Returns None if the record has no query sequence.
         """
@@ -1856,9 +1855,12 @@ cdef class AlignedSegment:
         return s
 
     def get_forward_qualities(self):
-        """return base qualities of the read sequence.
+        """return the original base qualities of the read sequence,
+        in the same format as the :attr:`query_qualities` property.
 
-        Reads mapping to the reverse strand will be reversed.
+        Reads mapped to the reverse strand have their base qualities stored
+        reversed in the BAM file. This method returns such reads' base qualities
+        reversed back to their original orientation.
         """
         if self.is_reverse:
             return self.query_qualities[::-1]
@@ -2242,7 +2244,7 @@ cdef class AlignedSegment:
         *value*.
 
         An existing value of the same *tag* will be overwritten unless
-        *replace* is set to False. This is usually not recommened as a
+        *replace* is set to False. This is usually not recommended as a
         tag may only appear once in the optional alignment section.
 
         If *value* is None, the tag will be deleted.
@@ -2468,7 +2470,7 @@ cdef class AlignedSegment:
             return value
 
     def get_tags(self, with_value_type=False):
-        """the fields in the optional aligment section.
+        """the fields in the optional alignment section.
 
         Returns a list of all fields in the optional
         alignment section. Values are converted to appropriate python
@@ -2841,7 +2843,7 @@ cdef class PileupColumn:
                 raise ValueError("PileupColumn accessed after iterator finished")
 
             cdef int x
-            cdef bam_pileup1_t * p = NULL
+            cdef const bam_pileup1_t * p = NULL
             pileups = []
 
             # warning: there could be problems if self.n and self.buf are
@@ -2893,7 +2895,7 @@ cdef class PileupColumn:
         cdef uint32_t x = 0
         cdef uint32_t c = 0
         cdef uint32_t cnt = 0
-        cdef bam_pileup1_t * p = NULL
+        cdef const bam_pileup1_t * p = NULL
         if self.plp == NULL or self.plp[0] == NULL:
             raise ValueError("PileupColumn accessed after iterator finished")
 
@@ -2941,7 +2943,7 @@ cdef class PileupColumn:
 
         mark_matches: bool
 
-          If True, output bases matching the reference as "," or "."
+          If True, output bases matching the reference as "." or ","
           for forward and reverse strand, respectively. This mark
           requires the reference sequence. If no reference is
           present, this option is ignored.
@@ -2969,7 +2971,7 @@ cdef class PileupColumn:
         cdef uint8_t cc = 0
         cdef uint8_t rb = 0
         cdef kstring_t * buf = &self.buf
-        cdef bam_pileup1_t * p = NULL
+        cdef const bam_pileup1_t * p = NULL
 
         if self.plp == NULL or self.plp[0] == NULL:
             raise ValueError("PileupColumn accessed after iterator finished")
@@ -3052,7 +3054,7 @@ cdef class PileupColumn:
         list: a list of quality scores
         """
         cdef uint32_t x = 0
-        cdef bam_pileup1_t * p = NULL
+        cdef const bam_pileup1_t * p = NULL
         cdef uint32_t c = 0
         result = []
         for x from 0 <= x < self.n_pu:
@@ -3083,7 +3085,7 @@ cdef class PileupColumn:
             raise ValueError("PileupColumn accessed after iterator finished")
 
         cdef uint32_t x = 0
-        cdef bam_pileup1_t * p = NULL
+        cdef const bam_pileup1_t * p = NULL
         result = []
         for x from 0 <= x < self.n_pu:
             p = &(self.plp[0][x])
@@ -3109,7 +3111,7 @@ cdef class PileupColumn:
             raise ValueError("PileupColumn accessed after iterator finished")
 
         cdef uint32_t x = 0
-        cdef bam_pileup1_t * p = NULL
+        cdef const bam_pileup1_t * p = NULL
         result = []
         for x from 0 <= x < self.n_pu:
             p = &(self.plp[0][x])
@@ -3135,7 +3137,7 @@ cdef class PileupColumn:
             raise ValueError("PileupColumn accessed after iterator finished")
 
         cdef uint32_t x = 0
-        cdef bam_pileup1_t * p = NULL
+        cdef const bam_pileup1_t * p = NULL
         result = []
         for x from 0 <= x < self.n_pu:
             p = &(self.plp[0][x])
diff --git a/pysam/libcalignmentfile.pxd b/pysam/libcalignmentfile.pxd
index 6ee4963..2a17fbe 100644
--- a/pysam/libcalignmentfile.pxd
+++ b/pysam/libcalignmentfile.pxd
@@ -58,24 +58,6 @@ cdef class AlignmentFile(HTSFile):
     cpdef int write(self, AlignedSegment read) except -1
 
 
-cdef class PileupColumn:
-    cdef bam_pileup1_t ** plp
-    cdef int tid
-    cdef int pos
-    cdef int n_pu
-
-
-cdef class PileupRead:
-    cdef AlignedSegment _alignment
-    cdef int32_t  _qpos
-    cdef int _indel
-    cdef int _level
-    cdef uint32_t _is_del
-    cdef uint32_t _is_head
-    cdef uint32_t _is_tail
-    cdef uint32_t _is_refskip
-
-
 cdef class IteratorRow:
     cdef int retval
     cdef bam1_t * b
@@ -124,7 +106,7 @@ cdef class IteratorColumn:
     cdef int pos
     cdef int n_plp
     cdef uint32_t min_base_quality
-    cdef bam_pileup1_t * plp
+    cdef const bam_pileup1_t * plp
     cdef bam_mplp_t pileup_iter
     cdef __iterdata iterdata
     cdef AlignmentFile samfile
diff --git a/pysam/libcalignmentfile.pyx b/pysam/libcalignmentfile.pyx
index b8e4230..e192ff3 100644
--- a/pysam/libcalignmentfile.pyx
+++ b/pysam/libcalignmentfile.pyx
@@ -100,7 +100,7 @@ IndexStats = collections.namedtuple("IndexStats",
 ########################################################
 ## global variables
 # maximum genomic coordinace
-# for some reason, using 'int' causes overlflow
+# for some reason, using 'int' causes overflow
 cdef int MAX_POS = (1 << 31) - 1
 
 # valid types for SAM headers
@@ -175,6 +175,12 @@ cdef AlignmentHeader makeAlignmentHeader(bam_hdr_t *hdr):
 
     return header
 
+def read_failure_reason(code):
+    if code == -2:
+        return 'truncated file'
+    else:
+        return "error {} while reading file".format(code)
+
 
 # the following should be class-method for VariantHeader, but cdef @classmethods
 # are not implemented in cython.
@@ -522,7 +528,10 @@ cdef class AlignmentHeader(object):
         returns -1 if reference is not known.
         """
         reference = force_bytes(reference)
-        return bam_name2id(self.ptr, reference)
+        tid = bam_name2id(self.ptr, reference)
+        if tid < -1:
+            raise ValueError('could not parse header')
+        return tid
 
     def __str__(self):
         '''string with the full contents of the :term:`sam file` header as a
@@ -1029,7 +1038,7 @@ cdef class AlignmentFile(HTSFile):
 
         See :meth:`~pysam.HTSFile.parse_region` for more information
         on how genomic regions can be specified. :term:`reference` and
-        `end` are also accepted for backward compatiblity as synonyms
+        `end` are also accepted for backward compatibility as synonyms
         for :term:`contig` and `stop`, respectively.
 
         Without a `contig` or `region` all mapped reads in the file
@@ -1212,7 +1221,7 @@ cdef class AlignmentFile(HTSFile):
         """perform a :term:`pileup` within a :term:`region`. The region is
         specified by :term:`contig`, `start` and `stop` (using
         0-based indexing).  :term:`reference` and `end` are also accepted for
-        backward compatiblity as synonyms for :term:`contig` and `stop`,
+        backward compatibility as synonyms for :term:`contig` and `stop`,
         respectively.  Alternatively, a samtools 'region' string
         can be supplied.
 
@@ -1239,7 +1248,7 @@ cdef class AlignmentFile(HTSFile):
 
            By default, the samtools pileup engine outputs all reads
            overlapping a region. If truncate is True and a region is
-           given, only columns in the exact region specificied are
+           given, only columns in the exact region specified are
            returned.
 
         max_depth : int
@@ -1288,7 +1297,7 @@ cdef class AlignmentFile(HTSFile):
         min_base_quality: int
 
            Minimum base quality. Bases below the minimum quality will
-           not be output.
+           not be output. The default is 13.
 
         adjust_capq_threshold: int
 
@@ -1354,7 +1363,7 @@ cdef class AlignmentFile(HTSFile):
 
         The region is specified by :term:`contig`, `start` and `stop`.
         :term:`reference` and `end` are also accepted for backward
-        compatiblity as synonyms for :term:`contig` and `stop`,
+        compatibility as synonyms for :term:`contig` and `stop`,
         respectively.  Alternatively, a :term:`samtools` :term:`region`
         string can be supplied.
 
@@ -1458,7 +1467,7 @@ cdef class AlignmentFile(HTSFile):
 
         The region is specified by :term:`contig`, `start` and `stop`.
         :term:`reference` and `end` are also accepted for backward
-        compatiblity as synonyms for :term:`contig` and `stop`,
+        compatibility as synonyms for :term:`contig` and `stop`,
         respectively.  Alternatively, a :term:`samtools` :term:`region`
         string can be supplied.  The coverage is computed per-base [ACGT].
 
@@ -1575,6 +1584,8 @@ cdef class AlignmentFile(HTSFile):
 
             # count
             seq = read.seq
+            if seq is None:
+                continue
             quality = read.query_qualities
 
             for qpos, refpos in read.get_aligned_pairs(True):
@@ -1779,7 +1790,8 @@ cdef class AlignmentFile(HTSFile):
 
     property nocoordinate:
         """int with total number of reads without coordinates according to the
-        statistics recorded in the index. This is a read-only attribute.
+        statistics recorded in the index, i.e., the statistic printed for "*"
+        by the ``samtools idxstats`` command. This is a read-only attribute.
         """
         def __get__(self):
             self.check_index()
@@ -1790,7 +1802,8 @@ cdef class AlignmentFile(HTSFile):
 
     def get_index_statistics(self):
         """return statistics about mapped/unmapped reads per chromosome as
-        they are stored in the index.
+        they are stored in the index, similarly to the statistics printed
+        by the ``samtools idxstats`` command.
 
         Returns:
             list :
@@ -1846,12 +1859,12 @@ cdef class AlignmentFile(HTSFile):
 
     def __next__(self):
         cdef int ret = self.cnext()
-        if (ret >= 0):
+        if ret >= 0:
             return makeAlignedSegment(self.b, self.header)
-        elif ret == -2:
-            raise IOError('truncated file')
-        else:
+        elif ret == -1:
             raise StopIteration
+        else:
+            raise IOError(read_failure_reason(ret))
 
     ###########################################
     # methods/properties referencing the header
@@ -1886,7 +1899,7 @@ cdef class AlignmentFile(HTSFile):
 
     def get_reference_length(self, reference):
         """
-        return :term:`reference` name corresponding to numerical :term:`tid`
+        return :term:`reference` length corresponding to numerical :term:`tid`
         """
         if self.header is None:
             raise ValueError("header not available in closed files")
@@ -2138,10 +2151,10 @@ cdef class IteratorRowHead(IteratorRow):
         if ret >= 0:
             self.current_row += 1
             return makeAlignedSegment(self.b, self.header)
-        elif ret == -2:
-            raise IOError('truncated file')
-        else:
+        elif ret == -1:
             raise StopIteration
+        else:
+            raise IOError(read_failure_reason(ret))
 
 
 cdef class IteratorRowAll(IteratorRow):
@@ -2183,10 +2196,10 @@ cdef class IteratorRowAll(IteratorRow):
         cdef int ret = self.cnext()
         if ret >= 0:
             return makeAlignedSegment(self.b, self.header)
-        elif ret == -2:
-            raise IOError('truncated file')
-        else:
+        elif ret == -1:
             raise StopIteration
+        else:
+            raise IOError(read_failure_reason(ret))
 
 
 cdef class IteratorRowAllRefs(IteratorRow):
@@ -2217,7 +2230,7 @@ cdef class IteratorRowAllRefs(IteratorRow):
         self.rowiter = IteratorRowRegion(self.samfile,
                                          self.tid,
                                          0,
-                                         1<<29)
+                                         MAX_POS)
         # set htsfile and header of the rowiter
         # to the values in this iterator to reflect multiple_iterators
         self.rowiter.htsfile = self.htsfile
@@ -2301,10 +2314,10 @@ cdef class IteratorRowSelection(IteratorRow):
         cdef int ret = self.cnext()
         if ret >= 0:
             return makeAlignedSegment(self.b, self.header)
-        elif ret == -2:
-            raise IOError('truncated file')
-        else:
+        elif ret == -1:
             raise StopIteration
+        else:
+            raise IOError(read_failure_reason(ret))
 
 
 cdef int __advance_nofilter(void *data, bam1_t *b):
@@ -2434,7 +2447,7 @@ cdef class IteratorColumn:
 
     For reasons of efficiency, the iterator points to the current
     pileup buffer. The pileup buffer is updated at every iteration.
-    This might cause some unexpected behavious. For example,
+    This might cause some unexpected behaviour. For example,
     consider the conversion to a list::
 
        f = AlignmentFile("file.bam", "rb")
@@ -2661,7 +2674,7 @@ cdef class IteratorColumn:
         # reset in order to avoid memory leak messages for iterators
         # that have not been fully consumed
         self._free_pileup_iter()
-        self.plp = <bam_pileup1_t*>NULL
+        self.plp = <const bam_pileup1_t*>NULL
 
         if self.iterdata.seq != NULL:
             free(self.iterdata.seq)
@@ -2858,9 +2871,7 @@ cdef class SNPCall:
 
 
 cdef class IndexedReads:
-    """*(AlignmentFile samfile, multiple_iterators=True)
-
-    Index a Sam/BAM-file by query name while keeping the
+    """Index a Sam/BAM-file by query name while keeping the
     original sort order intact.
 
     The index is kept in memory and can be substantial.
diff --git a/pysam/libcbcf.pyx b/pysam/libcbcf.pyx
index c9bcbd2..05a5fe8 100644
--- a/pysam/libcbcf.pyx
+++ b/pysam/libcbcf.pyx
@@ -106,6 +106,24 @@ from pysam.utils import unquoted_str
 __all__ = ['VariantFile',
            'VariantHeader',
            'VariantHeaderRecord',
+           'VariantHeaderRecords',
+           'VariantMetadata',
+           'VariantHeaderMetadata',
+           'VariantContig',
+           'VariantHeaderContigs',
+           'VariantHeaderSamples',
+           'VariantRecordFilter',
+           'VariantRecordFormat',
+           'VariantRecordInfo',
+           'VariantRecordSamples',
+           'VariantRecord',
+           'VariantRecordSample',
+           'BaseIndex',
+           'BCFIndex',
+           'TabixIndex',
+           'BaseIterator',
+           'BCFIterator',
+           'TabixIterator',
            'VariantRecord']
 
 ########################################################################
@@ -125,7 +143,7 @@ cdef tuple METADATA_LENGTHS = ('FIXED', 'VARIABLE', 'A', 'G', 'R')
 ########################################################################
 
 from pysam.libcutils cimport force_bytes, force_str, charptr_to_str, charptr_to_str_w_len
-from pysam.libcutils cimport encode_filename, from_string_and_size
+from pysam.libcutils cimport encode_filename, from_string_and_size, decode_bytes
 
 
 ########################################################################
@@ -166,7 +184,7 @@ cdef inline bcf_str_cache_get_charptr(const char* s):
 ########################################################################
 
 cdef int comb(int n, int k) except -1:
-    """Return binomial coeffient: n choose k
+    """Return binomial coefficient: n choose k
 
     >>> comb(5, 1)
     5
@@ -284,7 +302,7 @@ cdef bcf_array_to_object(void *data, int type, ssize_t n, ssize_t count, int sca
             else:
                 # Otherwise, copy the entire block
                 b = datac[:n]
-            value = tuple(v.decode('utf-8') if v and v != bcf_str_missing else None for v in b.split(b','))
+            value = tuple(decode_bytes(v, 'utf-8') if v and v != bcf_str_missing else None for v in b.split(b','))
     else:
         value = []
         if type == BCF_BT_INT8:
@@ -3141,7 +3159,7 @@ cdef class VariantRecord(object):
         # causes a memory leak https://github.com/pysam-developers/pysam/issues/773
         # return bcf_str_cache_get_charptr(r.d.id) if r.d.id != b'.' else None
         if (r.d.m_id == 0):
-            raise ValueError('Error extracing ID')
+            raise ValueError('Error extracting ID')
         return charptr_to_str(r.d.id) if r.d.id != b'.' else None
 
     @id.setter
@@ -3755,7 +3773,7 @@ cdef class BaseIterator(object):
     pass
 
 
-# Interal function to clean up after iteration stop or failure.
+# Internal function to clean up after iteration stop or failure.
 # This would be a nested function if it weren't a cdef function.
 cdef void _stop_BCFIterator(BCFIterator self, bcf1_t *record):
     bcf_destroy1(record)
@@ -3786,7 +3804,7 @@ cdef class BCFIterator(BaseIterator):
         try:
             rid = index.refmap[contig]
         except KeyError:
-            # A query for a non-existant contig yields an empty iterator, does not raise an error
+            # A query for a non-existent contig yields an empty iterator, does not raise an error
             self.iter = NULL
             return
 
@@ -3874,7 +3892,7 @@ cdef class TabixIterator(BaseIterator):
         try:
             rid = index.refmap[contig]
         except KeyError:
-            # A query for a non-existant contig yields an empty iterator, does not raise an error
+            # A query for a non-existent contig yields an empty iterator, does not raise an error
             self.iter = NULL
             return
 
@@ -4346,9 +4364,10 @@ cdef class VariantFile(HTSFile):
         return bcf_str_cache_get_charptr(bcf_hdr_id2name(hdr, rid))
 
     def fetch(self, contig=None, start=None, stop=None, region=None, reopen=False, end=None, reference=None):
-        """fetch records in a :term:`region` using 0-based indexing. The
-        region is specified by :term:`contig`, *start* and *end*.
-        Alternatively, a samtools :term:`region` string can be supplied.
+        """fetch records in a :term:`region`, specified either by
+        :term:`contig`, *start*, and *stop* (which are 0-based, half-open);
+        or alternatively by a samtools :term:`region` string (which is
+        1-based inclusive).
 
         Without *contig* or *region* all mapped records will be fetched.  The
         records will be returned ordered by contig, which will not necessarily
diff --git a/pysam/libcbcftools.pxd b/pysam/libcbcftools.pxd
index 62a6f3d..d57f784 100644
--- a/pysam/libcbcftools.pxd
+++ b/pysam/libcbcftools.pxd
@@ -1,6 +1,6 @@
 cdef extern from "bcftools.pysam.h":
 
-    int bcftools_main(int argc, char *argv[])
+    int bcftools_dispatch(int argc, char *argv[])
     void bcftools_set_stderr(int fd)
     void bcftools_close_stderr()
     void bcftools_set_stdout(int fd)
diff --git a/pysam/libcfaidx.pyx b/pysam/libcfaidx.pyx
index a70d42d..e73adf9 100644
--- a/pysam/libcfaidx.pyx
+++ b/pysam/libcfaidx.pyx
@@ -496,7 +496,7 @@ cdef class FastxRecord:
 
 
 cdef class FastxFile:
-    """Stream access to :term:`fasta` or :term:`fastq` formatted files.
+    r"""Stream access to :term:`fasta` or :term:`fastq` formatted files.
 
     The file is automatically opened.
 
@@ -541,7 +541,7 @@ cdef class FastxFile:
     ...        print(entry.quality)
     >>> with pysam.FastxFile(filename) as fin, open(out_filename, mode='w') as fout:
     ...    for entry in fin:
-    ...        fout.write(str(entry))
+    ...        fout.write(str(entry) + '\n')
 
     """
     def __cinit__(self, *args, **kwargs):
diff --git a/pysam/libchtslib.pxd b/pysam/libchtslib.pxd
index 370e492..9684ef9 100644
--- a/pysam/libchtslib.pxd
+++ b/pysam/libchtslib.pxd
@@ -275,7 +275,7 @@ cdef extern from "htslib/bgzf.h" nogil:
     int SEEK_SET
 
     #  Return a virtual file pointer to the current location in the file.
-    #  No interpetation of the value should be made, other than a subsequent
+    #  No interpretation of the value should be made, other than a subsequent
     #  call to bgzf_seek can be used to position the file at the same point.
     #  Return value is non-negative on success.
     int64_t bgzf_tell(BGZF *fp)
@@ -326,7 +326,7 @@ cdef extern from "htslib/bgzf.h" nogil:
     #  Read one line from a BGZF file. It is faster than bgzf_getc()
     #
     #  @param fp     BGZF file handler
-    #  @param delim  delimitor
+    #  @param delim  delimiter
     #  @param str    string to write to; must be initialized
     #  @return       length of the string; 0 on end-of-file; negative on error
     int bgzf_getline(BGZF *fp, int delim, kstring_t *str)
@@ -796,7 +796,7 @@ cdef extern from "htslib/hts.h" nogil:
 
     ctypedef struct hts_md5_context
 
-    # /*! @abstract   Intialises an MD5 context.
+    # /*! @abstract   Initialises an MD5 context.
     #  *  @discussion
     #  *    The expected use is to allocate an hts_md5_context using
     #  *    hts_md5_init().  This pointer is then passed into one or more calls
@@ -1353,10 +1353,10 @@ cdef extern from "htslib/tbx.h" nogil:
 
     # tbx.h definitions
     int8_t TBX_MAX_SHIFT
-    int8_t TBX_GENERIC
-    int8_t TBX_SAM
-    int8_t TBX_VCF
-    int8_t TBX_UCSC
+    int32_t TBX_GENERIC
+    int32_t TBX_SAM
+    int32_t TBX_VCF
+    int32_t TBX_UCSC
 
     ctypedef struct tbx_conf_t:
         int32_t preset
@@ -1418,7 +1418,7 @@ cdef extern from "htslib/vcf.h" nogil:
 
     # === Dictionary ===
     #
-    # The header keeps three dictonaries. The first keeps IDs in the
+    # The header keeps three dictionaries. The first keeps IDs in the
     # "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths
     # in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[]
     # is the actual hash table, which is opaque to the end users. In the hash
@@ -2112,8 +2112,7 @@ cdef extern from "htslib/vcfutils.h" nogil:
     # be determined.
     #
     # The value of @which determines if existing INFO/AC,AN can be
-    # used (BCF_UN_INFO) and and if indv fields can be splitted
-    # (BCF_UN_FMT).
+    # used (BCF_UN_INFO) and if indv fields can be split (BCF_UN_FMT).
     int bcf_calc_ac(const bcf_hdr_t *header, bcf1_t *line, int *ac, int which)
 
     # bcf_gt_type() - determines type of the genotype
@@ -2261,7 +2260,7 @@ cdef extern from "htslib/cram.h" nogil:
     # the container, meaning multiple compression headers to manipulate.
     # Changing RG may change the size of the compression header and
     # therefore the length field in the container.  Hence we rewrite all
-    # blocks just incase and also emit the adjusted container.
+    # blocks just in case and also emit the adjusted container.
     #
     # The current implementation can only cope with renumbering a single
     # RG (and only then if it is using HUFFMAN or BETA codecs).  In
@@ -2511,7 +2510,7 @@ cdef extern from "htslib/cram.h" nogil:
     #         2 if the file is a stream and thus unseekable
     #         1 if the file contains an EOF block
     #         0 if the file does not contain an EOF block
-    #        -1 if an error occured whilst reading the file or we could not seek back to where we were
+    #        -1 if an error occurred whilst reading the file or we could not seek back to where we were
     #
     #
     int cram_check_EOF(cram_fd *fd)
diff --git a/pysam/libchtslib.pyx b/pysam/libchtslib.pyx
index 92d4e8f..778fc23 100644
--- a/pysam/libchtslib.pyx
+++ b/pysam/libchtslib.pyx
@@ -72,7 +72,7 @@ cdef class HFile(object):
     cdef hFILE *fp
     cdef readonly object name, mode
 
-    def __init__(self, name, mode='r', closedf=True):
+    def __init__(self, name, mode='r', closefd=True):
         self._open(name, mode, closefd=True)
 
     def __dealloc__(self):
@@ -585,7 +585,7 @@ cdef class HTSFile(object):
                 rval = hts_opt_apply(self.htsfile, opts)
                 if rval != 0:
                     hts_opt_free(opts)
-                    raise RuntimeError('An error occured while applying the requested format options')
+                    raise RuntimeError('An error occurred while applying the requested format options')
                 hts_opt_free(opts)
 
     def parse_region(self, contig=None, start=None, stop=None,
@@ -595,7 +595,7 @@ cdef class HTSFile(object):
         either be specified by :term:`contig`, `start` and
         `stop`. `start` and `stop` denote 0-based, half-open
         intervals. :term:`reference` and `end` are also accepted for
-        backward compatiblity as synonyms for :term:`contig` and
+        backward compatibility as synonyms for :term:`contig` and
         `stop`, respectively.
 
         Alternatively, a samtools :term:`region` string can be
diff --git a/pysam/libcsamtools.pxd b/pysam/libcsamtools.pxd
index 70fda60..3c39476 100644
--- a/pysam/libcsamtools.pxd
+++ b/pysam/libcsamtools.pxd
@@ -1,6 +1,6 @@
 cdef extern from "samtools.pysam.h":
 
-    int samtools_main(int argc, char *argv[])
+    int samtools_dispatch(int argc, char *argv[])
     void samtools_set_stderr(int fd)
     void samtools_close_stderr()
     void samtools_set_stdout(int fd)
diff --git a/pysam/libctabix.pyx b/pysam/libctabix.pyx
index e581b61..4436420 100644
--- a/pysam/libctabix.pyx
+++ b/pysam/libctabix.pyx
@@ -53,7 +53,6 @@
 # DEALINGS IN THE SOFTWARE.
 #
 ###############################################################################
-import binascii
 import os
 import sys
 
@@ -75,8 +74,8 @@ from pysam.libchtslib cimport htsFile, hts_open, hts_close, HTS_IDX_START,\
     tbx_index_build2, tbx_index_load2, tbx_itr_queryi, tbx_itr_querys, \
     tbx_conf_t, tbx_seqnames, tbx_itr_next, tbx_itr_destroy, \
     tbx_destroy, hisremote, region_list, hts_getline, \
-    TBX_GENERIC, TBX_SAM, TBX_VCF, TBX_UCSC, htsExactFormat, bcf, \
-    bcf_index_build2
+    TBX_GENERIC, TBX_SAM, TBX_VCF, TBX_UCSC, hts_get_format, htsFormat, \
+    no_compression, bcf, bcf_index_build2
 
 from pysam.libcutils cimport force_bytes, force_str, charptr_to_str
 from pysam.libcutils cimport encode_filename, from_string_and_size
@@ -302,7 +301,7 @@ cdef class TabixFile:
 
     index : string
         The filename of the index. If not set, the default is to
-        assume that the index is called ``filename.tbi`
+        assume that the index is called ``filename.tbi``
 
     mode : char
         The file opening mode. Currently, only ``r`` is permitted.
@@ -581,7 +580,7 @@ cdef class TabixFile:
     property contigs:
         '''list of chromosome names'''
         def __get__(self):
-            cdef char ** sequences
+            cdef const char ** sequences
             cdef int nsequences
             
             with nogil:
@@ -880,13 +879,6 @@ def tabix_compress(filename_in,
             raise IOError("error %i when closing file %s" % (r, filename_in))
 
 
-def is_gzip_file(filename):
-    gzip_magic_hex = b'1f8b'
-    fd = os.open(filename, os.O_RDONLY)
-    header = os.read(fd, 2)
-    return header == binascii.a2b_hex(gzip_magic_hex)
-
-
 def tabix_index(filename,
                 force=False,
                 seq_col=None,
@@ -928,16 +920,13 @@ def tabix_index(filename,
     compressed. The original file will be removed and only the compressed
     file will be retained.
 
-    *min-shift* sets the minimal interval size to 1<<INT; 0 for the
-    old tabix index. The default of -1 is changed inside htslib to 
-    the old tabix default of 0.
+    By default or when *min_shift* is 0, creates a TBI index. If *min_shift*
+    is greater than zero and/or *csi* is True, creates a CSI index with a
+    minimal interval size of 1<<*min_shift* (1<<14 if only *csi* is set).
 
     *index* controls the filename which should be used for creating the index.
     If not set, the default is to append ``.tbi`` to *filename*.
 
-    If *csi* is set, create a CSI index, the default is to create a
-    TBI index.
-
     When automatically compressing files, if *keep_original* is set the
     uncompressed file will not be deleted.
 
@@ -945,27 +934,29 @@ def tabix_index(filename,
 
     '''
     
-    if not os.path.exists(filename):
-        raise IOError("No such file '%s'" % filename)
-
     if preset is None and \
        (seq_col is None or start_col is None or end_col is None):
         raise ValueError(
             "neither preset nor seq_col,start_col and end_col given")
 
-    if not is_gzip_file(filename):
-        tabix_compress(filename, filename + ".gz", force=force)
-        if not keep_original:
-            os.unlink(filename)
-        filename += ".gz"
-
     fn = encode_filename(filename)
     cdef char *cfn = fn
 
     cdef htsFile *fp = hts_open(cfn, "r")
-    cdef htsExactFormat fmt = fp.format.format
+    if fp == NULL:
+        raise IOError("Could not open file '%s': %s" % (filename, force_str(strerror(errno))))
+
+    cdef htsFormat fmt = hts_get_format(fp)[0]
     hts_close(fp)
-    
+
+    if fmt.compression == no_compression:
+        tabix_compress(filename, filename + ".gz", force=force)
+        if not keep_original:
+            os.unlink(filename)
+        filename += ".gz"
+        fn = encode_filename(filename)
+        cfn = fn
+
     # columns (1-based):
     #   preset-code, contig, start, end, metachar for
     #     comments, lines to ignore at beginning
@@ -979,10 +970,8 @@ def tabix_index(filename,
         }
     
     conf_data = None
-    if preset == "bcf" or fmt == bcf:
+    if preset == "bcf" or fmt.format == bcf:
         csi = True
-        if min_shift == -1:
-            min_shift = 14
     elif preset:
         try:
             conf_data = preset2conf[preset]
@@ -1010,10 +999,13 @@ def tabix_index(filename,
     if conf_data:
         conf.preset, conf.sc, conf.bc, conf.ec, conf.meta_char, conf.line_skip = conf_data
 
-    if csi:
+    if csi or min_shift > 0:
         suffix = ".csi"
+        if min_shift <= 0: min_shift = 14
     else:
         suffix = ".tbi"
+        min_shift = 0
+
     index = index or filename + suffix    
     fn_index = encode_filename(index)
 
@@ -1024,7 +1016,7 @@ def tabix_index(filename,
     cdef char *fnidx = fn_index
     cdef int retval = 0
 
-    if csi and fmt == bcf:
+    if csi and fmt.format == bcf:
         with nogil:
             retval = bcf_index_build2(cfn, fnidx, min_shift)
     else:
diff --git a/pysam/libcutils.pxd b/pysam/libcutils.pxd
index 9e1cce1..d78b706 100644
--- a/pysam/libcutils.pxd
+++ b/pysam/libcutils.pxd
@@ -14,15 +14,21 @@ cpdef array_to_qualitystring(c_array.array arr, int offset=*)
 cpdef qualities_to_qualitystring(qualities, int offset=*)
 
 ########################################################################
+## String encoding configuration facilities
 ########################################################################
+
+cpdef get_encoding_error_handler()
+cpdef set_encoding_error_handler(name)
+
 ########################################################################
 ## Python 3 compatibility functions
 ########################################################################
-cdef charptr_to_str(const char *s, encoding=*)
-cdef bytes charptr_to_bytes(const char *s, encoding=*)
-cdef charptr_to_str_w_len(const char* s, size_t n, encoding=*)
-cdef force_str(object s, encoding=*)
-cdef bytes force_bytes(object s, encoding=*)
+cdef charptr_to_str(const char *s, encoding=*, errors=*)
+cdef bytes charptr_to_bytes(const char *s, encoding=*, errors=*)
+cdef charptr_to_str_w_len(const char* s, size_t n, encoding=*, errors=*)
+cdef force_str(object s, encoding=*, errors=*)
+cdef bytes force_bytes(object s, encoding=*, errors=*)
+cdef decode_bytes(bytes s, encoding=*, errors=*)
 cdef bytes encode_filename(object filename)
 cdef from_string_and_size(const char *s, size_t length)
 
diff --git a/pysam/libcutils.pyx b/pysam/libcutils.pyx
index fe61bb8..adc9cec 100644
--- a/pysam/libcutils.pyx
+++ b/pysam/libcutils.pyx
@@ -6,6 +6,7 @@ import tempfile
 import os
 import io
 from contextlib import contextmanager
+from codecs import register_error
 
 from cpython.version cimport PY_MAJOR_VERSION, PY_MINOR_VERSION
 from cpython cimport PyBytes_Check, PyUnicode_Check
@@ -17,10 +18,10 @@ from libc.stdio cimport fprintf, stderr, fflush
 from libc.stdio cimport stdout as c_stdout
 from posix.fcntl cimport open as c_open, O_WRONLY
 
-from libcsamtools cimport samtools_main, samtools_set_stdout, samtools_set_stderr, \
+from libcsamtools cimport samtools_dispatch, samtools_set_stdout, samtools_set_stderr, \
     samtools_close_stdout, samtools_close_stderr, samtools_set_stdout_fn, samtools_set_optind
 
-from libcbcftools cimport bcftools_main, bcftools_set_stdout, bcftools_set_stderr, \
+from libcbcftools cimport bcftools_dispatch, bcftools_set_stdout, bcftools_set_stderr, \
     bcftools_close_stdout, bcftools_close_stderr, bcftools_set_stdout_fn, bcftools_set_optind
 
 #####################################################################
@@ -82,7 +83,27 @@ cpdef qualities_to_qualitystring(qualities, int offset=33):
 
 
 ########################################################################
+## String encoding configuration facilities
 ########################################################################
+
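+# Codec error handler: maps the bad byte at exception.start to ISO-8859-1 and resumes at exception.end (NOTE(review): any later bytes in a multi-byte error range are skipped).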
+def latin1_replace(exception):
+    return (chr(exception.object[exception.start]), exception.end)
+
+register_error('pysam.latin1replace', latin1_replace)
+
+
+cdef str ERROR_HANDLER = 'strict'
+
+cpdef get_encoding_error_handler():
+    return ERROR_HANDLER
+
+cpdef set_encoding_error_handler(name):
+    global ERROR_HANDLER
+    previous = ERROR_HANDLER
+    ERROR_HANDLER = name
+    return previous
+
 ########################################################################
 ## Python 3 compatibility functions
 ########################################################################
@@ -91,7 +112,7 @@ cdef bint IS_PYTHON3 = PY_MAJOR_VERSION >= 3
 
 cdef from_string_and_size(const char* s, size_t length):
     if IS_PYTHON3:
-        return s[:length].decode("utf8")
+        return s[:length].decode('utf-8', ERROR_HANDLER)
     else:
         return s[:length]
 
@@ -115,7 +136,7 @@ cdef bytes encode_filename(object filename):
         raise TypeError("Argument must be string or unicode.")
 
 
-cdef bytes force_bytes(object s, encoding=TEXT_ENCODING):
+cdef bytes force_bytes(object s, encoding=None, errors=None):
     """convert string or unicode object to bytes, assuming
     utf8 encoding.
     """
@@ -124,37 +145,37 @@ cdef bytes force_bytes(object s, encoding=TEXT_ENCODING):
     elif PyBytes_Check(s):
         return s
     elif PyUnicode_Check(s):
-        return s.encode(encoding)
+        return s.encode(encoding or TEXT_ENCODING, errors or ERROR_HANDLER)
     else:
         raise TypeError("Argument must be string, bytes or unicode.")
 
 
-cdef charptr_to_str(const char* s, encoding=TEXT_ENCODING):
+cdef charptr_to_str(const char* s, encoding=None, errors=None):
     if s == NULL:
         return None
     if PY_MAJOR_VERSION < 3:
         return s
     else:
-        return s.decode(encoding)
+        return s.decode(encoding or TEXT_ENCODING, errors or ERROR_HANDLER)
 
 
-cdef charptr_to_str_w_len(const char* s, size_t n, encoding=TEXT_ENCODING):
+cdef charptr_to_str_w_len(const char* s, size_t n, encoding=None, errors=None):
     if s == NULL:
         return None
     if PY_MAJOR_VERSION < 3:
         return s[:n]
     else:
-        return s[:n].decode(encoding)
+        return s[:n].decode(encoding or TEXT_ENCODING, errors or ERROR_HANDLER)
 
 
-cdef bytes charptr_to_bytes(const char* s, encoding=TEXT_ENCODING):
+cdef bytes charptr_to_bytes(const char* s, encoding=None, errors=None):
     if s == NULL:
         return None
     else:
         return s
 
 
-cdef force_str(object s, encoding=TEXT_ENCODING):
+cdef force_str(object s, encoding=None, errors=None):
     """Return s converted to str type of current Python
     (bytes in Py2, unicode in Py3)"""
     if s is None:
@@ -162,12 +183,21 @@ cdef force_str(object s, encoding=TEXT_ENCODING):
     if PY_MAJOR_VERSION < 3:
         return s
     elif PyBytes_Check(s):
-        return s.decode(encoding)
+        return s.decode(encoding or TEXT_ENCODING, errors or ERROR_HANDLER)
     else:
         # assume unicode
         return s
 
 
+cdef decode_bytes(bytes s, encoding=None, errors=None):
+    """Return s converted to current Python's str type,
+    always decoding even in Python 2"""
+    if s is None:
+        return None
+    else:
+        return s.decode(encoding or TEXT_ENCODING, errors or ERROR_HANDLER)
+
+
 cpdef parse_region(contig=None,
                    start=None,
                    stop=None,
@@ -179,7 +209,7 @@ cpdef parse_region(contig=None,
     `end`. `start` and `end` denote 0-based, half-open intervals.
     
     :term:`reference` and `end` are also accepted for backward
-    compatiblity as synonyms for :term:`contig` and `stop`,
+    compatibility as synonyms for :term:`contig` and `stop`,
     respectively.
 
     Alternatively, a samtools :term:`region` string can be supplied.
@@ -386,13 +416,13 @@ def _pysam_dispatch(collection,
     if collection == b"samtools":
         samtools_set_stdout(stdout_h)
         samtools_set_stderr(stderr_h)
-        retval = samtools_main(n + 2, cargs)
+        retval = samtools_dispatch(n + 2, cargs)
         samtools_close_stdout()
         samtools_close_stderr()
     elif collection == b"bcftools":
         bcftools_set_stdout(stdout_h)
         bcftools_set_stderr(stderr_h)
-        retval = bcftools_main(n + 2, cargs)
+        retval = bcftools_dispatch(n + 2, cargs)
         bcftools_close_stdout()
         bcftools_close_stderr()
 
@@ -425,6 +455,10 @@ def _pysam_dispatch(collection,
     return retval, out_stderr, out_stdout
 
 
-__all__ = ["qualitystring_to_array",
-           "array_to_qualitystring",
-           "qualities_to_qualitystring"]
+__all__ = [
+    "qualitystring_to_array",
+    "array_to_qualitystring",
+    "qualities_to_qualitystring",
+    "get_encoding_error_handler",
+    "set_encoding_error_handler",
+]
diff --git a/pysam/samtools.py b/pysam/samtools.py
index 58cc2ee..9042cc1 100644
--- a/pysam/samtools.py
+++ b/pysam/samtools.py
@@ -37,6 +37,10 @@ SAMTOOLS_DISPATCH = {
     "quickcheck": ("quickcheck", None),
     "split": ("split", None),
     "flags": ("flags", None),
+    "ampliconclip": ("ampliconclip", None),
+    "ampliconstats": ("ampliconstats", None),
+    "version": ("version", None),
+    "fqimport": ("import", None),
 }
 
 # instantiate samtools commands as python functions
diff --git a/pysam/version.h b/pysam/version.h
index 7c4ea99..33676ea 100644
--- a/pysam/version.h
+++ b/pysam/version.h
@@ -1,5 +1,5 @@
 // Version information used while compiling samtools, bcftools, and htslib
 
-#define SAMTOOLS_VERSION "1.10 (pysam)"
-#define BCFTOOLS_VERSION "1.10.2 (pysam)"
-#define HTS_VERSION_TEXT "1.10.2 (pysam)"
+#define SAMTOOLS_VERSION "1.13 (pysam)"
+#define BCFTOOLS_VERSION "1.13 (pysam)"
+#define HTS_VERSION_TEXT "1.13 (pysam)"
diff --git a/pysam/version.py b/pysam/version.py
index 3ad71c7..8c871ba 100644
--- a/pysam/version.py
+++ b/pysam/version.py
@@ -1,6 +1,6 @@
 # pysam versioning information
-__version__ = "0.16.0.1"
+__version__ = "0.17.0"
 
-__samtools_version__ = "1.10"
-__bcftools_version__ = "1.10.2"
-__htslib_version__ = "1.10.2"
+__samtools_version__ = "1.13"
+__bcftools_version__ = "1.13"
+__htslib_version__ = "1.13"
diff --git a/samtools/LICENSE b/samtools/LICENSE
index 3c56f48..cd102b8 100644
--- a/samtools/LICENSE
+++ b/samtools/LICENSE
@@ -1,6 +1,6 @@
 The MIT/Expat License
 
-Copyright (C) 2008-2019 Genome Research Ltd.
+Copyright (C) 2008-2021 Genome Research Ltd.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/samtools/README b/samtools/README
index bb7af6c..dd27670 100644
--- a/samtools/README
+++ b/samtools/README
@@ -9,7 +9,7 @@ Building samtools
 The typical simple case of building Samtools using the HTSlib bundled within
 this Samtools release tarball is done as follows:
 
-    cd .../samtools-1.10 # Within the unpacked release directory
+    cd .../samtools-1.13 # Within the unpacked release directory
     ./configure
     make
 
@@ -21,7 +21,7 @@ install samtools etc properly into a directory of your choosing.  Building for
 installation using the HTSlib bundled within this Samtools release tarball,
 and building the various HTSlib utilities such as bgzip is done as follows:
 
-    cd .../samtools-1.10 # Within the unpacked release directory
+    cd .../samtools-1.13 # Within the unpacked release directory
     ./configure --prefix=/path/to/location
     make all all-htslib
     make install install-htslib
@@ -48,7 +48,7 @@ There are two advantages to this:
 To build with plug-ins, you need to use the --enable-plugins configure option
 as follows:
 
-    cd .../samtools-1.10 # Within the unpacked release directory
+    cd .../samtools-1.13 # Within the unpacked release directory
     ./configure --enable-plugins --prefix=/path/to/location
     make all all-htslib
     make install install-htslib
@@ -66,8 +66,8 @@ Setting --with-plugin-path is useful if you want to run directly from
 the source distribution instead of installing the package.  In that case
 you can use:
 
-    cd .../samtools-1.10 # Within the unpacked release directory
-    ./configure --enable-plugins --with-plugin-path=$PWD/htslib-1.10
+    cd .../samtools-1.13 # Within the unpacked release directory
+    ./configure --enable-plugins --with-plugin-path=$PWD/htslib-1.13
     make all all-htslib
 
 It is possible to override the built-in search path using the HTS_PATH
@@ -99,3 +99,28 @@ Benchmarks comparing the various zlibs are available at:
 
 It is recommended that you perform your own rigorous tests for an entire
 pipeline if you wish to switch to one of the optimised zlib implementations.
+
+Citing
+======
+
+Please cite this paper when using SAMtools for your publications:
+
+Twelve years of SAMtools and BCFtools
+Petr Danecek, James K Bonfield, Jennifer Liddle, John Marshall, Valeriu Ohan, Martin O Pollard, Andrew Whitwham, Thomas Keane, Shane A McCarthy, Robert M Davies, Heng Li
+GigaScience, Volume 10, Issue 2, February 2021, giab008, https://doi.org/10.1093/gigascience/giab008
+
+@article{10.1093/gigascience/giab008,
+    author = {Danecek, Petr and Bonfield, James K and Liddle, Jennifer and Marshall, John and Ohan, Valeriu and Pollard, Martin O and Whitwham, Andrew and Keane, Thomas and McCarthy, Shane A and Davies, Robert M and Li, Heng},
+    title = "{Twelve years of SAMtools and BCFtools}",
+    journal = {GigaScience},
+    volume = {10},
+    number = {2},
+    year = {2021},
+    month = {02},
+    abstract = "{SAMtools and BCFtools are widely used programs for processing and analysing high-throughput sequencing data. They include tools for file format conversion and manipulation, sorting, querying, statistics, variant calling, and effect analysis amongst other methods. The first version appeared online 12 years ago and has been maintained and further developed ever since, with many new features and improvements added over the years. The SAMtools and BCFtools packages represent a unique collection of tools that have been used in numerous other software projects and countless genomic pipelines. Both SAMtools and BCFtools are freely available on GitHub under the permissive MIT licence, free for both non-commercial and commercial use. Both packages have been installed $>$1 million times via Bioconda. The source code and documentation are available from https://www.htslib.org.}",
+    issn = {2047-217X},
+    doi = {10.1093/gigascience/giab008},
+    url = {https://doi.org/10.1093/gigascience/giab008},
+    note = {giab008},
+    eprint = {https://academic.oup.com/gigascience/article-pdf/10/2/giab008/36332246/giab008.pdf},
+}
diff --git a/samtools/amplicon_stats.c b/samtools/amplicon_stats.c
new file mode 100644
index 0000000..62bb15c
--- /dev/null
+++ b/samtools/amplicon_stats.c
@@ -0,0 +1,1754 @@
+/*  amplicon_stats.c -- "samtools stats" style statistics for amplicon data.
+
+    Copyright (C) 2020-2021 Genome Research Ltd.
+
+    Author: James Bonfield <jkb@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+/*
+ * This tool is designed to give "samtools stats" style output, but dedicated
+ * to small amplicon sequencing projects.  It gathers stats on the
+ * distribution of reads across amplicons.
+ */
+
+/*
+ * TODO:
+ * - Cope with multiple references.  What do we do here?  Just request one?
+ * - Permit regions rather than consuming whole file (maybe solves above).
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <math.h>
+
+#include <htslib/sam.h>
+#include <htslib/khash.h>
+
+#include "samtools.h"
+#include "sam_opts.h"
+#include "bam_ampliconclip.h"
+
+KHASH_MAP_INIT_INT64(tcoord, int64_t)
+KHASH_MAP_INIT_STR(qname, int64_t)
+
+#ifndef MIN
+#define MIN(a,b) ((a)<(b)?(a):(b))
+#endif
+
+#ifndef MAX
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#endif
+
+#ifndef ABS
+#define ABS(a) ((a)>=0?(a):-(a))
+#endif
+
+#define TCOORD_MIN_COUNT   10
+#define MAX_AMP 1000       // Default maximum number of amplicons
+#define MAX_AMP_LEN 1000   // Default maximum length of any single amplicon
+#define MAX_PRIMER_PER_AMPLICON 4  // Max primers per LEFT/RIGHT
+#define MAX_DEPTH 5        // Number of different depths permitted
+
+typedef struct {
+    sam_global_args ga;
+    uint32_t flag_require;
+    uint32_t flag_filter;
+    int max_delta;   // Used for matching read to amplicon primer loc
+    int min_depth[MAX_DEPTH]; // Used for coverage; must be >= min_depth deep
+    int use_sample_name;
+    int max_amp;     // Total number of amplicons
+    int max_amp_len; // Maximum length of an individual amplicon
+    double depth_bin;// aggregate depth within this fraction
+    int tlen_adj;    // Adjust tlen by this amount, due to clip but no fixmate
+    FILE *out_fp;
+    char *argv;
+    int tcoord_min_count;
+    int tcoord_bin;
+    int multi_ref;
+} astats_args_t;
+
+typedef struct {
+    int nseq;       // total sequence count
+    int nfiltered;  // count of sequences filtered out
+    int nfailprimer;// count of sequences not matching the primer locations
+
+    // Sizes of memory allocated below, to permit reset
+    int max_amp, max_amp_len, max_len;
+
+    // Summary across all samples, sum(x) plus sum(x^2) for s.d. calc
+    int64_t *nreads, *nreads2;          // [max_amp]
+    double  *nfull_reads;               // [max_amp]; 0.5/read if paired.
+    double  *nrperc, *nrperc2;          // [max_amp]
+    int64_t *nbases, *nbases2;          // [max_amp]
+    int64_t *coverage;                  // [max_amp][max_amp_len], stored flat
+    double  (*covered_perc)[MAX_DEPTH]; // [max_amp][MAX_DEPTH]
+    double  (*covered_perc2)[MAX_DEPTH];// [max_amp][MAX_DEPTH];
+    khash_t(tcoord) **tcoord;           // [max_amp+1]; [0] is "no amplicon"
+
+    // 0 is correct pair, 1 is incorrect pair, 2 is unidentified
+    int     (*amp_dist)[3];             // [max_amp][3];
+
+    int *depth_valid; // [max_len]
+    int *depth_all;   // [max_len]
+    khash_t(qname) *qend;  // qname -> start | end<<32, for overlap removal
+} astats_t;
+
+// We can have multiple primers for LEFT / RIGHT, so this
+// permits detection by any compatible combination.
+// One reference:
+typedef struct {
+    int64_t left[MAX_PRIMER_PER_AMPLICON];
+    int nleft;
+    int64_t right[MAX_PRIMER_PER_AMPLICON];
+    int nright;
+    int64_t max_left, min_right; // inner dimensions
+    int64_t min_left, max_right; // outer dimensions
+} amplicon_t;
+
+// Multiple references, we have an array of amplicons_t - one per used ref.
+// We have per reference local and global stats here, as some of the stats
+// are coordinate based.  However we report them combined together as a single
+// list across all references.
+// "namp" is the number of amplicons in this reference, but they're
+// numbered first_amp to first_amp+namp-1 inclusively.
+typedef struct {
+    int tid, namp;
+    int64_t len;
+    bed_entry_list_t *sites;
+    amplicon_t *amp;
+    astats_t *lstats, *gstats; // local (1 file) and global (all file) stats
+    const char *ref;           // ref name (pointer to the bed hash table key)
+    int first_amp;             // first amplicon number for this ref
+} amplicons_t;
+
+// Reinitialised for each new reference/chromosome.
+// Holds 0-based amplicon indices (0 to namp-1), or -1 for no match.
+static int *pos2start = NULL;
+static int *pos2end = NULL;
+static int pos2size = 0; // allocated size of pos2start/end
+
+// Fills the pos2start / pos2end tables for reference "ref": positions within
+// max_delta of a primer map to its amplicon index.  Returns -1 on realloc fail.
+static int initialise_amp_pos_lookup(astats_args_t *args,
+                                     amplicons_t *amps,
+                                     int ref) {
+    int64_t i, j;
+    amplicon_t *amp = amps[ref].amp;
+    int64_t max_len = amps[ref].len;
+    int namp = amps[ref].namp;
+    if (max_len+1 > pos2size) {
+        int *ps = realloc(pos2start, (max_len+1)*sizeof(*pos2start));
+        if (ps) pos2start = ps; // keep the old buffer reachable on failure
+        int *pe = realloc(pos2end,   (max_len+1)*sizeof(*pos2end));
+        if (pe) pos2end = pe;
+        if (!ps || !pe) return -1;
+        pos2size = max_len+1; // the size actually allocated
+    }
+    for (i = 0; i < max_len; i++)
+        pos2start[i] = pos2end[i] = -1;
+
+    for (i = 0; i < namp; i++) {
+        for (j = 0; j < amp[i].nleft; j++) {
+            int64_t p;
+            for (p = amp[i].left[j] - args->max_delta;
+                 p <= amp[i].left[j] + args->max_delta; p++) {
+                if (p < 1 || p > max_len)
+                    continue;
+                pos2start[p-1] = i;
+            }
+        }
+        for (j = 0; j < amp[i].nright; j++) {
+            int64_t p;
+            for (p = amp[i].right[j] - args->max_delta;
+                 p <= amp[i].right[j] + args->max_delta; p++) {
+                if (p < 1 || p > max_len)
+                    continue;
+                pos2end[p-1] = i;
+            }
+        }
+    }
+
+    return 0;
+}
+
+// Returns the number of amplicons described by a list of BED primer sites.
+// Relies on the BED entries for each amplicon being LEFT (+) primers followed
+// by RIGHT (-) primers, so every reverse -> forward transition begins a new
+// amplicon.  The first amplicon is counted up front.
+static int count_amplicon(bed_entry_list_t *sites) {
+    int idx, total = 1, prev_rev = 0;
+
+    for (idx = 0; idx < sites->length; idx++) {
+        if (prev_rev && sites->bp[idx].rev == 0)
+            total++;
+        prev_rev = sites->bp[idx].rev;
+    }
+    return total;
+}
+
+// We're only interested in the internal part of the amplicon.
+// Our bed file has LEFT start/end followed by RIGHT start/end,
+// so collapse these to LEFT end / RIGHT start.  Also prints the AMPLICON
+// table to args->out_fp and stores the amplicon count in *namp.
+// NOTE(review): nleft/nright are not reset here; amp[] is assumed zeroed.
+// Returns right most amplicon position on success, < 0 on error.
+static int64_t bed2amplicon(astats_args_t *args, bed_entry_list_t *sites,
+                            amplicon_t *amp, int *namp, int do_title,
+                            const char *ref, int first_amp) {
+    int i, j;
+    int64_t max_right = 0;
+    FILE *ofp = args->out_fp;
+
+    *namp = 0;
+
+    // Assume all primers for the same amplicon are adjacent in BED
+    // with all + followed by all -.  Thus - to + signifies next primer set.
+    int last_rev = 0;
+    amp[0].max_left = 0;
+    amp[0].min_right = INT64_MAX;
+    amp[0].min_left = INT64_MAX;
+    amp[0].max_right = 0;
+    if (do_title) {
+        fprintf(ofp, "# Amplicon locations from BED file.\n");
+        fprintf(ofp, "# LEFT/RIGHT are <start>-<end> format and "
+                "comma-separated for alt-primers.\n");
+        if (args->multi_ref)
+            fprintf(ofp, "#\n# AMPLICON\tREF\tNUMBER\tLEFT\tRIGHT\n");
+        else
+            fprintf(ofp, "#\n# AMPLICON\tNUMBER\tLEFT\tRIGHT\n");
+    }
+    for (i = j = 0; i < sites->length; i++) {
+        if (i == 0 && sites->bp[i].rev != 0) {
+            fprintf(stderr, "[ampliconstats] error: BED file should start"
+                    " with the + strand primer\n");
+            return -1;
+        }
+        if (sites->bp[i].rev == 0 && last_rev) {
+            j++;
+            if (j >= args->max_amp) {
+                fprintf(stderr, "[ampliconstats] error: too many amplicons"
+                        " (%d). Use -a option to raise this.\n", j);
+                return -1;
+            }
+            amp[j].max_left = 0;
+            amp[j].min_right = INT64_MAX;
+            amp[j].min_left = INT64_MAX;
+            amp[j].max_right = 0;
+        }
+        if (sites->bp[i].rev == 0) {
+            if (i == 0 || last_rev) {
+                if (j>0) fprintf(ofp, "\n");
+                if (args->multi_ref)
+                    fprintf(ofp, "AMPLICON\t%s\t%d", ref, j+1 + first_amp);
+                else
+                    fprintf(ofp, "AMPLICON\t%d", j+1);
+            }
+            if (amp[j].nleft >= MAX_PRIMER_PER_AMPLICON) {
+                // Not an errno failure, so use plain print_error.
+                print_error("ampliconstats", "too many primers per amplicon "
+                            "(%d)", MAX_PRIMER_PER_AMPLICON);
+                return -1;
+            }
+            amp[j].left[amp[j].nleft++] = sites->bp[i].right;
+            if (amp[j].max_left < sites->bp[i].right+1)
+                amp[j].max_left = sites->bp[i].right+1;
+            if (amp[j].min_left > sites->bp[i].right+1)
+                amp[j].min_left = sites->bp[i].right+1;
+            // BED file, so left+1 as zero based. right(+1-1) as
+            // BED goes one beyond end (and we want inclusive range).
+            fprintf(ofp, "%c%"PRId64"-%"PRId64, "\t,"[amp[j].nleft > 1],
+                    sites->bp[i].left+1, sites->bp[i].right);
+        } else {
+            if (amp[j].nright >= MAX_PRIMER_PER_AMPLICON) {
+                // Not an errno failure, so use plain print_error.
+                print_error("ampliconstats", "too many primers per amplicon "
+                            "(%d)", MAX_PRIMER_PER_AMPLICON);
+                return -1;
+            }
+            amp[j].right[amp[j].nright++] = sites->bp[i].left;
+            if (amp[j].min_right > sites->bp[i].left-1)
+                amp[j].min_right = sites->bp[i].left-1;
+            if (amp[j].max_right < sites->bp[i].left-1) {
+                amp[j].max_right = sites->bp[i].left-1;
+                if (amp[j].max_right - amp[j].min_left + 1 >=
+                    args->max_amp_len) {
+                    fprintf(stderr, "[ampliconstats] error: amplicon "
+                            "longer (%d) than max_amp_len option (%d)\n",
+                            (int)(amp[j].max_right - amp[j].min_left + 2),
+                            args->max_amp_len);
+                    return -1;
+                }
+                if (max_right < amp[j].max_right)
+                    max_right = amp[j].max_right;
+            }
+            fprintf(ofp, "%c%"PRId64"-%"PRId64, "\t,"[amp[j].nright > 1],
+                    sites->bp[i].left+1, sites->bp[i].right);
+        }
+        last_rev = sites->bp[i].rev;
+    }
+    if (last_rev != 1) {
+        fprintf(ofp, "\n"); // useful if going to stdout
+        fprintf(stderr, "[ampliconstats] error: bed file does not end on"
+                " a reverse strand primer.\n");
+        return -1;
+    }
+    *namp = ++j;
+    if (j) fprintf(ofp, "\n");
+
+    if (j >= args->max_amp) {
+        fprintf(stderr, "[ampliconstats] error: "
+                "too many amplicons (%d). Use -a option to raise this.\n", j);
+        return -1;
+    }
+
+//    for (i = 0; i < *namp; i++) {
+//      printf("%d\t%ld", i, amp[i].length);
+//      for (j = 0; j < amp[i].nleft; j++)
+//          printf("%c%ld", "\t,"[j>0], amp[i].left[j]);
+//      for (j = 0; j < amp[i].nright; j++)
+//          printf("%c%ld", "\t,"[j>0], amp[i].right[j]);
+//      printf("\n");
+//    }
+
+    return max_right;
+}
+
+// Frees st and everything it owns.  Safe on NULL and on a partially built
+// (calloc-zeroed) st from the stats_alloc error path, where qend may be NULL.
+void stats_free(astats_t *st) {
+    if (!st)
+        return;
+    free(st->nreads);
+    free(st->nreads2);
+    free(st->nfull_reads);
+    free(st->nrperc);
+    free(st->nrperc2);
+    free(st->nbases);
+    free(st->nbases2);
+    free(st->coverage);
+    free(st->covered_perc);
+    free(st->covered_perc2);
+    free(st->amp_dist);
+    free(st->depth_valid);
+    free(st->depth_all);
+
+    if (st->tcoord) {
+        int i;
+        for (i = 0; i <= st->max_amp; i++) {
+            if (st->tcoord[i])
+                kh_destroy(tcoord, st->tcoord[i]);
+        }
+        free(st->tcoord);
+    }
+
+    khiter_t k, k_end = st->qend ? kh_end(st->qend) : 0;
+    for (k = 0; k != k_end; k++)
+        if (kh_exist(st->qend, k))
+            free((void *)kh_key(st->qend, k));
+    kh_destroy(qname, st->qend);
+
+    free(st);
+}
+
+// Allocates a zero-initialised astats_t with arrays sized for max_amp
+// amplicons of max_amp_len bases and depth arrays of max_len entries.
+// Returns NULL on allocation failure.
+astats_t *stats_alloc(int64_t max_len, int max_amp, int max_amp_len) {
+    astats_t *st = calloc(1, sizeof(*st));
+    if (!st)
+        return NULL;
+    st->max_amp = max_amp;
+    st->max_amp_len = max_amp_len;
+    st->max_len = max_len;
+    if (!(st->nreads  = calloc(max_amp, sizeof(*st->nreads))))  goto err;
+    if (!(st->nreads2 = calloc(max_amp, sizeof(*st->nreads2)))) goto err;
+    if (!(st->nrperc  = calloc(max_amp, sizeof(*st->nrperc))))  goto err;
+    if (!(st->nrperc2 = calloc(max_amp, sizeof(*st->nrperc2)))) goto err;
+    if (!(st->nbases  = calloc(max_amp, sizeof(*st->nbases))))  goto err;
+    if (!(st->nbases2 = calloc(max_amp, sizeof(*st->nbases2)))) goto err;
+    if (!(st->nfull_reads = calloc(max_amp, sizeof(*st->nfull_reads))))
+        goto err;
+
+    // Let calloc do the multiply: max_amp*max_amp_len could overflow int.
+    if (!(st->coverage = calloc(max_amp, max_amp_len*sizeof(*st->coverage))))
+        goto err;
+    if (!(st->covered_perc  = calloc(max_amp, sizeof(*st->covered_perc))))
+        goto err;
+    if (!(st->covered_perc2 = calloc(max_amp, sizeof(*st->covered_perc2))))
+        goto err;
+
+    if (!(st->tcoord = calloc(max_amp+1, sizeof(*st->tcoord)))) goto err;
+    int i;
+    for (i = 0; i <= st->max_amp; i++)
+        if (!(st->tcoord[i] = kh_init(tcoord)))
+            goto err;
+
+    if (!(st->qend = kh_init(qname)))
+        goto err;
+
+    if (!(st->depth_valid = calloc(max_len, sizeof(*st->depth_valid))))
+        goto err;
+    if (!(st->depth_all   = calloc(max_len, sizeof(*st->depth_all))))
+        goto err;
+
+    if (!(st->amp_dist  = calloc(max_amp, sizeof(*st->amp_dist))))  goto err;
+
+    return st;
+
+ err:
+    stats_free(st);
+    return NULL;
+}
+
+static void stats_reset(astats_t *st) {
+    // Zero all counters and arrays for reuse, keeping the allocations.
+    st->nseq = 0;
+    st->nfiltered = 0;
+    st->nfailprimer = 0;
+
+    memset(st->nreads,  0, st->max_amp * sizeof(*st->nreads));
+    memset(st->nreads2, 0, st->max_amp * sizeof(*st->nreads2));
+    memset(st->nfull_reads, 0, st->max_amp * sizeof(*st->nfull_reads));
+    memset(st->nrperc,  0, st->max_amp * sizeof(*st->nrperc));
+    memset(st->nrperc2, 0, st->max_amp * sizeof(*st->nrperc2));
+    memset(st->nbases,  0, st->max_amp * sizeof(*st->nbases));
+    memset(st->nbases2, 0, st->max_amp * sizeof(*st->nbases2));
+    memset(st->coverage, 0, st->max_amp * st->max_amp_len
+           * sizeof(*st->coverage));
+    memset(st->covered_perc,  0, st->max_amp * sizeof(*st->covered_perc));
+    memset(st->covered_perc2, 0, st->max_amp * sizeof(*st->covered_perc2));
+
+    // Keep the allocated entries as it's likely all files will share
+    // the same keys.  Instead we reset counters to zero for common ones
+    // and delete rare ones (value < 5).
+    // NOTE(review): values also hold the status in bits 32+ (set in
+    // accumulate_stats), so only status 0 entries can be < 5 - confirm.
+    int i;
+    for (i = 0; i <= st->max_amp; i++) {
+        khiter_t k;
+        for (k = kh_begin(st->tcoord[i]);
+             k != kh_end(st->tcoord[i]); k++)
+            if (kh_exist(st->tcoord[i], k)) {
+                if (kh_value(st->tcoord[i], k) < 5)
+                    kh_del(tcoord, st->tcoord[i], k);
+                else
+                    kh_value(st->tcoord[i], k) = 0;
+            }
+    }
+
+    khiter_t k;
+    for (k = kh_begin(st->qend); k != kh_end(st->qend); k++)
+        if (kh_exist(st->qend, k))
+            free((void *)kh_key(st->qend, k));
+    kh_clear(qname, st->qend);
+
+    memset(st->depth_valid, 0, st->max_len * sizeof(*st->depth_valid));
+    memset(st->depth_all,   0, st->max_len * sizeof(*st->depth_all));
+    memset(st->amp_dist,  0, st->max_amp * sizeof(*st->amp_dist));
+}
+
+// Clears the local (per-file) stats of every reference that has sites;
+// references without sites are skipped.
+static void amp_stats_reset(amplicons_t *amps, int nref) {
+    int ref;
+    for (ref = 0; ref < nref; ref++)
+        if (amps[ref].sites)
+            stats_reset(amps[ref].lstats);
+}
+
+// Adds one alignment to its reference's local stats (depths, per-amplicon
+// counts, pairing status, template coords).  Returns 0, or -1 on failure.
+static int accumulate_stats(astats_args_t *args, amplicons_t *amps,
+                            bam1_t *b) {
+    int ref = b->core.tid;
+    amplicon_t *amp = amps[ref].amp;
+    astats_t *stats = amps[ref].lstats;
+    int len = amps[ref].len;
+    if (!stats)
+        return 0;
+
+    stats->nseq++;
+    if ((b->core.flag & args->flag_require) != args->flag_require ||
+        (b->core.flag & args->flag_filter)  != 0) {
+        stats->nfiltered++;
+        return 0;
+    }
+
+    int64_t start = b->core.pos, mstart = start; // modified start
+    int64_t end = bam_endpos(b), i;
+
+    // Compute all-template-depth and valid-template-depth.
+    // We track current end location per read name so we can remove overlaps.
+    // Potentially we could use this data for a better amplicon-depth
+    // count too, but for now it's purely for the per-base plots.
+    int ret;
+    khiter_t k;
+    int prev_start = 0, prev_end = 0;
+    if ((b->core.flag & BAM_FPAIRED)
+        && !(b->core.flag & (BAM_FSUPPLEMENTARY | BAM_FSECONDARY))) {
+        k = kh_put(qname, stats->qend, bam_get_qname(b), &ret);
+        if (ret < 0) return -1; // insert failed; k is not a valid bucket
+        if (ret == 0) {
+            prev_start = kh_value(stats->qend, k) & 0xffffffff;
+            prev_end = kh_value(stats->qend, k)>>32;
+            mstart = MAX(mstart, prev_end);
+            // Ideally we'd reuse strings so we don't thrash free/malloc.
+            // However let's see if the official way of doing that (malloc
+            // itself) is fast enough first.
+            free((void *)kh_key(stats->qend, k));
+            kh_del(qname, stats->qend, k);
+        } else {
+            if (!(kh_key(stats->qend, k) = strdup(bam_get_qname(b))))
+                return -1;
+            kh_value(stats->qend, k) = start | (end << 32);
+        }
+    }
+    for (i = mstart; i < end && i < len; i++)
+        stats->depth_all[i]++;
+    if (i < end) {
+        print_error("ampliconstats", "record %s overhangs end of reference",
+                    bam_get_qname(b));
+        // But keep going, as it's harmless.
+    }
+
+    // On single ended runs, eg ONT or PacBio, we just use the start/end
+    // of the template to assign.
+    int anum = (b->core.flag & BAM_FREVERSE) || !(b->core.flag & BAM_FPAIRED)
+        ? (end-1 >= 0 && end-1 < len ? pos2end[end-1] : -1)
+        : (start >= 0 && start < len ? pos2start[start] : -1);
+
+    // ivar sometimes soft-clips 100% of the bases.
+    // This is essentially unmapped
+    if (end == start && (args->flag_filter & BAM_FUNMAP)) {
+        stats->nfiltered++;
+        return 0;
+    }
+
+    if (anum == -1)
+        stats->nfailprimer++;
+
+    if (anum >= 0) {
+        int64_t c = MIN(end,amp[anum].min_right+1) - MAX(start,amp[anum].max_left);
+        if (c > 0) {
+            stats->nreads[anum]++;
+            // NB: ref bases rather than read bases
+            stats->nbases[anum] += c;
+
+            int64_t i;
+            if (start < 0) start = 0;
+            if (end > len) end = len;
+
+            int64_t ostart = MAX(start, amp[anum].min_left-1);
+            int64_t oend = MIN(end, amp[anum].max_right);
+            int64_t offset = amp[anum].min_left-1;
+            for (i = ostart; i < oend; i++)
+                stats->coverage[anum*stats->max_amp_len + i-offset]++;
+        } else {
+            stats->nfailprimer++;
+        }
+    }
+
+    // Template length in terms of amplicon number to amplicon number.
+    // We expect left to right of same amplicon (len 0), but it may go
+    // to next amplicon (len 1) or prev (len -1), etc.
+    int64_t t_end;
+    int oth_anum = -1;
+
+    if (b->core.flag & BAM_FPAIRED) {
+        t_end = (b->core.flag & BAM_FREVERSE ? end : start)
+            + b->core.isize;
+
+        // If we've clipped the primers but not followed up with a fixmates
+        // then our start+TLEN will take us to a location which is
+        // length(LEFT_PRIMER) + length(RIGHT_PRIMER) too far away.
+        //
+        // The correct solution is to run samtools fixmate so TLEN is correct.
+        // The hacky solution is to fudge the expected tlen by double the
+        // average primer length (e.g. 50).
+        t_end += b->core.isize > 0 ? -args->tlen_adj : +args->tlen_adj;
+
+        if (t_end > 0 && t_end < len && b->core.isize != 0)
+            oth_anum = (b->core.flag & BAM_FREVERSE)
+                ? pos2start[t_end]
+                : pos2end[t_end];
+    } else {
+        // Not paired (see int anum = (REV || !PAIR) ?en :st expr above)
+        oth_anum = start >= 0 && start < len ? pos2start[start] : -1;
+        t_end = end;
+    }
+
+    // We don't want to count our pairs twice.
+    // If both left/right are known, count it on left only.
+    // If only one is known, we'll only get to this code once
+    // so we can also count it.
+    int astatus = 2;
+    if (anum != -1 && oth_anum != -1) {
+        astatus = oth_anum == anum ? 0 : 1;
+        if (start <= t_end)
+            stats->amp_dist[anum][astatus]++;
+    } else if (anum >= 0) {
+        stats->amp_dist[anum][astatus = 2]++;
+    }
+
+    if (astatus == 0 && !(b->core.flag & (BAM_FUNMAP | BAM_FMUNMAP))) {
+        if (prev_end && mstart > prev_end) {
+            // 2nd read with gap to 1st; undo previous increment.
+            for (i = prev_start; i < prev_end && i < len; i++)
+                stats->depth_valid[i]--;
+            stats->nfull_reads[anum] -= (b->core.flag & BAM_FPAIRED) ? 0.5 : 1;
+        } else {
+            // 1st read, or 2nd read that overlaps 1st
+            for (i = mstart; i < end && i < len; i++)
+                stats->depth_valid[i]++;
+            stats->nfull_reads[anum] += (b->core.flag & BAM_FPAIRED) ? 0.5 : 1;
+        }
+    }
+
+    // Track template start,end frequencies, so we can give stats on
+    // amplicon primer usage.
+    if ((b->core.flag & BAM_FPAIRED) && b->core.isize <= 0)
+        // left to right only, so we don't double count template positions.
+        return 0;
+
+    start = b->core.pos;
+    t_end = b->core.flag & BAM_FPAIRED
+        ? start + b->core.isize-1
+        : end;
+    uint64_t tcoord = MIN(start+1, UINT32_MAX) | (MIN(t_end+1, UINT32_MAX)<<32);
+    k = kh_put(tcoord, stats->tcoord[anum+1], tcoord, &ret);
+    if (ret < 0)
+        return -1;
+    if (ret == 0)
+        kh_value(stats->tcoord[anum+1], k)++;
+    else
+        kh_value(stats->tcoord[anum+1], k)=1;
+    kh_value(stats->tcoord[anum+1], k) |= ((int64_t)astatus<<32);
+
+    return 0;
+}
+
+// Append file local stats to global stats for amplicons 0..namp-1, plus
+// the tcoord hash in slot 0 (a == -1; reads assigned to no amplicon).
+// all_nseq is the divisor for read percentages.  Returns 0, or -1 on error.
+int append_lstats(astats_t *lstats, astats_t *gstats, int namp, int all_nseq) {
+    gstats->nseq += lstats->nseq;
+    gstats->nfiltered += lstats->nfiltered;
+    gstats->nfailprimer += lstats->nfailprimer;
+
+    int a;
+    for (a = -1; a < namp; a++) {
+        // Add khash local (kl) to khash global (kg)
+        khiter_t kl, kg;
+        for (kl = kh_begin(lstats->tcoord[a+1]);
+             kl != kh_end(lstats->tcoord[a+1]); kl++) {
+            if (!kh_exist(lstats->tcoord[a+1], kl) ||
+                kh_value(lstats->tcoord[a+1], kl) == 0)
+                continue;
+            int ret;
+            kg = kh_put(tcoord, gstats->tcoord[a+1],
+                        kh_key(lstats->tcoord[a+1], kl),
+                        &ret);
+            if (ret < 0)
+                return -1;
+            // Global count (low 32 bits) + local value; status from local.
+            kh_value(gstats->tcoord[a+1], kg) =
+                (ret == 0
+                 ? (kh_value(gstats->tcoord[a+1], kg) & 0xFFFFFFFF)
+                 : 0)
+                + kh_value(lstats->tcoord[a+1], kl);
+        }
+        if (a == -1) continue;
+
+        gstats->nreads[a]  += lstats->nreads[a];
+        gstats->nreads2[a] += lstats->nreads[a] * lstats->nreads[a];
+        gstats->nfull_reads[a] += lstats->nfull_reads[a];
+
+        // To get mean & sd for amplicon read percentage, we need
+        // to do the divisions here as nseq differs for each sample.
+        double nrperc = all_nseq ? 100.0 * lstats->nreads[a] / all_nseq : 0;
+        gstats->nrperc[a]  += nrperc;
+        gstats->nrperc2[a] += nrperc*nrperc;
+        gstats->nbases[a]  += lstats->nbases[a];
+        gstats->nbases2[a] += lstats->nbases[a] * lstats->nbases[a];
+
+        int d;
+        for (d = 0; d < MAX_DEPTH; d++) {
+            gstats->covered_perc[a][d]  += lstats->covered_perc[a][d];
+            gstats->covered_perc2[a][d] += lstats->covered_perc[a][d]
+                                         * lstats->covered_perc[a][d];
+        }
+
+        for (d = 0; d < 3; d++)
+            gstats->amp_dist[a][d] += lstats->amp_dist[a][d];
+    }
+
+    for (a = 0; a < lstats->max_len; a++) {
+        gstats->depth_valid[a] += lstats->depth_valid[a];
+        gstats->depth_all[a]   += lstats->depth_all[a];
+    }
+
+    return 0;
+}
+
+int append_stats(amplicons_t *amps, int nref) {
+    int idx, matched = 0;
+    // Pass 1: total sequences that were neither filtered nor failed the
+    // primer match, over every reference that has amplicon sites.
+    for (idx = 0; idx < nref; idx++) {
+        const astats_t *ls = amps[idx].lstats;
+        if (amps[idx].sites)
+            matched += ls->nseq - ls->nfiltered - ls->nfailprimer;
+    }
+
+    // Pass 2: fold each reference's local stats into its global stats.
+    for (idx = 0; idx < nref; idx++) {
+        amplicons_t *ref = &amps[idx];
+        if (ref->sites &&
+            append_lstats(ref->lstats, ref->gstats, ref->namp, matched) < 0)
+            return -1;
+    }
+    return 0;
+}
+
+typedef struct {
+    int32_t start, end; // template coordinates, unpacked from the hash key
+    uint32_t freq;      // occurrence count (low 32 bits of the hash value)
+    uint32_t status;    // status code (high 32 bits of the hash value)
+} tcoord_t;
+
+// qsort() comparator: descending frequency, then ascending start and end.
+static int tcoord_freq_sort(const void *vp1, const void *vp2) {
+    const tcoord_t *t1 = (const tcoord_t *)vp1;
+    const tcoord_t *t2 = (const tcoord_t *)vp2;
+
+    // Compare instead of subtracting: the difference of two uint32_t (or
+    // of distant int32_t) values can wrap and yield the wrong sign.
+    if (t1->freq != t2->freq)
+        return t1->freq > t2->freq ? -1 : 1;
+    if (t1->start != t2->start)
+        return t1->start < t2->start ? -1 : 1;
+    return (t1->end > t2->end) - (t1->end < t2->end);
+}
+
+
+/*
+ * Merges tcoord start,end,freq,status tuples if their coordinates are
+ * close together.  We aim to keep the start,end for the most frequent
+ * value and assume that is the correct coordinate and all others are
+ * minor fluctuations due to errors or variants.
+ *
+ * tpos[] is sorted and compacted in place; *np holds the number of
+ * entries on entry and the number of merged entries on return.
+ *
+ * We sort by frequency first and then merge later items into the earlier
+ * more frequent ones.  It's O(N^2), but sufficient for the current scale
+ * of projects.  If that ever needs resolving, consider sorting by start
+ * coordinate, scanning for all items within X, finding the most frequent
+ * of those, and clustering that way.
+ */
+static void aggregate_tcoord(astats_args_t *args, tcoord_t *tpos, size_t *np){
+    size_t n = *np, j, j2, j3, k;
+
+    // Sort by frequency and cluster infrequent coords into frequent
+    // ones provided they're close by.
+    // This is O(N^2), but we've already binned by tcoord_bin/2 so
+    // the list isn't intended to be vast at this point.
+    qsort(tpos, n, sizeof(*tpos), tcoord_freq_sort);
+
+    // For frequency ties, find mid start coord, and then find mid end
+    // coord of those matching start.
+    // We make that the first item so we merge into that mid point.
+    for (j = 0; j < n; j++) {
+        for (j2 = j+1; j2 < n; j2++) {
+            if (tpos[j].freq != tpos[j2].freq)
+                break;
+            if (tpos[j2].start - tpos[j].start >= args->tcoord_bin)
+                break;
+        }
+
+        // j to j2 all within bin of a common start,
+        // m is the mid start.
+        if (j2-1 > j) {
+            size_t m = (j2-1 + j)/2;
+
+            // Find mid end for this same start.  TODO confirm: m > 1, not m > j?
+            while (m > 1 && tpos[m].start == tpos[m-1].start)
+                m--;
+            for (j3 = m+1; j3 < j2; j3++) {
+                if (tpos[m].start != tpos[j3].start)
+                    break;
+                if (tpos[m].end - tpos[j3].end >= args->tcoord_bin)
+                    break;
+            }
+            if (j3-1 > m)
+                m = (j3-1 + m)/2;
+
+            // Swap with first item.
+            tcoord_t tmp = tpos[j];
+            tpos[j] = tpos[m];
+            tpos[m] = tmp;
+            j = j2-1; // the loop's j++ then resumes at j2, after this group
+        }
+    }
+
+    // Now merge in coordinates.
+    // This bit is O(N^2), so consider binning first to reduce the
+    // size of the list if we have excessive positional variation.
+    for (k = j = 0; j < n; j++) {
+        if (!tpos[j].freq)
+            continue; // freq 0 marks an entry already merged below
+
+        if (k < j)
+            tpos[k] = tpos[j]; // compact surviving entries to the front
+
+        for (j2 = j+1; j2 < n; j2++) {
+            if (ABS(tpos[j].start-tpos[j2].start) < args->tcoord_bin/2 &&
+                ABS(tpos[j].end  -tpos[j2].end)  < args->tcoord_bin/2 &&
+                tpos[j].status == tpos[j2].status) {
+                tpos[k].freq += tpos[j2].freq;
+                tpos[j2].freq = 0;
+            }
+        }
+        k++;
+    }
+
+    *np = k; // number of entries left after merging
+}
+
+int dump_stats(astats_args_t *args, char type, char *name, int nfile,
+               amplicons_t *amps, int nref, int local) {
+    // Print all 'type' report sections to args->out_fp from lstats (if
+    // local) or gstats.  Returns 0, or -1 on allocation/amplicon-length error.
+    int i, r;
+    FILE *ofp = args->out_fp;
+    tcoord_t *tpos = NULL;
+    size_t ntcoord = 0;
+
+    // summary stats for this sample (or for all samples)
+    fprintf(ofp, "# Summary stats.\n");
+    fprintf(ofp, "# Use 'grep ^%cSS | cut -f 2-' to extract this part.\n", type);
+
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        int nmatch = stats->nseq - stats->nfiltered - stats->nfailprimer;
+        char *name_ref = malloc(strlen(name) + strlen(amps[r].ref) + 2);
+        if (!name_ref)
+            return -1;
+        if (args->multi_ref)
+            sprintf(name_ref, "%s\t%s", name, amps[r].ref);
+        else
+            sprintf(name_ref, "%s", name);
+        fprintf(ofp, "%cSS\t%s\traw total sequences:\t%d\n",
+                type, name_ref, stats->nseq);
+        fprintf(ofp, "%cSS\t%s\tfiltered sequences:\t%d\n",
+                type, name_ref, stats->nfiltered);
+        fprintf(ofp, "%cSS\t%s\tfailed primer match:\t%d\n",
+                type, name_ref, stats->nfailprimer);
+        fprintf(ofp, "%cSS\t%s\tmatching sequences:\t%d\n",
+                type, name_ref, nmatch);
+
+        int d = 0;
+        do {
+            // From first to last amplicon only, not the entire consensus.  If
+            // contig length is known, maybe add the missing count to < DEPTH?
+            int64_t start = 0, covered = 0, total = 0;
+            amplicon_t *amp = amps[r].amp;
+            for (i = 0; i < amps[r].namp; i++) {
+                int64_t j, offset = amp[i].min_left-1;
+                if (amp[i].min_right - amp[i].min_left > stats->max_amp_len) {
+                    fprintf(stderr, "[ampliconstats] error: "
+                            "Maximum amplicon length (%d) exceeded for '%s'\n",
+                            stats->max_amp_len, name);
+                    free(name_ref); // don't leak the label on this early exit
+                    return -1;
+                }
+                for (j = MAX(start, amp[i].max_left-1);
+                     j < MAX(start, amp[i].min_right); j++) {
+                    if (stats->coverage[i*stats->max_amp_len + j-offset]
+                        >= args->min_depth[d])
+                        covered++;
+                    total++;
+                }
+                start = MAX(start, amp[i].min_right);
+            }
+            fprintf(ofp, "%cSS\t%s\tconsensus depth count < %d and >= %d:\t%"
+                    PRId64"\t%"PRId64"\n", type, name_ref,
+                    args->min_depth[d], args->min_depth[d],
+                    total-covered, covered);
+        } while (++d < MAX_DEPTH && args->min_depth[d]);
+
+        free(name_ref);
+    }
+
+    // Read count
+    fprintf(ofp, "# Absolute matching read counts per amplicon.\n");
+    fprintf(ofp, "# Use 'grep ^%cREADS | cut -f 2-' to extract this part.\n", type);
+    fprintf(ofp, "%cREADS\t%s", type, name);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++) {
+            fprintf(ofp, "\t%"PRId64, stats->nreads[i]);
+        }
+    }
+    fprintf(ofp, "\n");
+
+    // Valid depth is the number of full length reads (already divided
+    // by the number we expect to cover), so +0.5 per read in pair.
+    // A.k.a "usable depth" in the plots.
+    fprintf(ofp, "%cVDEPTH\t%s", type, name);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++)
+            fprintf(ofp, "\t%d", (int)stats->nfull_reads[i]);
+    }
+    fprintf(ofp, "\n");
+
+    if (type == 'C') {
+        // For combined we can compute mean & standard deviation too
+        fprintf(ofp, "CREADS\tMEAN");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            for (i = 0; i < amps[r].namp; i++) {
+                fprintf(ofp, "\t%.1f", stats->nreads[i] / (double)nfile);
+            }
+        }
+        fprintf(ofp, "\n");
+
+        fprintf(ofp, "CREADS\tSTDDEV");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            for (i = 0; i < amps[r].namp; i++) {
+                double n1 = stats->nreads[i];
+                fprintf(ofp, "\t%.1f", nfile > 1 && stats->nreads2[i] > 0
+                        ? sqrt(stats->nreads2[i]/(double)nfile
+                               - (n1/nfile)*(n1/nfile))
+                        : 0);
+            }
+        }
+        fprintf(ofp, "\n");
+    }
+
+    fprintf(ofp, "# Read percentage of distribution between amplicons.\n");
+    fprintf(ofp, "# Use 'grep ^%cRPERC | cut -f 2-' to extract this part.\n", type);
+    fprintf(ofp, "%cRPERC\t%s", type, name);
+    int all_nseq = 0;
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        all_nseq  += stats->nseq - stats->nfiltered - stats->nfailprimer;
+    }
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++) {
+            if (type == 'C') {
+                fprintf(ofp, "\t%.3f", (double)stats->nrperc[i] / nfile);
+            } else {
+                fprintf(ofp, "\t%.3f",
+                        all_nseq ? 100.0 * stats->nreads[i] / all_nseq : 0);
+            }
+        }
+    }
+    fprintf(ofp, "\n");
+
+    if (type == 'C') {
+        // For combined we compute mean and standard deviation too
+        fprintf(ofp, "CRPERC\tMEAN");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            for (i = 0; i < amps[r].namp; i++) {
+                fprintf(ofp, "\t%.3f", stats->nrperc[i] / nfile);
+            }
+        }
+        fprintf(ofp, "\n");
+
+        fprintf(ofp, "CRPERC\tSTDDEV");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            for (i = 0; i < amps[r].namp; i++) {
+                // variance = SUM(X^2) - ((SUM(X)^2) / N)
+                double n1 = stats->nrperc[i];
+                double v = stats->nrperc2[i]/nfile - (n1/nfile)*(n1/nfile);
+                fprintf(ofp, "\t%.3f", v>0?sqrt(v):0);
+            }
+        }
+        fprintf(ofp, "\n");
+    }
+
+    // Base depth
+    fprintf(ofp, "# Read depth per amplicon.\n");
+    fprintf(ofp, "# Use 'grep ^%cDEPTH | cut -f 2-' to extract this part.\n", type);
+    fprintf(ofp, "%cDEPTH\t%s", type, name);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        amplicon_t *amp = amps[r].amp;
+        for (i = 0; i < amps[r].namp; i++) {
+            int nseq = stats->nseq - stats->nfiltered - stats->nfailprimer;
+            int64_t alen = amp[i].min_right - amp[i].max_left+1;
+            fprintf(ofp, "\t%.1f", nseq ? stats->nbases[i] / (double)alen : 0);
+        }
+    }
+    fprintf(ofp, "\n");
+
+    if (type == 'C') {
+        // For combined we can compute mean & standard deviation too
+        fprintf(ofp, "CDEPTH\tMEAN");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            amplicon_t *amp = amps[r].amp;
+            int nseq = stats->nseq - stats->nfiltered - stats->nfailprimer;
+            for (i = 0; i < amps[r].namp; i++) {
+                int64_t alen = amp[i].min_right - amp[i].max_left+1;
+                fprintf(ofp, "\t%.1f", nseq ? stats->nbases[i] / (double)alen / nfile : 0);
+            }
+        }
+        fprintf(ofp, "\n");
+
+        fprintf(ofp, "CDEPTH\tSTDDEV");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            amplicon_t *amp = amps[r].amp;
+            for (i = 0; i < amps[r].namp; i++) {
+                double alen = amp[i].min_right - amp[i].max_left+1;
+                double n1 = stats->nbases[i] / alen;
+                double v = stats->nbases2[i] / (alen*alen) /nfile
+                    - (n1/nfile)*(n1/nfile);
+                fprintf(ofp, "\t%.1f", v>0?sqrt(v):0);
+            }
+        }
+        fprintf(ofp, "\n");
+    }
+
+    // Percent Coverage
+    if (type == 'F') {
+        fprintf(ofp, "# Percentage coverage per amplicon\n");
+        fprintf(ofp, "# Use 'grep ^%cPCOV | cut -f 2-' to extract this part.\n", type);
+        int d = 0;
+        do {
+            fprintf(ofp, "%cPCOV-%d\t%s", type, args->min_depth[d], name);
+
+            for (r = 0; r < nref; r++) {
+                if (!amps[r].sites)
+                    continue;
+                astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+                amplicon_t *amp = amps[r].amp;
+                for (i = 0; i < amps[r].namp; i++) {
+                    int covered = 0;
+                    if (amp[i].min_right - amp[i].min_left > stats->max_amp_len) {
+                        fprintf(stderr, "[ampliconstats] error: "
+                                "Maximum amplicon length (%d) exceeded for '%s'\n",
+                                stats->max_amp_len, name);
+                        return -1;
+                    }
+                    int64_t j, offset = amp[i].min_left-1;
+                    for (j = amp[i].max_left-1; j < amp[i].min_right; j++) {
+                        int apos = i*stats->max_amp_len + j-offset;
+                        if (stats->coverage[apos] >= args->min_depth[d])
+                            covered++;
+                    }
+                    int64_t alen = amp[i].min_right - amp[i].max_left+1;
+                    stats->covered_perc[i][d] = 100.0 * covered / alen;
+                    fprintf(ofp, "\t%.2f", 100.0 * covered / alen);
+                }
+            }
+            fprintf(ofp, "\n");
+        } while (++d < MAX_DEPTH && args->min_depth[d]);
+
+    } else if (type == 'C') {
+        // For combined we can compute mean & standard deviation too
+        int d = 0;
+        do {
+            fprintf(ofp, "CPCOV-%d\tMEAN", args->min_depth[d]);
+            for (r = 0; r < nref; r++) {
+                if (!amps[r].sites)
+                    continue;
+                astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+                for (i = 0; i < amps[r].namp; i++) {
+                    fprintf(ofp, "\t%.1f", stats->covered_perc[i][d] / nfile);
+                }
+            }
+            fprintf(ofp, "\n");
+
+            fprintf(ofp, "CPCOV-%d\tSTDDEV", args->min_depth[d]);
+            for (r = 0; r < nref; r++) {
+                if (!amps[r].sites)
+                    continue;
+                astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+                for (i = 0; i < amps[r].namp; i++) {
+                    double n1 = stats->covered_perc[i][d] / nfile;
+                    double v = stats->covered_perc2[i][d] / nfile - n1*n1;
+                    fprintf(ofp, "\t%.1f", v>0?sqrt(v):0);
+                }
+            }
+            fprintf(ofp, "\n");
+        } while (++d < MAX_DEPTH && args->min_depth[d]);
+    }
+
+    // Plus base depth for all reads, irrespective of amplicon.
+    // This is post overlap removal, if reads in the read-pair overlap.
+    fprintf(ofp, "# Depth per reference base for ALL data.\n");
+    fprintf(ofp, "# Use 'grep ^%cDP_ALL | cut -f 2-' to extract this part.\n",
+            type);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        if (args->multi_ref)
+            fprintf(ofp, "%cDP_ALL\t%s\t%s", type, name, amps[r].ref);
+        else
+            fprintf(ofp, "%cDP_ALL\t%s", type, name);
+
+        for (i = 0; i < amps[r].len; i++) {
+            // Basic run-length encoding provided all values are within
+            // +- depth_bin fraction of the mid-point.
+            int dmin = stats->depth_all[i], dmax = stats->depth_all[i], j;
+            double dmid = (dmin + dmax)/2.0;
+            double low  = dmid*(1-args->depth_bin);
+            double high = dmid*(1+args->depth_bin);
+            for (j = i+1; j < amps[r].len; j++) {
+                int d = stats->depth_all[j];
+                if (d < low || d > high)
+                    break;
+                if (dmin > d) {
+                    dmin = d;
+                    dmid = (dmin + dmax)/2.0;
+                    low  = dmid*(1-args->depth_bin);
+                    high = dmid*(1+args->depth_bin);
+                } else if (dmax < d) {
+                    dmax = d;
+                    dmid = (dmin + dmax)/2.0;
+                    low  = dmid*(1-args->depth_bin);
+                    high = dmid*(1+args->depth_bin);
+                }
+            }
+            fprintf(ofp, "\t%d,%d", (int)dmid, j-i);
+            i = j-1; // continue after the run just written
+        }
+        fprintf(ofp, "\n");
+    }
+
+    // And depth for only reads matching to a single amplicon for full
+    // length.  This is post read overlap removal.
+    fprintf(ofp, "# Depth per reference base for full-length valid amplicon data.\n");
+    fprintf(ofp, "# Use 'grep ^%cDP_VALID | cut -f 2-' to extract this "
+            "part.\n", type);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        if (args->multi_ref)
+            fprintf(ofp, "%cDP_VALID\t%s\t%s", type, name, amps[r].ref);
+        else
+            fprintf(ofp, "%cDP_VALID\t%s", type, name);
+
+        for (i = 0; i < amps[r].len; i++) {
+            int dmin = stats->depth_valid[i], dmax = stats->depth_valid[i], j;
+            double dmid = (dmin + dmax)/2.0;
+            double low  = dmid*(1-args->depth_bin);
+            double high = dmid*(1+args->depth_bin);
+            for (j = i+1; j < amps[r].len; j++) {
+                int d = stats->depth_valid[j];
+                if (d < low || d > high)
+                    break;
+                if (dmin > d) {
+                    dmin = d;
+                    dmid = (dmin + dmax)/2.0;
+                    low  = dmid*(1-args->depth_bin);
+                    high = dmid*(1+args->depth_bin);
+                } else if (dmax < d) {
+                    dmax = d;
+                    dmid = (dmin + dmax)/2.0;
+                    low  = dmid*(1-args->depth_bin);
+                    high = dmid*(1+args->depth_bin);
+                }
+            }
+            fprintf(ofp, "\t%d,%d", (int)dmid, j-i);
+            i = j-1; // continue after the run just written
+        }
+        fprintf(ofp, "\n");
+    }
+
+    // TCOORD (start to end) distribution
+    fprintf(ofp, "# Distribution of aligned template coordinates.\n");
+    fprintf(ofp, "# Use 'grep ^%cTCOORD | cut -f 2-' to extract this part.\n", type);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0 - (nref==1); i < amps[r].namp; i++) { // i == -1 iff nref == 1
+            if (ntcoord < kh_size(stats->tcoord[i+1])) {
+                ntcoord = kh_size(stats->tcoord[i+1]);
+                tcoord_t *tmp = realloc(tpos, ntcoord * sizeof(*tmp));
+                if (!tmp) {
+                    free(tpos);
+                    return -1;
+                }
+                tpos = tmp;
+            }
+
+            khiter_t k;
+            size_t n = 0, j;
+            for (k = kh_begin(stats->tcoord[i+1]);
+                 k != kh_end(stats->tcoord[i+1]); k++) {
+                if (!kh_exist(stats->tcoord[i+1], k) ||
+                    (kh_value(stats->tcoord[i+1], k) & 0xFFFFFFFF) == 0)
+                    continue;
+                // Key is start,end in 32-bit quantities.  This limits us to
+                // 4Gb references, but just how many primers are we planning
+                // on making?  Not that many I hope.
+                tpos[n].start = kh_key(stats->tcoord[i+1], k)&0xffffffff;
+                tpos[n].end   = kh_key(stats->tcoord[i+1], k)>>32;
+
+                // Value is frequency (bottom 32-bits) and status (top 32).
+                tpos[n].freq   = kh_value(stats->tcoord[i+1], k)&0xffffffff;
+                tpos[n].status = kh_value(stats->tcoord[i+1], k)>>32;
+                n++;
+            }
+
+            if (args->tcoord_bin > 1)
+                aggregate_tcoord(args, tpos, &n);
+
+            fprintf(ofp, "%cTCOORD\t%s\t%d", type, name,
+                    i+1+amps[r].first_amp); // per amplicon
+            for (j = 0; j < n; j++) {
+                if (tpos[j].freq < args->tcoord_min_count)
+                    continue;
+                fprintf(ofp, "\t%d,%d,%u,%u",
+                        tpos[j].start,
+                        tpos[j].end,
+                        tpos[j].freq,
+                        tpos[j].status);
+            }
+            fprintf(ofp, "\n");
+        }
+    }
+
+    // AMP length distribution.
+    // 0 = both ends in this amplicon
+    // 1 = ends in different amplicons
+    // 2 = other end matching an unknown amplicon site
+    //     (see tcoord for further analysis of where)
+    fprintf(ofp, "# Classification of amplicon status.  Columns are\n");
+    fprintf(ofp, "# number with both primers from this amplicon, number with\n");
+    fprintf(ofp, "# primers from different amplicon, and number with a position\n");
+    fprintf(ofp, "# not matching any valid amplicon primer site\n");
+    fprintf(ofp, "# Use 'grep ^%cAMP | cut -f 2-' to extract this part.\n", type);
+
+    fprintf(ofp, "%cAMP\t%s\t0", type, name); // all merged
+    int amp_dist[3] = {0};
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++) { // accumulate for all amps
+            amp_dist[0] += stats->amp_dist[i][0];
+            amp_dist[1] += stats->amp_dist[i][1];
+            amp_dist[2] += stats->amp_dist[i][2];
+        }
+    }
+    fprintf(ofp, "\t%d\t%d\t%d\n", amp_dist[0], amp_dist[1], amp_dist[2]);
+
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++) {
+            // per amplicon
+            fprintf(ofp, "%cAMP\t%s\t%d", type, name, i+1+amps[r].first_amp);
+            fprintf(ofp, "\t%d\t%d\t%d\n", stats->amp_dist[i][0],
+                    stats->amp_dist[i][1], stats->amp_dist[i][2]);
+        }
+    }
+
+    free(tpos);
+    return 0;
+}
+
+int dump_lstats(astats_args_t *args, char type, char *name, int nfile,
+               amplicons_t *amps, int nref) {
+    return dump_stats(args, type, name, nfile, amps, nref, 1); // local: lstats
+}
+
+int dump_gstats(astats_args_t *args, char type, char *name, int nfile,
+               amplicons_t *amps, int nref) {
+    return dump_stats(args, type, name, nfile, amps, nref, 0); // global: gstats
+}
+
+char const *get_sample_name(sam_hdr_t *header, char *RG) {
+    kstring_t ks = {0}; // ks.s is NULL unless the lookup below fills it in
+    sam_hdr_find_tag_id(header, "RG", RG?"ID":NULL, RG, "SM", &ks);
+    return ks.s; // presumably the RG "SM" value; caller in this file free()s it
+}
+
+// Return the length of the reference named SQ, or -1 if that name cannot
+// be resolved to a tid.  With SQ == NULL return the longest reference
+// length found in the header (0 if it lists no references).
+int64_t get_ref_len(sam_hdr_t *header, const char *SQ) {
+    if (SQ) {
+        int tid = sam_hdr_name2tid(header, SQ);
+        return tid >= 0 ? sam_hdr_tid2len(header, tid) : -1;
+    }
+    int nref = sam_hdr_nref(header), tid;
+    int64_t len = 0;
+    for (tid = 0; tid < nref; tid++) {
+        int64_t rl = sam_hdr_tid2len(header, tid);
+        if (len < rl)
+            len = rl;
+    }
+    return len;
+}
+
+static int amplicon_stats(astats_args_t *args, khash_t(bed_list_hash) *bed_hash,
+                          char **filev, int filec) {
+    // Driver: writes the SS header, then 'F' stats for each of the filec
+    // files and finally the 'C' combined stats.  Returns 0, or -1 on error.
+    int i, ref = -1, ref_tid = -1, ret = -1, nref = 0;
+    samFile *fp = NULL;
+    sam_hdr_t *header = NULL;
+    bam1_t *b = bam_init1();
+    FILE *ofp = args->out_fp;
+    char sname_[8192], *sname = NULL;
+    amplicons_t *amps = NULL;
+    if (!b) // record allocation failed; nothing else is set up yet
+        goto err;
+
+    // Report initial SS header.  We gather data from the bed_hash entries
+    // as well as from the first SAM header (with the requirement that all
+    // headers should be compatible).
+    if (filec) {
+        if (!(fp = sam_open_format(filev[0], "r", &args->ga.in))) {
+            print_error_errno("ampliconstats",
+                              "Cannot open input file \"%s\"",
+                              filev[0]);
+            goto err;
+        }
+        if (!(header = sam_hdr_read(fp)))
+            goto err;
+
+        if (!amps) {
+            amps = calloc(nref=sam_hdr_nref(header), sizeof(*amps));
+            if (!amps)
+                goto err;
+            fprintf(ofp, "# Summary statistics, used for scaling the plots.\n");
+            fprintf(ofp, "SS\tSamtools version: %s\n", samtools_version());
+            fprintf(ofp, "SS\tCommand line: %s\n", args->argv);
+            fprintf(ofp, "SS\tNumber of files:\t%d\n", filec);
+
+            // Note: order of hash entries will be different to order of
+            // BED file which may also differ to order of SQ headers.
+            // SQ header is canonical ordering (pos sorted file).
+            khiter_t k;
+            int bam_nref = sam_hdr_nref(header);
+            for (i = 0; i < bam_nref; i++) {
+                k = kh_get(bed_list_hash, bed_hash,
+                           sam_hdr_tid2name(header, i));
+                if (k == kh_end(bed_hash))
+                    continue; // kh_get() yields kh_end() for an absent key
+
+                bed_entry_list_t *sites = &kh_value(bed_hash, k);
+
+                ref = i;
+                amps[ref].ref = kh_key(bed_hash, k);
+                amps[ref].sites = sites;
+                amps[ref].namp = count_amplicon(sites);
+                amps[ref].amp  = calloc(sites->length,
+                                        sizeof(*amps[ref].amp));
+                if (!amps[ref].amp)
+                    goto err;
+                if (args->multi_ref)
+                    fprintf(ofp, "SS\tNumber of amplicons:\t%s\t%d\n",
+                            kh_key(bed_hash, k), amps[ref].namp);
+                else
+                    fprintf(ofp, "SS\tNumber of amplicons:\t%d\n",
+                            amps[ref].namp);
+
+                amps[ref].tid = ref;
+                if (ref_tid == -1)
+                    ref_tid = ref;
+
+                int64_t len = get_ref_len(header, kh_key(bed_hash, k));
+                amps[ref].len = len;
+                if (args->multi_ref)
+                    fprintf(ofp, "SS\tReference length:\t%s\t%"PRId64"\n",
+                            kh_key(bed_hash, k), len);
+                else
+                    fprintf(ofp, "SS\tReference length:\t%"PRId64"\n",
+                            len);
+
+                amps[ref].lstats = stats_alloc(len, args->max_amp,
+                                               args->max_amp_len);
+                amps[ref].gstats = stats_alloc(len, args->max_amp,
+                                               args->max_amp_len);
+                if (!amps[ref].lstats || !amps[ref].gstats)
+                    goto err;
+            }
+        }
+
+        sam_hdr_destroy(header);
+        header = NULL;
+        if (sam_close(fp) < 0) {
+            fp = NULL;
+            goto err;
+        }
+        fp = NULL;
+    }
+    fprintf(ofp, "SS\tEnd of summary\n");
+
+    // Extract the bits of amplicon data we need from bed hash and turn
+    // it into a position-to-amplicon lookup table.
+    int offset = 0;
+    for (i = 0; i < nref; i++) {
+        if (!amps[i].sites)
+            continue;
+
+        amps[i].first_amp = offset;
+        if (bed2amplicon(args, amps[i].sites, amps[i].amp,
+                         &amps[i].namp, i==0, amps[i].ref, offset) < 0)
+            goto err;
+
+        offset += amps[i].namp; // cumulative amplicon number across refs
+    }
+
+    // Now iterate over file contents, one at a time.
+    for (i = 0; i < filec; i++) {
+        char *nstart = filev[i];
+
+        fp = sam_open_format(filev[i], "r", &args->ga.in);
+        if (!fp) {
+            print_error_errno("ampliconstats",
+                              "Cannot open input file \"%s\"",
+                              filev[i]);
+            goto err;
+        }
+
+        if (args->ga.nthreads > 0)
+            hts_set_threads(fp, args->ga.nthreads);
+
+        if (!(header = sam_hdr_read(fp)))
+            goto err;
+
+        if (nref != sam_hdr_nref(header)) {
+            print_error_errno("ampliconstats",
+                              "SAM headers are not consistent across input files");
+            goto err;
+        }
+        int r;
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].ref || // NOTE(review): NULL if ref not in BED?
+                strcmp(amps[r].ref, sam_hdr_tid2name(header, r)) != 0 ||
+                amps[r].len != sam_hdr_tid2len(header, r)) {
+                print_error_errno("ampliconstats",
+                                  "SAM headers are not consistent across "
+                                  "input files");
+                goto err;
+            }
+        }
+
+        if (args->use_sample_name)
+            sname = (char *)get_sample_name(header, NULL);
+
+        if (!sname) {
+            sname = sname_;
+            char *nend = filev[i] + strlen(filev[i]), *cp;
+            if ((cp = strrchr(filev[i], '/')))
+                nstart = cp+1;
+            if ((cp = strrchr(nstart, '.')) &&
+                (strcmp(cp, ".bam") == 0 ||
+                 strcmp(cp, ".sam") == 0 ||
+                 strcmp(cp, ".cram") == 0))
+                nend = cp;
+            if (nend - nstart >= 8192) nend = nstart+8191;
+            memcpy(sname, nstart, nend-nstart);
+            sname[nend-nstart] = 0;
+        }
+
+        // Stats local to this sample only
+        amp_stats_reset(amps, nref);
+
+        int last_ref = -9;
+        while ((r = sam_read1(fp, header, b)) >= 0) {
+            // Other filter options useful here?
+            if (b->core.tid < 0)
+                continue;
+
+            if (last_ref != b->core.tid) {
+                last_ref  = b->core.tid;
+                if (initialise_amp_pos_lookup(args, amps, last_ref) < 0)
+                    goto err;
+            }
+
+            if (accumulate_stats(args, amps, b) < 0)
+                goto err;
+        }
+
+        if (r < -1) {
+            print_error_errno("ampliconstats", "Fail reading record");
+            goto err;
+        }
+
+        sam_hdr_destroy(header);
+        header = NULL; // so the err path cannot destroy it a second time
+        if (sam_close(fp) < 0) {
+            fp = NULL;
+            goto err;
+        }
+        fp = NULL;
+
+        if (dump_lstats(args, 'F', sname, filec, amps, nref) < 0)
+            goto err;
+
+        if (append_stats(amps, nref) < 0)
+            goto err;
+
+        if (sname && sname != sname_)
+            free(sname);
+        sname = NULL;
+    }
+
+    if (dump_gstats(args, 'C', "COMBINED", filec, amps, nref) < 0)
+        goto err;
+
+    ret = 0;
+ err:
+    bam_destroy1(b);
+    if (ret) {
+        if (header)
+            sam_hdr_destroy(header);
+        if (fp)
+            sam_close(fp);
+    }
+    for (i = 0; amps && i < nref; i++) { // amps is NULL if its calloc failed
+        stats_free(amps[i].lstats);
+        stats_free(amps[i].gstats);
+        free(amps[i].amp);
+    }
+    free(amps);
+    free(pos2start);
+    free(pos2end);
+    if (ret && sname && sname != sname_)
+        free(sname);
+
+    return ret;
+}
+
+// Writes the ampliconstats help text to fp; args supplies the flag, depth
+// and margin defaults shown in brackets.  exit_status is returned as-is so
+// that callers can write "return usage(...)".
+static int usage(astats_args_t *args, FILE *fp, int exit_status) {
+    fputs("\nUsage: samtools ampliconstats [options] primers.bed *.bam > astats.txt\n"
+          "\nOptions:\n", fp);
+    fprintf(fp, "  -f, --required-flag STR|INT\n"
+            "               Only include reads with all of the FLAGs present [0x%X]\n", args->flag_require);
+    fprintf(fp, "  -F, --filter-flag STR|INT\n"
+            "               Only include reads with none of the FLAGs present [0x%X]\n", args->flag_filter & 0xffff);
+    fprintf(fp, "  -a, --max-amplicons INT\n"
+            "               Change the maximum number of amplicons permitted [%d]\n", MAX_AMP);
+    fprintf(fp, "  -l, --max-amplicon-length INT\n"
+            "               Change the maximum length of an individual amplicon [%d]\n", MAX_AMP_LEN);
+    fprintf(fp, "  -d, --min-depth INT[,INT]...\n"
+            "               Minimum base depth(s) to consider position covered [%d]\n", args->min_depth[0]);
+    fprintf(fp, "  -m, --pos-margin INT\n"
+            "               Margin of error for matching primer positions [%d]\n", args->max_delta);
+    fputs("  -o, --output FILE\n"
+          "               Specify output file [stdout if unset]\n", fp);
+    fputs("  -s, --use-sample-name\n"
+          "               Use the sample name from the first @RG header line\n", fp);
+    fputs("  -t, --tlen-adjust INT\n"
+          "               Add/subtract from TLEN; use when clipping but no fixmate step\n", fp);
+    fputs("  -b, --tcoord-bin INT\n"
+          "               Bin template start,end positions into multiples of INT[1]\n", fp);
+    fprintf(fp, "  -c, --tcoord-min-count INT\n"
+            "               Minimum template start,end frequency for recording [%d]\n", TCOORD_MIN_COUNT);
+    fputs("  -D, --depth-bin FRACTION\n"
+          "               Merge FDP values within +/- FRACTION together\n", fp);
+    fputs("  -S, --single-ref\n"
+          "               Force single-ref (<=1.12) output format\n", fp);
+    sam_global_opt_help(fp, "I.--.@");
+
+    return exit_status;
+}
+
+// Entry point for "samtools ampliconstats": parses the options, loads the
+// primer BED file (first positional argument) and runs amplicon_stats() on
+// the remaining arguments, or on "-" when no input files are named.
+// Returns amplicon_stats()'s result, 1 on BED errors, else usage()'s status.
+int main_ampliconstats(int argc, char **argv) {
+    astats_args_t args = {
+        .ga = SAM_GLOBAL_ARGS_INIT,
+        .flag_require = 0,
+        .flag_filter = 0x10B04, // bit 0x10000 marks "defaults not yet replaced"
+        //.sites = BED_LIST_INIT,
+        .max_delta = 30, // large enough to cope with alt primers
+        .min_depth = {1},
+        .use_sample_name = 0,
+        .max_amp = MAX_AMP,
+        .max_amp_len = MAX_AMP_LEN,
+        .tlen_adj = 0,
+        .out_fp = stdout,
+        .tcoord_min_count = TCOORD_MIN_COUNT,
+        .tcoord_bin = 1,
+        .depth_bin = 0.01,
+        .multi_ref = 1
+    }, oargs = args; // oargs: untouched defaults for usage() to report
+
+    static const struct option loptions[] =
+    {
+        SAM_OPT_GLOBAL_OPTIONS('I', 0, '-', '-', 0, '@'),
+        {"help", no_argument, NULL, 'h'},
+        {"flag-require", required_argument, NULL, 'f'},
+        {"required-flag", required_argument, NULL, 'f'}, // spelling in usage()
+        {"flag-filter", required_argument, NULL, 'F'},
+        {"filter-flag", required_argument, NULL, 'F'},   // spelling in usage()
+        {"min-depth", required_argument, NULL, 'd'},
+        {"output", required_argument, NULL, 'o'},
+        {"pos-margin", required_argument, NULL, 'm'},
+        {"use-sample-name", no_argument, NULL, 's'},
+        {"max-amplicons", required_argument, NULL, 'a'},
+        {"max-amplicon-length", required_argument, NULL, 'l'},
+        {"tlen-adjust", required_argument, NULL, 't'},
+        {"tcoord-min-count", required_argument, NULL, 'c'},
+        {"tcoord-bin", required_argument, NULL, 'b'},
+        {"depth-bin", required_argument, NULL, 'D'},
+        {"single-ref", no_argument, NULL, 'S'},
+        {NULL, 0, NULL, 0}
+    };
+    int opt;
+
+    while ( (opt=getopt_long(argc,argv,"?hf:F:@:p:m:d:sa:l:t:o:c:b:D:S",loptions,NULL))>0 ) {
+        switch (opt) {
+        case 'f': args.flag_require = bam_str2flag(optarg); break;
+        case 'F':
+            if (args.flag_filter & 0x10000)
+                args.flag_filter = 0; // strip default on first -F usage
+            args.flag_filter |= bam_str2flag(optarg); break;
+
+        case 'm': args.max_delta = atoi(optarg); break; // margin
+        case 'D': args.depth_bin = atof(optarg); break; // depth bin fraction
+        case 'd': {
+            int d = 0;
+            char *cp = optarg, *ep;
+            do {
+                long n = strtol(cp, &ep, 10);
+                args.min_depth[d++] = n;
+                if (*ep != ',')
+                    break;
+                cp = ep+1;
+            } while (d < MAX_DEPTH);
+            break;
+        }
+
+        case 'a': args.max_amp = atoi(optarg)+1;break;
+        case 'l': args.max_amp_len = atoi(optarg)+1;break;
+        case 'c': args.tcoord_min_count = atoi(optarg);break;
+        case 'b':
+            args.tcoord_bin = atoi(optarg);
+            if (args.tcoord_bin < 1)
+                args.tcoord_bin = 1;
+            break;
+        case 't': args.tlen_adj = atoi(optarg);break;
+        case 's': args.use_sample_name = 1;break;
+        case 'S': args.multi_ref = 0; break;
+
+        case 'o':
+            if (!(args.out_fp = fopen(optarg, "w"))) {
+                perror(optarg);
+                return 1;
+            }
+            break;
+
+        case '?': return usage(&oargs, stderr, EXIT_FAILURE);
+        case 'h': return usage(&oargs, stdout, EXIT_SUCCESS);
+
+        default:
+            // Bad global option: stop here rather than run with it ignored.
+            if (parse_sam_global_opt(opt, optarg, loptions, &args.ga) != 0)
+                return usage(&oargs,stderr, EXIT_FAILURE);
+            break;
+        }
+    }
+
+    if (argc <= optind)
+        return usage(&oargs, stdout, EXIT_SUCCESS);
+    if (argc <= optind+1 && isatty(STDIN_FILENO))
+        return usage(&oargs, stderr, EXIT_FAILURE);
+
+    khash_t(bed_list_hash) *bed_hash = kh_init(bed_list_hash);
+    if (load_bed_file_multi_ref(argv[optind], 1, 0, bed_hash)) {
+        print_error_errno("ampliconstats",
+                          "Could not read file \"%s\"", argv[optind]);
+        return 1;
+    }
+
+    khiter_t k, ref_count = 0;
+    for (k = kh_begin(bed_hash); k != kh_end(bed_hash); k++) {
+        if (!kh_exist(bed_hash, k))
+            continue;
+        ref_count++;
+    }
+    if (ref_count == 0)
+        return 1;
+    if (ref_count > 1 && args.multi_ref == 0) {
+        print_error("ampliconstats",
+                    "Single-ref mode is not permitted for BED files\n"
+                    "containing more than one reference.");
+        return 1;
+    }
+
+    args.argv = stringify_argv(argc, argv);
+    int ret;
+    if (argc == ++optind) {
+        char *av = "-";
+        ret = amplicon_stats(&args, bed_hash, &av, 1);
+    } else {
+        ret = amplicon_stats(&args, bed_hash, &argv[optind], argc-optind);
+    }
+
+    free(args.argv);
+    destroy_bed_hash(bed_hash);
+
+    return ret;
+}
diff --git a/samtools/amplicon_stats.c.pysam.c b/samtools/amplicon_stats.c.pysam.c
new file mode 100644
index 0000000..aa09459
--- /dev/null
+++ b/samtools/amplicon_stats.c.pysam.c
@@ -0,0 +1,1756 @@
+#include "samtools.pysam.h"
+
+/*  stats.c -- This is the former bamcheck integrated into samtools/htslib.
+
+    Copyright (C) 2020-2021 Genome Research Ltd.
+
+    Author: James Bonfield <jkb@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+/*
+ * This tool is designed to give "samtools stats" style output, but dedicated
+ * to small amplicon sequencing projects.  It gathers stats on the
+ * distribution of reads across amplicons.
+ */
+
+/*
+ * TODO:
+ * - Cope with multiple references.  What do we do here?  Just request one?
+ * - Permit regions rather than consuming whole file (maybe solves above).
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <math.h>
+
+#include <htslib/sam.h>
+#include <htslib/khash.h>
+
+#include "samtools.h"
+#include "sam_opts.h"
+#include "bam_ampliconclip.h"
+
+KHASH_MAP_INIT_INT64(tcoord, int64_t)
+KHASH_MAP_INIT_STR(qname, int64_t)
+
+#ifndef MIN
+#define MIN(a,b) ((a)<(b)?(a):(b))
+#endif
+
+#ifndef MAX
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#endif
+
+#ifndef ABS
+#define ABS(a) ((a)>=0?(a):-(a))
+#endif
+
+#define TCOORD_MIN_COUNT   10
+#define MAX_AMP 1000       // Default maximum number of amplicons
+#define MAX_AMP_LEN 1000   // Default maximum length of any single amplicon
+#define MAX_PRIMER_PER_AMPLICON 4  // Max primers per LEFT/RIGHT
+#define MAX_DEPTH 5        // Number of different depths permitted
+
+typedef struct { // ampliconstats command-line settings
+    sam_global_args ga;
+    uint32_t flag_require; // reads lacking any of these flag bits are filtered
+    uint32_t flag_filter;  // reads having any of these flag bits are filtered
+    int max_delta;   // Used for matching read to amplicon primer loc
+    int min_depth[MAX_DEPTH]; // Used for coverage; must be >= min_depth deep
+    int use_sample_name;
+    int max_amp;     // Upper limit (exclusive) on the number of amplicons
+    int max_amp_len; // Maximum length of an individual amplicon
+    double depth_bin;// aggregate depth within this fraction
+    int tlen_adj;    // Adjust tlen by this amount, due to clip but no fixmate
+    FILE *out_fp;    // destination for the report text
+    char *argv;
+    int tcoord_min_count;
+    int tcoord_bin;
+    int multi_ref;   // non-zero: AMPLICON lines also carry a REF column
+} astats_args_t;
+
+typedef struct { // statistics for one reference; per file or across files
+    int nseq;       // total sequence count
+    int nfiltered;  // sequences rejected by the flag filters
+    int nfailprimer;// count of sequences not matching the primer locations
+
+    // Sizes of memory allocated below, to permit reset
+    int max_amp, max_amp_len, max_len;
+
+    // Summary across all samples, sum(x) plus sum(x^2) for s.d. calc
+    int64_t *nreads, *nreads2;          // [max_amp]
+    double  *nfull_reads;               // [max_amp]; 0.5/read if paired.
+    double  *nrperc, *nrperc2;          // [max_amp]
+    int64_t *nbases, *nbases2;          // [max_amp]
+    int64_t *coverage;                  // [max_amp][max_amp_len]
+    double  (*covered_perc)[MAX_DEPTH]; // [max_amp][MAX_DEPTH]
+    double  (*covered_perc2)[MAX_DEPTH];// [max_amp][MAX_DEPTH];
+    khash_t(tcoord) **tcoord;           // [max_amp+1]; [0] is "no amplicon"
+
+    // 0 is correct pair, 1 is incorrect pair, 2 is unidentified
+    int     (*amp_dist)[3];             // [max_amp][3];
+
+    int *depth_valid; // [max_len]
+    int *depth_all;   // [max_len]
+    khash_t(qname) *qend;  // qname -> start | end<<32, for overlap removal
+} astats_t;
+
+// We can have multiple primers for LEFT / RIGHT, so this
+// permits detection by any compatible combination.
+// One amplicon (of one reference):
+typedef struct {
+    int64_t left[MAX_PRIMER_PER_AMPLICON];  // BED end of each LEFT primer
+    int nleft;                              // entries used in left[]
+    int64_t right[MAX_PRIMER_PER_AMPLICON]; // BED start of each RIGHT primer
+    int nright;                             // entries used in right[]
+    int64_t max_left, min_right; // inner dimensions
+    int64_t min_left, max_right; // outer dimensions
+} amplicon_t;
+
+// Multiple references, we have an array of amplicons_t - one per used ref.
+// We have per reference local and global stats here, as some of the stats
+// are coordinate based.  However we report them combined together as a single
+// list across all references.
+// "namp" is the number of amplicons in this reference, but they're
+// numbered first_amp to first_amp+namp-1 inclusively.
+typedef struct {
+    int tid, namp;             // the array itself is indexed by record tid
+    int64_t len;               // bounds the position lookups for this ref
+    bed_entry_list_t *sites;   // refs with NULL sites are skipped on reset
+    amplicon_t *amp;           // namp entries
+    astats_t *lstats, *gstats; // local (1 file) and global (all file) stats
+    const char *ref;           // ref name (pointer to the bed hash table key)
+    int first_amp;             // first amplicon number for this ref
+} amplicons_t;
+
+// Reinitialised for each new reference/chromosome.
+// Hold the 0-based amplicon index (0 to namp-1), or -1 for no match.
+static int *pos2start = NULL;
+static int *pos2end = NULL;
+static int pos2size = 0; // allocated size of pos2start/end
+
+// Builds the position-to-amplicon lookup tables for reference "ref".
+// For every position p (1..len) within args->max_delta of a LEFT / RIGHT
+// primer coordinate, pos2start[p-1] / pos2end[p-1] is set to that amplicon's
+// 0-based index; all other entries are -1.  Later amplicons win overlaps.
+// Returns 0 on success, -1 if memory allocation fails.
+static int initialise_amp_pos_lookup(astats_args_t *args,
+                                     amplicons_t *amps,
+                                     int ref) {
+    int64_t i, j;
+    amplicon_t *amp = amps[ref].amp;
+    int64_t max_len = amps[ref].len;
+    int namp = amps[ref].namp;
+
+    if (max_len+1 > pos2size) {
+        // Go via a temporary so a failed realloc doesn't lose the old buffer.
+        int *tmp = realloc(pos2start, (max_len+1)*sizeof(*pos2start));
+        if (!tmp) return -1;
+        pos2start = tmp;
+        tmp = realloc(pos2end, (max_len+1)*sizeof(*pos2end));
+        if (!tmp) return -1;
+        pos2end = tmp;
+        pos2size = max_len+1; // record what was actually allocated
+    }
+    for (i = 0; i < max_len; i++)
+        pos2start[i] = pos2end[i] = -1;
+
+    for (i = 0; i < namp; i++) {
+        for (j = 0; j < amp[i].nleft; j++) {
+            int64_t p = amp[i].left[j] - args->max_delta;
+            for (; p <= amp[i].left[j] + args->max_delta; p++)
+                if (p >= 1 && p <= max_len)
+                    pos2start[p-1] = i;
+        }
+        for (j = 0; j < amp[i].nright; j++) {
+            int64_t p = amp[i].right[j] - args->max_delta;
+            for (; p <= amp[i].right[j] + args->max_delta; p++)
+                if (p >= 1 && p <= max_len)
+                    pos2end[p-1] = i;
+        }
+    }
+
+    return 0;
+}
+
+// Returns the number of amplicons described by "sites".
+// The BED entries are assumed to list each amplicon's LEFT (+ strand)
+// primers followed by its RIGHT (- strand) ones, so every - to + switch
+// starts another amplicon on top of the implicit first one.
+static int count_amplicon(bed_entry_list_t *sites) {
+    int idx, switches = 0, prev_rev = 0;
+    for (idx = 0; idx < sites->length; idx++) {
+        if (prev_rev && sites->bp[idx].rev == 0)
+            switches++;
+        prev_rev = sites->bp[idx].rev;
+    }
+
+    return switches + 1;
+}
+
+// Converts the BED primer list for one reference into amplicon_t records
+// and prints one "AMPLICON" line per amplicon to args->out_fp.
+//
+// We're only interested in the internal part of the amplicon.
+// Our bed file has LEFT start/end followed by RIGHT start/end,
+// so collapse these to LEFT end / RIGHT start.
+//
+// amp[] must hold at least args->max_amp entries; nleft/nright are only
+// incremented here, so the caller is assumed to have zeroed them.
+// "ref" and "first_amp" are only used when labelling multi-ref output;
+// "do_title" requests the column header comment lines.
+//
+// Returns right most amplicon position on success (and sets *namp),
+//         < 0 on error
+static int64_t bed2amplicon(astats_args_t *args, bed_entry_list_t *sites,
+                            amplicon_t *amp, int *namp, int do_title,
+                            const char *ref, int first_amp) {
+    int i, j;
+    int64_t max_right = 0;
+    FILE *ofp = args->out_fp;
+
+    *namp = 0;
+
+    // Assume all primers for the same amplicon are adjacent in BED
+    // with all + followed by all -.  Thus - to + signifies next primer set.
+    int last_rev = 0;
+    amp[0].max_left = 0;
+    amp[0].min_right = INT64_MAX;
+    amp[0].min_left = INT64_MAX;
+    amp[0].max_right = 0;
+    if (do_title) {
+        fprintf(ofp, "# Amplicon locations from BED file.\n");
+        fprintf(ofp, "# LEFT/RIGHT are <start>-<end> format and "
+                "comma-separated for alt-primers.\n");
+        if (args->multi_ref)
+            fprintf(ofp, "#\n# AMPLICON\tREF\tNUMBER\tLEFT\tRIGHT\n");
+        else
+            fprintf(ofp, "#\n# AMPLICON\tNUMBER\tLEFT\tRIGHT\n");
+    }
+    for (i = j = 0; i < sites->length; i++) {
+        if (i == 0 && sites->bp[i].rev != 0) {
+            fprintf(samtools_stderr, "[ampliconstats] error: BED file should start"
+                    " with the + strand primer\n");
+            return -1;
+        }
+        if (sites->bp[i].rev == 0 && last_rev) {
+            j++;
+            if (j >= args->max_amp) {
+                fprintf(samtools_stderr, "[ampliconstats] error: too many amplicons"
+                        " (%d). Use -a option to raise this.\n", j);
+                return -1;
+            }
+            amp[j].max_left = 0;
+            amp[j].min_right = INT64_MAX;
+            amp[j].min_left = INT64_MAX;
+            amp[j].max_right = 0;
+        }
+        if (sites->bp[i].rev == 0) {
+            if (i == 0 || last_rev) {
+                if (j>0) fprintf(ofp, "\n");
+                if (args->multi_ref)
+                    fprintf(ofp, "AMPLICON\t%s\t%d", ref, j+1 + first_amp);
+                else
+                    fprintf(ofp, "AMPLICON\t%d", j+1);
+            }
+            if (amp[j].nleft >= MAX_PRIMER_PER_AMPLICON) {
+                print_error("ampliconstats", // not an errno condition
+                            "too many primers per amplicon (%d)",
+                            MAX_PRIMER_PER_AMPLICON);
+                return -1;
+            }
+            amp[j].left[amp[j].nleft++] = sites->bp[i].right;
+            if (amp[j].max_left < sites->bp[i].right+1)
+                amp[j].max_left = sites->bp[i].right+1;
+            if (amp[j].min_left > sites->bp[i].right+1)
+                amp[j].min_left = sites->bp[i].right+1;
+            // BED file, so left+1 as zero based. right(+1-1) as
+            // BED goes one beyond end (and we want inclusive range).
+            fprintf(ofp, "%c%"PRId64"-%"PRId64, "\t,"[amp[j].nleft > 1],
+                    sites->bp[i].left+1, sites->bp[i].right);
+        } else {
+            if (amp[j].nright >= MAX_PRIMER_PER_AMPLICON) {
+                print_error("ampliconstats", // not an errno condition
+                            "too many primers per amplicon (%d)",
+                            MAX_PRIMER_PER_AMPLICON);
+                return -1;
+            }
+            amp[j].right[amp[j].nright++] = sites->bp[i].left;
+            if (amp[j].min_right > sites->bp[i].left-1)
+                amp[j].min_right = sites->bp[i].left-1;
+            if (amp[j].max_right < sites->bp[i].left-1) {
+                amp[j].max_right = sites->bp[i].left-1;
+                if (amp[j].max_right - amp[j].min_left + 1 >=
+                    args->max_amp_len) {
+                    fprintf(samtools_stderr, "[ampliconstats] error: amplicon "
+                            "longer (%d) than max_amp_len option (%d)\n",
+                            (int)(amp[j].max_right - amp[j].min_left + 2),
+                            args->max_amp_len);
+                    return -1;
+                }
+                if (max_right < amp[j].max_right)
+                    max_right = amp[j].max_right;
+            }
+            fprintf(ofp, "%c%"PRId64"-%"PRId64, "\t,"[amp[j].nright > 1],
+                    sites->bp[i].left+1, sites->bp[i].right);
+        }
+        last_rev = sites->bp[i].rev;
+    }
+    if (last_rev != 1) {
+        fprintf(ofp, "\n"); // useful if going to samtools_stdout
+        fprintf(samtools_stderr, "[ampliconstats] error: bed file does not end on"
+                " a reverse strand primer.\n");
+        return -1;
+    }
+    *namp = ++j;
+    if (j) fprintf(ofp, "\n");
+
+    if (j >= args->max_amp) {
+        fprintf(samtools_stderr, "[ampliconstats] error: "
+                "too many amplicons (%d). Use -a option to raise this.\n", j);
+        return -1;
+    }
+
+    return max_right;
+}
+
+// Frees st and everything it owns, including the strdup()ed qend keys.
+// Accepts NULL, and structs whose members are still NULL (partial alloc).
+void stats_free(astats_t *st) {
+    if (!st) return;
+    free(st->nreads);
+    free(st->nreads2);
+    free(st->nfull_reads);
+    free(st->nrperc);
+    free(st->nrperc2);
+    free(st->nbases);
+    free(st->nbases2);
+    free(st->coverage);
+    free(st->covered_perc);
+    free(st->covered_perc2);
+    free(st->amp_dist);
+    free(st->depth_valid);
+    free(st->depth_all);
+
+    if (st->tcoord) {
+        int i;
+        for (i = 0; i <= st->max_amp; i++)
+            if (st->tcoord[i])
+                kh_destroy(tcoord, st->tcoord[i]);
+        free(st->tcoord);
+    }
+
+    // kh_end() dereferences the table, so only walk it when it exists.
+    if (st->qend) {
+        khiter_t k;
+        for (k = kh_begin(st->qend); k != kh_end(st->qend); k++)
+            if (kh_exist(st->qend, k))
+                free((void *)kh_key(st->qend, k));
+        kh_destroy(qname, st->qend);
+    }
+    free(st);
+}
+
+// Allocates a zeroed astats_t for max_amp amplicons of up to max_amp_len
+// bases on a reference of max_len bases.  Returns NULL on failure.
+astats_t *stats_alloc(int64_t max_len, int max_amp, int max_amp_len) {
+    astats_t *st = calloc(1, sizeof(*st));
+    if (!st) return NULL;
+
+    // Create qend first so the err path never gives stats_free() a NULL table.
+    if (!(st->qend = kh_init(qname))) {
+        free(st);
+        return NULL;
+    }
+
+    st->max_amp = max_amp;
+    st->max_amp_len = max_amp_len;
+    st->max_len = max_len;
+
+    if (!(st->nreads  = calloc(max_amp, sizeof(*st->nreads))))  goto err;
+    if (!(st->nreads2 = calloc(max_amp, sizeof(*st->nreads2)))) goto err;
+    if (!(st->nrperc  = calloc(max_amp, sizeof(*st->nrperc))))  goto err;
+    if (!(st->nrperc2 = calloc(max_amp, sizeof(*st->nrperc2)))) goto err;
+    if (!(st->nbases  = calloc(max_amp, sizeof(*st->nbases))))  goto err;
+    if (!(st->nbases2 = calloc(max_amp, sizeof(*st->nbases2)))) goto err;
+    if (!(st->nfull_reads = calloc(max_amp, sizeof(*st->nfull_reads))))
+        goto err;
+    // Size in size_t: the int product could overflow for large limits.
+    if (!(st->coverage = calloc((size_t)max_amp*max_amp_len,
+                                sizeof(*st->coverage))))
+        goto err;
+    if (!(st->covered_perc  = calloc(max_amp, sizeof(*st->covered_perc))))
+        goto err;
+    if (!(st->covered_perc2 = calloc(max_amp, sizeof(*st->covered_perc2))))
+        goto err;
+    if (!(st->tcoord = calloc(max_amp+1, sizeof(*st->tcoord)))) goto err;
+    int i;
+    for (i = 0; i <= st->max_amp; i++)
+        if (!(st->tcoord[i] = kh_init(tcoord)))
+            goto err;
+    if (!(st->depth_valid = calloc(max_len, sizeof(*st->depth_valid))))
+        goto err;
+    if (!(st->depth_all   = calloc(max_len, sizeof(*st->depth_all))))
+        goto err;
+    if (!(st->amp_dist  = calloc(max_amp, sizeof(*st->amp_dist))))  goto err;
+
+    return st;
+
+ err:
+    stats_free(st);
+    return NULL;
+}
+
+// Clears the per-file counters in st so it can be reused for the next file.
+// Array sizes come from the max_* values recorded by stats_alloc().
+static void stats_reset(astats_t *st) {
+    st->nseq = 0;
+    st->nfiltered = 0;
+    st->nfailprimer = 0;
+
+    memset(st->nreads,  0, st->max_amp * sizeof(*st->nreads));
+    memset(st->nreads2, 0, st->max_amp * sizeof(*st->nreads2));
+    memset(st->nfull_reads, 0, st->max_amp * sizeof(*st->nfull_reads));
+    memset(st->nrperc,  0, st->max_amp * sizeof(*st->nrperc));
+    memset(st->nrperc2, 0, st->max_amp * sizeof(*st->nrperc2));
+    memset(st->nbases,  0, st->max_amp * sizeof(*st->nbases));
+    memset(st->nbases2, 0, st->max_amp * sizeof(*st->nbases2));
+
+    // sizeof first so the product is computed in size_t, not int.
+    memset(st->coverage, 0, sizeof(*st->coverage) * st->max_amp
+           * st->max_amp_len);
+    memset(st->covered_perc,  0, st->max_amp * sizeof(*st->covered_perc));
+    memset(st->covered_perc2, 0, st->max_amp * sizeof(*st->covered_perc2));
+
+    // Keep the allocated entries as it's likely all files will share
+    // the same keys: zero the common ones and delete the rare ones, judged
+    // on the count in the low 32 bits (the high bits hold the status).
+    int i;
+    for (i = 0; i <= st->max_amp; i++) {
+        khiter_t k;
+        for (k = kh_begin(st->tcoord[i]);
+             k != kh_end(st->tcoord[i]); k++)
+            if (kh_exist(st->tcoord[i], k)) {
+                if ((kh_value(st->tcoord[i], k) & 0xFFFFFFFF) < 5)
+                    kh_del(tcoord, st->tcoord[i], k);
+                else
+                    kh_value(st->tcoord[i], k) = 0;
+            }
+    }
+
+    khiter_t k;
+    for (k = kh_begin(st->qend); k != kh_end(st->qend); k++)
+        if (kh_exist(st->qend, k))
+            free((void *)kh_key(st->qend, k));
+    kh_clear(qname, st->qend);
+    memset(st->depth_valid, 0, st->max_len * sizeof(*st->depth_valid));
+    memset(st->depth_all,   0, st->max_len * sizeof(*st->depth_all));
+    memset(st->amp_dist,  0, st->max_amp * sizeof(*st->amp_dist));
+}
+
+// Resets the per-file (local) stats of each of the nref references,
+// skipping those that have no BED sites attached.
+static void amp_stats_reset(amplicons_t *amps, int nref) {
+    int r;
+    for (r = 0; r < nref; r++)
+        if (amps[r].sites)
+            stats_reset(amps[r].lstats);
+}
+
+// Folds one alignment record into the per-file stats (lstats) of its
+// reference: read / base counts, per-amplicon coverage, template depth and
+// template start,end frequencies.  Uses the pos2start / pos2end tables, so
+// these must describe b->core.tid.  Returns 0 on success, -1 on failure.
+static int accumulate_stats(astats_args_t *args, amplicons_t *amps,
+                            bam1_t *b) {
+    int ref = b->core.tid;
+    amplicon_t *amp = amps[ref].amp;
+    astats_t *stats = amps[ref].lstats;
+    int len = amps[ref].len;
+
+    if (!stats)
+        return 0;
+
+    stats->nseq++;
+    if ((b->core.flag & args->flag_require) != args->flag_require ||
+        (b->core.flag & args->flag_filter)  != 0) {
+        stats->nfiltered++;
+        return 0;
+    }
+
+    int64_t start = b->core.pos, mstart = start; // modified start
+    int64_t end = bam_endpos(b), i;
+
+    // Compute all-template-depth and valid-template-depth.
+    // We track current end location per read name so we can remove overlaps.
+    // Potentially we could use this data for a better amplicon-depth
+    // count too, but for now it's purely for the per-base plots.
+    int ret;
+    khiter_t k;
+    int prev_start = 0, prev_end = 0;
+    if ((b->core.flag & BAM_FPAIRED)
+        && !(b->core.flag & (BAM_FSUPPLEMENTARY | BAM_FSECONDARY))) {
+        k = kh_put(qname, stats->qend, bam_get_qname(b), &ret);
+        if (ret < 0) return -1; // insertion failed; k is not a valid slot
+        if (ret == 0) {
+            prev_start = kh_value(stats->qend, k) & 0xffffffff;
+            prev_end = kh_value(stats->qend, k)>>32;
+            mstart = MAX(mstart, prev_end);
+            // Ideally we'd reuse strings so we don't thrash free/malloc.
+            // However let's see if the official way of doing that (malloc
+            // itself) is fast enough first.
+            free((void *)kh_key(stats->qend, k));
+            kh_del(qname, stats->qend, k);
+        } else {
+            if (!(kh_key(stats->qend, k) = strdup(bam_get_qname(b))))
+                return -1;
+            kh_value(stats->qend, k) = start | (end << 32);
+        }
+    }
+    for (i = mstart; i < end && i < len; i++)
+        stats->depth_all[i]++;
+    if (i < end) {
+        print_error("ampliconstats", "record %s overhangs end of reference",
+                    bam_get_qname(b));
+        // But keep going, as it's harmless.
+    }
+
+    // On single ended runs, eg ONT or PacBio, we just use the start/end
+    // of the template to assign.
+    int anum = (b->core.flag & BAM_FREVERSE) || !(b->core.flag & BAM_FPAIRED)
+        ? (end-1 >= 0 && end-1 < len ? pos2end[end-1] : -1)
+        : (start >= 0 && start < len ? pos2start[start] : -1);
+
+    // ivar sometimes soft-clips 100% of the bases.
+    // This is essentially unmapped
+    if (end == start && (args->flag_filter & BAM_FUNMAP)) {
+        stats->nfiltered++;
+        return 0;
+    }
+
+    if (anum == -1)
+        stats->nfailprimer++;
+
+    if (anum >= 0) {
+        int64_t c = MIN(end,amp[anum].min_right+1) - MAX(start,amp[anum].max_left);
+        if (c > 0) {
+            stats->nreads[anum]++;
+            // NB: ref bases rather than read bases
+            stats->nbases[anum] += c;
+            if (start < 0) start = 0;
+            if (end > len) end = len;
+            int64_t ostart = MAX(start, amp[anum].min_left-1);
+            int64_t oend = MIN(end, amp[anum].max_right);
+            int64_t offset = amp[anum].min_left-1;
+            for (i = ostart; i < oend; i++)
+                stats->coverage[anum*stats->max_amp_len + i-offset]++;
+        } else {
+            stats->nfailprimer++;
+        }
+    }
+
+    // Template length in terms of amplicon number to amplicon number.
+    // We expect left to right of same amplicon (len 0), but it may go
+    // to next amplicon (len 1) or prev (len -1), etc.
+    int64_t t_end;
+    int oth_anum = -1;
+
+    if (b->core.flag & BAM_FPAIRED) {
+        t_end = (b->core.flag & BAM_FREVERSE ? end : start)
+            + b->core.isize;
+
+        // If we've clipped the primers but not followed up with a fixmates
+        // then our start+TLEN will take us to a location which is
+        // length(LEFT_PRIMER) + length(RIGHT_PRIMER) too far away.
+        //
+        // The correct solution is to run samtools fixmate so TLEN is correct.
+        // The hacky solution is to fudge the expected tlen by double the
+        // average primer length (e.g. 50).
+        t_end += b->core.isize > 0 ? -args->tlen_adj : +args->tlen_adj;
+
+        if (t_end > 0 && t_end < len && b->core.isize != 0)
+            oth_anum = (b->core.flag & BAM_FREVERSE)
+                ? pos2start[t_end]
+                : pos2end[t_end];
+    } else {
+        // Not paired (see int anum = (REV || !PAIR) ?en :st expr above)
+        oth_anum = (start >= 0 && start < len) ? pos2start[start] : -1;
+        t_end = end;
+    }
+
+    // We don't want to count our pairs twice.
+    // If both left/right are known, count it on left only.
+    // If only one is known, we'll only get to this code once
+    // so we can also count it.
+    int astatus = 2;
+    if (anum != -1 && oth_anum != -1) {
+        astatus = oth_anum == anum ? 0 : 1;
+        if (start <= t_end)
+            stats->amp_dist[anum][astatus]++;
+    } else if (anum >= 0) {
+        stats->amp_dist[anum][astatus = 2]++;
+    }
+
+    if (astatus == 0 && !(b->core.flag & (BAM_FUNMAP | BAM_FMUNMAP))) {
+        if (prev_end && mstart > prev_end) {
+            // 2nd read with gap to 1st; undo previous increment.
+            for (i = prev_start; i < prev_end && i < len; i++)
+                stats->depth_valid[i]--;
+            stats->nfull_reads[anum] -= (b->core.flag & BAM_FPAIRED) ? 0.5 : 1;
+        } else {
+            // 1st read, or 2nd read that overlaps 1st
+            for (i = mstart; i < end && i < len; i++)
+                stats->depth_valid[i]++;
+            stats->nfull_reads[anum] += (b->core.flag & BAM_FPAIRED) ? 0.5 : 1;
+        }
+    }
+
+    // Track template start,end frequencies, so we can give stats on
+    // amplicon primer usage.
+    if ((b->core.flag & BAM_FPAIRED) && b->core.isize <= 0)
+        // left to right only, so we don't double count template positions.
+        return 0;
+
+    start = b->core.pos;
+    t_end = b->core.flag & BAM_FPAIRED
+        ? start + b->core.isize-1
+        : end;
+    uint64_t tcoord = MIN(start+1, UINT32_MAX) | (MIN(t_end+1, UINT32_MAX)<<32);
+    k = kh_put(tcoord, stats->tcoord[anum+1], tcoord, &ret);
+    if (ret < 0)
+        return -1;
+    if (ret == 0)
+        kh_value(stats->tcoord[anum+1], k)++;
+    else
+        kh_value(stats->tcoord[anum+1], k)=1;
+    kh_value(stats->tcoord[anum+1], k) |= ((int64_t)astatus<<32);
+
+    return 0;
+}
+
+// Fold one file's local stats (lstats) into the global stats (gstats).
+int append_lstats(astats_t *lstats, astats_t *gstats, int namp, int all_nseq) {
+    int amp, d, pos, ret;
+
+    gstats->nseq        += lstats->nseq;
+    gstats->nfiltered   += lstats->nfiltered;
+    gstats->nfailprimer += lstats->nfailprimer;
+
+    // tcoord[] is indexed by amp+1, so amp == -1 lands in slot 0.
+    for (amp = -1; amp < namp; amp++) {
+        khash_t(tcoord) *lh = lstats->tcoord[amp+1];
+        khash_t(tcoord) *gh = gstats->tcoord[amp+1];
+        khiter_t li, gi;
+
+        // Merge every non-zero local hash entry into the global hash.
+        for (li = kh_begin(lh); li != kh_end(lh); li++) {
+            if (!kh_exist(lh, li) || kh_value(lh, li) == 0)
+                continue;
+            gi = kh_put(tcoord, gh, kh_key(lh, li), &ret);
+            if (ret < 0)
+                return -1;
+            // A key already present keeps only its low 32 bits before
+            // the local value is added; a new key takes the local value.
+            if (ret == 0)
+                kh_value(gh, gi) = (kh_value(gh, gi) & 0xFFFFFFFF)
+                                 + kh_value(lh, li);
+            else
+                kh_value(gh, gi) = kh_value(lh, li);
+        }
+        if (amp < 0)
+            continue;
+
+        // Plain sums, plus sums of squares kept alongside them.
+        gstats->nreads[amp]      += lstats->nreads[amp];
+        gstats->nreads2[amp]     += lstats->nreads[amp] * lstats->nreads[amp];
+        gstats->nfull_reads[amp] += lstats->nfull_reads[amp];
+        gstats->nbases[amp]      += lstats->nbases[amp];
+        gstats->nbases2[amp]     += lstats->nbases[amp] * lstats->nbases[amp];
+
+        // Percentages are taken here, per file, as nseq differs per sample.
+        double perc = 0;
+        if (all_nseq)
+            perc = 100.0 * lstats->nreads[amp] / all_nseq;
+        gstats->nrperc[amp]  += perc;
+        gstats->nrperc2[amp] += perc*perc;
+
+        for (d = 0; d < MAX_DEPTH; d++) {
+            gstats->covered_perc[amp][d]  += lstats->covered_perc[amp][d];
+            gstats->covered_perc2[amp][d] += lstats->covered_perc[amp][d]
+                                           * lstats->covered_perc[amp][d];
+        }
+        for (d = 0; d < 3; d++)
+            gstats->amp_dist[amp][d] += lstats->amp_dist[amp][d];
+    }
+
+    for (pos = 0; pos < lstats->max_len; pos++) {
+        gstats->depth_valid[pos] += lstats->depth_valid[pos];
+        gstats->depth_all[pos]   += lstats->depth_all[pos];
+    }
+
+    return 0;
+}
+
+// Total the usable reads over all references with amplicons, then fold
+// each such reference's local stats into its global stats.
+int append_stats(amplicons_t *amps, int nref) {
+    int idx, total_nseq = 0;
+
+    for (idx = 0; idx < nref; idx++) {
+        const astats_t *ls = amps[idx].lstats;
+        if (amps[idx].sites)
+            total_nseq += ls->nseq - ls->nfiltered - ls->nfailprimer;
+    }
+
+    for (idx = 0; idx < nref; idx++) {
+        amplicons_t *ref = &amps[idx];
+        if (ref->sites &&
+            append_lstats(ref->lstats, ref->gstats, ref->namp, total_nseq) < 0)
+            return -1;
+    }
+    return 0;
+}
+
+typedef struct {
+    int32_t start, end; // template start,end as unpacked from the hash key
+    uint32_t freq;      // number of templates seen with these coordinates
+    uint32_t status;    // amplicon status code stored alongside freq
+} tcoord_t;
+
+// Sort tcoord by descending frequency and then ascending start and end.
+// Fields are compared, never subtracted: a uint32_t difference wraps and
+// an int32_t difference can overflow, giving qsort() the wrong sign.
+static int tcoord_freq_sort(const void *vp1, const void *vp2) {
+    const tcoord_t *t1 = (const tcoord_t *)vp1;
+    const tcoord_t *t2 = (const tcoord_t *)vp2;
+
+    if (t1->freq != t2->freq)
+        return t1->freq < t2->freq ? 1 : -1;
+    if (t1->start != t2->start)
+        return t1->start < t2->start ? -1 : 1;
+    return (t1->end > t2->end) - (t1->end < t2->end);
+}
+
+
+/*
+ * Merges tcoord start,end,freq,status tuples if their coordinates are
+ * close together.  We aim to keep the start,end for the most frequent
+ * value and assume that is the correct coordinate and all others are
+ * minor fluctuations due to errors or variants.
+ *
+ * We sort by frequency first and then merge later items in the list into
+ * the earlier more frequent ones.  It's O(N^2), but sufficient for now
+ * given current scale of projects.
+ *
+ * tpos holds *np tuples on entry; it is reordered and compacted in place
+ * and *np is set to the number of merged tuples left at its front.
+ * A possible faster scheme: sort by start, scan for all items within X,
+ * find the most frequent of those, and cluster around that.
+ */
+static void aggregate_tcoord(astats_args_t *args, tcoord_t *tpos, size_t *np){
+    size_t n = *np, j, j2, j3, k;
+
+    // Sort by frequency and cluster infrequent coords into frequent
+    // ones provided they're close by.
+    // This is O(N^2), but we've already binned by tcoord_bin/2 so
+    // the list isn't intended to be vast at this point.
+    qsort(tpos, n, sizeof(*tpos), tcoord_freq_sort);
+
+    // For frequency ties, find mid start coord, and then find mid end
+    // coord of those matching start.
+    // We make that the first item so we merge into that mid point.
+    for (j = 0; j < n; j++) {
+        for (j2 = j+1; j2 < n; j2++) {
+            if (tpos[j].freq != tpos[j2].freq)
+                break;
+            if (tpos[j2].start - tpos[j].start >= args->tcoord_bin)
+                break;
+        }
+
+        // j..j2-1 tie on freq within one start bin; m is the mid start.
+        if (j2-1 > j) {
+            size_t m = (j2-1 + j)/2;
+
+            // Rewind to the first entry sharing this start, but never
+            // past j: entries before j belong to a more frequent group.
+            while (m > j && tpos[m].start == tpos[m-1].start)
+                m--;
+            for (j3 = m+1; j3 < j2; j3++) {
+                if (tpos[m].start != tpos[j3].start)
+                    break;
+                // Ends ascend for equal starts, so measure forwards from m.
+                if (tpos[j3].end - tpos[m].end >= args->tcoord_bin)
+                    break;
+            }
+            if (j3-1 > m)
+                m = (j3-1 + m)/2;
+
+            // Swap with first item.
+            tcoord_t tmp = tpos[j];
+            tpos[j] = tpos[m];
+            tpos[m] = tmp;
+            j = j2-1;
+        }
+    }
+
+    // Now merge in coordinates.  This bit is O(N^2), so consider binning
+    // first to shrink the list if we have excessive positional variation.
+    for (k = j = 0; j < n; j++) {
+        if (!tpos[j].freq)
+            continue;
+
+        if (k < j)
+            tpos[k] = tpos[j];
+
+        for (j2 = j+1; j2 < n; j2++) {
+            if (ABS(tpos[j].start-tpos[j2].start) < args->tcoord_bin/2 &&
+                ABS(tpos[j].end  -tpos[j2].end)  < args->tcoord_bin/2 &&
+                tpos[j].status == tpos[j2].status) {
+                tpos[k].freq += tpos[j2].freq;
+                tpos[j2].freq = 0;
+            }
+        }
+        k++;
+    }
+
+    *np = k;
+}
+
+// Write the full statistics report to args->out_fp, with every record
+// prefixed by 'type'.  'local' selects amps[].lstats over amps[].gstats.
+// Returns 0 on success or -1 on failure.
+int dump_stats(astats_args_t *args, char type, char *name, int nfile,
+               amplicons_t *amps, int nref, int local) {
+    int i, r;
+    FILE *ofp = args->out_fp;
+    tcoord_t *tpos = NULL;
+    size_t ntcoord = 0;
+
+    // summary stats for this sample (or for all samples)
+    fprintf(ofp, "# Summary stats.\n");
+    fprintf(ofp, "# Use 'grep ^%cSS | cut -f 2-' to extract this part.\n", type);
+
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        int nmatch = stats->nseq - stats->nfiltered - stats->nfailprimer;
+        char *name_ref = malloc(strlen(name) + strlen(amps[r].ref) + 2);
+        if (!name_ref)
+            return -1;
+        if (args->multi_ref)
+            sprintf(name_ref, "%s\t%s", name, amps[r].ref);
+        else
+            sprintf(name_ref, "%s", name);
+        fprintf(ofp, "%cSS\t%s\traw total sequences:\t%d\n",
+                type, name_ref, stats->nseq);
+        fprintf(ofp, "%cSS\t%s\tfiltered sequences:\t%d\n",
+                type, name_ref, stats->nfiltered);
+        fprintf(ofp, "%cSS\t%s\tfailed primer match:\t%d\n",
+                type, name_ref, stats->nfailprimer);
+        fprintf(ofp, "%cSS\t%s\tmatching sequences:\t%d\n",
+                type, name_ref, nmatch);
+
+        int d = 0;
+        do {
+            // From first to last amplicon only, so not entire consensus.
+            // If contig length is known, maybe add missing count to < DEPTH?
+            int64_t start = 0, covered = 0, total = 0;
+            amplicon_t *amp = amps[r].amp;
+            for (i = 0; i < amps[r].namp; i++) {
+                int64_t j, offset = amp[i].min_left-1;
+                if (amp[i].min_right - amp[i].min_left > stats->max_amp_len) {
+                    fprintf(samtools_stderr, "[ampliconstats] error: "
+                            "Maximum amplicon length (%d) exceeded for '%s'\n",
+                            (int)stats->max_amp_len, name);
+                    free(name_ref); // not yet released on this early exit
+                    return -1;
+                }
+                for (j = MAX(start, amp[i].max_left-1);
+                     j < MAX(start, amp[i].min_right); j++) {
+                    if (stats->coverage[i*stats->max_amp_len + j-offset]
+                        >= args->min_depth[d])
+                        covered++;
+                    total++;
+                }
+                start = MAX(start, amp[i].min_right);
+            }
+            fprintf(ofp, "%cSS\t%s\tconsensus depth count < %d and >= %d:\t%"
+                    PRId64"\t%"PRId64"\n", type, name_ref,
+                    args->min_depth[d], args->min_depth[d],
+                    total-covered, covered);
+        } while (++d < MAX_DEPTH && args->min_depth[d]);
+
+        free(name_ref);
+    }
+
+    // Read count
+    fprintf(ofp, "# Absolute matching read counts per amplicon.\n");
+    fprintf(ofp, "# Use 'grep ^%cREADS | cut -f 2-' to extract this part.\n", type);
+    fprintf(ofp, "%cREADS\t%s", type, name);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++) {
+            fprintf(ofp, "\t%"PRId64, stats->nreads[i]);
+        }
+    }
+    fprintf(ofp, "\n");
+
+    // Valid depth is the number of full length reads (already divided
+    // by the number we expect to cover), so +0.5 per read in pair.
+    // A.k.a "usable depth" in the plots.
+    fprintf(ofp, "%cVDEPTH\t%s", type, name);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++)
+            fprintf(ofp, "\t%d", (int)stats->nfull_reads[i]);
+    }
+    fprintf(ofp, "\n");
+
+    if (type == 'C') {
+        // For combined we can compute mean & standard deviation too
+        fprintf(ofp, "CREADS\tMEAN");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            for (i = 0; i < amps[r].namp; i++) {
+                fprintf(ofp, "\t%.1f", stats->nreads[i] / (double)nfile);
+            }
+        }
+        fprintf(ofp, "\n");
+
+        fprintf(ofp, "CREADS\tSTDDEV");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            for (i = 0; i < amps[r].namp; i++) {
+                double n1 = stats->nreads[i];
+                fprintf(ofp, "\t%.1f", nfile > 1 && stats->nreads2[i] > 0
+                        ? sqrt(stats->nreads2[i]/(double)nfile
+                               - (n1/nfile)*(n1/nfile))
+                        : 0);
+            }
+        }
+        fprintf(ofp, "\n");
+    }
+
+    fprintf(ofp, "# Read percentage of distribution between amplicons.\n");
+    fprintf(ofp, "# Use 'grep ^%cRPERC | cut -f 2-' to extract this part.\n", type);
+    fprintf(ofp, "%cRPERC\t%s", type, name);
+    int all_nseq = 0;
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        all_nseq  += stats->nseq - stats->nfiltered - stats->nfailprimer;
+    }
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++) {
+            if (type == 'C') {
+                fprintf(ofp, "\t%.3f", (double)stats->nrperc[i] / nfile);
+            } else {
+                fprintf(ofp, "\t%.3f",
+                        all_nseq ? 100.0 * stats->nreads[i] / all_nseq : 0);
+            }
+        }
+    }
+    fprintf(ofp, "\n");
+
+    if (type == 'C') {
+        // For combined we compute mean and standard deviation too
+        fprintf(ofp, "CRPERC\tMEAN");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            for (i = 0; i < amps[r].namp; i++) {
+                fprintf(ofp, "\t%.3f", stats->nrperc[i] / nfile);
+            }
+        }
+        fprintf(ofp, "\n");
+
+        fprintf(ofp, "CRPERC\tSTDDEV");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            for (i = 0; i < amps[r].namp; i++) {
+                // variance = SUM(X^2)/N - (SUM(X)/N)^2
+                double n1 = stats->nrperc[i];
+                double v = stats->nrperc2[i]/nfile - (n1/nfile)*(n1/nfile);
+                fprintf(ofp, "\t%.3f", v>0?sqrt(v):0);
+            }
+        }
+        fprintf(ofp, "\n");
+    }
+
+    // Base depth
+    fprintf(ofp, "# Read depth per amplicon.\n");
+    fprintf(ofp, "# Use 'grep ^%cDEPTH | cut -f 2-' to extract this part.\n", type);
+    fprintf(ofp, "%cDEPTH\t%s", type, name);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        amplicon_t *amp = amps[r].amp;
+        for (i = 0; i < amps[r].namp; i++) {
+            int nseq = stats->nseq - stats->nfiltered - stats->nfailprimer;
+            int64_t alen = amp[i].min_right - amp[i].max_left+1;
+            fprintf(ofp, "\t%.1f", nseq ? stats->nbases[i] / (double)alen : 0);
+        }
+    }
+    fprintf(ofp, "\n");
+
+    if (type == 'C') {
+        // For combined we can compute mean & standard deviation too
+        fprintf(ofp, "CDEPTH\tMEAN");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            amplicon_t *amp = amps[r].amp;
+            int nseq = stats->nseq - stats->nfiltered - stats->nfailprimer;
+            for (i = 0; i < amps[r].namp; i++) {
+                int64_t alen = amp[i].min_right - amp[i].max_left+1;
+                fprintf(ofp, "\t%.1f", nseq ? stats->nbases[i] / (double)alen / nfile : 0);
+            }
+        }
+        fprintf(ofp, "\n");
+
+        fprintf(ofp, "CDEPTH\tSTDDEV");
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].sites)
+                continue;
+            astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+            amplicon_t *amp = amps[r].amp;
+            for (i = 0; i < amps[r].namp; i++) {
+                double alen = amp[i].min_right - amp[i].max_left+1;
+                double n1 = stats->nbases[i] / alen;
+                double v = stats->nbases2[i] / (alen*alen) /nfile
+                    - (n1/nfile)*(n1/nfile);
+                fprintf(ofp, "\t%.1f", v>0?sqrt(v):0);
+            }
+        }
+        fprintf(ofp, "\n");
+    }
+
+    // Percent Coverage
+    if (type == 'F') {
+        fprintf(ofp, "# Percentage coverage per amplicon\n");
+        fprintf(ofp, "# Use 'grep ^%cPCOV | cut -f 2-' to extract this part.\n", type);
+        int d = 0;
+        do {
+            fprintf(ofp, "%cPCOV-%d\t%s", type, args->min_depth[d], name);
+
+            for (r = 0; r < nref; r++) {
+                if (!amps[r].sites)
+                    continue;
+                astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+                amplicon_t *amp = amps[r].amp;
+                for (i = 0; i < amps[r].namp; i++) {
+                    int covered = 0;
+                    if (amp[i].min_right - amp[i].min_left > stats->max_amp_len) {
+                        fprintf(samtools_stderr, "[ampliconstats] error: "
+                                "Maximum amplicon length (%d) exceeded for '%s'\n",
+                                (int)stats->max_amp_len, name);
+                        return -1;
+                    }
+                    int64_t j, offset = amp[i].min_left-1;
+                    for (j = amp[i].max_left-1; j < amp[i].min_right; j++) {
+                        int apos = i*stats->max_amp_len + j-offset;
+                        if (stats->coverage[apos] >= args->min_depth[d])
+                            covered++;
+                    }
+                    int64_t alen = amp[i].min_right - amp[i].max_left+1;
+                    stats->covered_perc[i][d] = 100.0 * covered / alen;
+                    fprintf(ofp, "\t%.2f", 100.0 * covered / alen);
+                }
+            }
+            fprintf(ofp, "\n");
+        } while (++d < MAX_DEPTH && args->min_depth[d]);
+
+    } else if (type == 'C') {
+        // For combined we can compute mean & standard deviation too
+        int d = 0;
+        do {
+            fprintf(ofp, "CPCOV-%d\tMEAN", args->min_depth[d]);
+            for (r = 0; r < nref; r++) {
+                if (!amps[r].sites)
+                    continue;
+                astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+                for (i = 0; i < amps[r].namp; i++) {
+                    fprintf(ofp, "\t%.1f", stats->covered_perc[i][d] / nfile);
+                }
+            }
+            fprintf(ofp, "\n");
+
+            fprintf(ofp, "CPCOV-%d\tSTDDEV", args->min_depth[d]);
+            for (r = 0; r < nref; r++) {
+                if (!amps[r].sites)
+                    continue;
+                astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+                for (i = 0; i < amps[r].namp; i++) {
+                    double n1 = stats->covered_perc[i][d] / nfile;
+                    double v = stats->covered_perc2[i][d] / nfile - n1*n1;
+                    fprintf(ofp, "\t%.1f", v>0?sqrt(v):0);
+                }
+            }
+            fprintf(ofp, "\n");
+        } while (++d < MAX_DEPTH && args->min_depth[d]);
+    }
+
+    // Plus base depth for all reads, irrespective of amplicon.
+    // This is post overlap removal, if reads in the read-pair overlap.
+    fprintf(ofp, "# Depth per reference base for ALL data.\n");
+    fprintf(ofp, "# Use 'grep ^%cDP_ALL | cut -f 2-' to extract this part.\n",
+            type);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        if (args->multi_ref)
+            fprintf(ofp, "%cDP_ALL\t%s\t%s", type, name, amps[r].ref);
+        else
+            fprintf(ofp, "%cDP_ALL\t%s", type, name);
+
+        for (i = 0; i < amps[r].len; i++) {
+            // Basic run-length encoding provided all values are within
+            // +- depth_bin fraction of the mid-point.
+            int dmin = stats->depth_all[i], dmax = stats->depth_all[i], j;
+            double dmid = (dmin + dmax)/2.0;
+            double low  = dmid*(1-args->depth_bin);
+            double high = dmid*(1+args->depth_bin);
+            for (j = i+1; j < amps[r].len; j++) {
+                int d = stats->depth_all[j];
+                if (d < low || d > high)
+                    break;
+                if (dmin > d) {
+                    dmin = d;
+                    dmid = (dmin + dmax)/2.0;
+                    low  = dmid*(1-args->depth_bin);
+                    high = dmid*(1+args->depth_bin);
+                } else if (dmax < d) {
+                    dmax = d;
+                    dmid = (dmin + dmax)/2.0;
+                    low  = dmid*(1-args->depth_bin);
+                    high = dmid*(1+args->depth_bin);
+                }
+            }
+            fprintf(ofp, "\t%d,%d", (int)dmid, j-i);
+            i = j-1;
+        }
+        fprintf(ofp, "\n");
+    }
+
+    // And depth for only reads matching to a single amplicon for full
+    // length.  This is post read overlap removal.
+    fprintf(ofp, "# Depth per reference base for full-length valid amplicon data.\n");
+    fprintf(ofp, "# Use 'grep ^%cDP_VALID | cut -f 2-' to extract this "
+            "part.\n", type);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        if (args->multi_ref)
+            fprintf(ofp, "%cDP_VALID\t%s\t%s", type, name, amps[r].ref);
+        else
+            fprintf(ofp, "%cDP_VALID\t%s", type, name);
+
+        for (i = 0; i < amps[r].len; i++) {
+            int dmin = stats->depth_valid[i], dmax = stats->depth_valid[i], j;
+            double dmid = (dmin + dmax)/2.0;
+            double low  = dmid*(1-args->depth_bin);
+            double high = dmid*(1+args->depth_bin);
+            for (j = i+1; j < amps[r].len; j++) {
+                int d = stats->depth_valid[j];
+                if (d < low || d > high)
+                    break;
+                if (dmin > d) {
+                    dmin = d;
+                    dmid = (dmin + dmax)/2.0;
+                    low  = dmid*(1-args->depth_bin);
+                    high = dmid*(1+args->depth_bin);
+                } else if (dmax < d) {
+                    dmax = d;
+                    dmid = (dmin + dmax)/2.0;
+                    low  = dmid*(1-args->depth_bin);
+                    high = dmid*(1+args->depth_bin);
+                }
+            }
+            fprintf(ofp, "\t%d,%d", (int)dmid, j-i);
+            i = j-1;
+        }
+        fprintf(ofp, "\n");
+    }
+
+    // TCOORD (start to end) distribution
+    fprintf(ofp, "# Distribution of aligned template coordinates.\n");
+    fprintf(ofp, "# Use 'grep ^%cTCOORD | cut -f 2-' to extract this part.\n", type);
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0 - (nref==1); i < amps[r].namp; i++) {
+            if (ntcoord < kh_size(stats->tcoord[i+1])) {
+                ntcoord = kh_size(stats->tcoord[i+1]);
+                tcoord_t *tmp = realloc(tpos, ntcoord * sizeof(*tmp));
+                if (!tmp) {
+                    free(tpos);
+                    return -1;
+                }
+                tpos = tmp;
+            }
+
+            khiter_t k;
+            size_t n = 0, j;
+            for (k = kh_begin(stats->tcoord[i+1]);
+                 k != kh_end(stats->tcoord[i+1]); k++) {
+                if (!kh_exist(stats->tcoord[i+1], k) ||
+                    (kh_value(stats->tcoord[i+1], k) & 0xFFFFFFFF) == 0)
+                    continue;
+                // Key is start,end in 32-bit quantities, which limits us
+                // to 4Gb references.
+                tpos[n].start = kh_key(stats->tcoord[i+1], k)&0xffffffff;
+                tpos[n].end   = kh_key(stats->tcoord[i+1], k)>>32;
+
+                // Value is frequency (bottom 32 bits) and status (top 32).
+                tpos[n].freq   = kh_value(stats->tcoord[i+1], k)&0xffffffff;
+                tpos[n].status = kh_value(stats->tcoord[i+1], k)>>32;
+                n++;
+            }
+
+            if (args->tcoord_bin > 1)
+                aggregate_tcoord(args, tpos, &n);
+
+            fprintf(ofp, "%cTCOORD\t%s\t%d", type, name,
+                    i+1+amps[r].first_amp); // per amplicon
+            for (j = 0; j < n; j++) {
+                if (tpos[j].freq < args->tcoord_min_count)
+                    continue;
+                fprintf(ofp, "\t%d,%d,%u,%u",
+                        tpos[j].start,
+                        tpos[j].end,
+                        tpos[j].freq,
+                        tpos[j].status);
+            }
+            fprintf(ofp, "\n");
+        }
+    }
+
+    // AMP length distribution.
+    // 0 = both ends in this amplicon
+    // 1 = ends in different amplicons
+    // 2 = other end matching an unknown amplicon site
+    //     (see tcoord for further analysis of where)
+    fprintf(ofp, "# Classification of amplicon status.  Columns are\n");
+    fprintf(ofp, "# number with both primers from this amplicon, number with\n");
+    fprintf(ofp, "# primers from different amplicon, and number with a position\n");
+    fprintf(ofp, "# not matching any valid amplicon primer site\n");
+    fprintf(ofp, "# Use 'grep ^%cAMP | cut -f 2-' to extract this part.\n", type);
+
+    fprintf(ofp, "%cAMP\t%s\t0", type, name); // all merged
+    int amp_dist[3] = {0};
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++) { // accumulate for all amps
+            amp_dist[0] += stats->amp_dist[i][0];
+            amp_dist[1] += stats->amp_dist[i][1];
+            amp_dist[2] += stats->amp_dist[i][2];
+        }
+    }
+    fprintf(ofp, "\t%d\t%d\t%d\n", amp_dist[0], amp_dist[1], amp_dist[2]);
+
+    for (r = 0; r < nref; r++) {
+        if (!amps[r].sites)
+            continue;
+        astats_t *stats = local ? amps[r].lstats : amps[r].gstats;
+        for (i = 0; i < amps[r].namp; i++) {
+            // per amplicon
+            fprintf(ofp, "%cAMP\t%s\t%d", type, name, i+1+amps[r].first_amp);
+            fprintf(ofp, "\t%d\t%d\t%d\n", stats->amp_dist[i][0],
+                    stats->amp_dist[i][1], stats->amp_dist[i][2]);
+        }
+    }
+
+    free(tpos);
+    return 0;
+}
+
+int dump_lstats(astats_args_t *args, char type, char *name, int nfile,
+               amplicons_t *amps, int nref) { // report from amps[].lstats
+    return dump_stats(args, type, name, nfile, amps, nref, 1); // local=1
+}
+
+int dump_gstats(astats_args_t *args, char type, char *name, int nfile,
+               amplicons_t *amps, int nref) { // report from amps[].gstats
+    return dump_stats(args, type, name, nfile, amps, nref, 0); // local=0
+}
+
+char const *get_sample_name(sam_hdr_t *header, char *RG) {
+    kstring_t sm = {0}; // sm.s starts NULL; the buffer is not freed here
+    sam_hdr_find_tag_id(header, "RG", RG ? "ID" : NULL, RG, "SM", &sm);
+    return sm.s;
+}
+
+// Return the length of reference SQ, or the longest reference length in
+// the header when SQ is NULL.  An SQ name with no tid yields -1.
+int64_t get_ref_len(sam_hdr_t *header, const char *SQ) {
+    int64_t longest = 0;
+    int t, n;
+
+    if (SQ) {
+        t = sam_hdr_name2tid(header, SQ);
+        return t < 0 ? -1 : sam_hdr_tid2len(header, t);
+    }
+    n = sam_hdr_nref(header);
+    for (t = 0; t < n; t++) {
+        int64_t cur = sam_hdr_tid2len(header, t);
+        longest = cur > longest ? cur : longest;
+    }
+    return longest;
+}
+
+static int amplicon_stats(astats_args_t *args,
+                          khash_t(bed_list_hash) *bed_hash,
+                          char **filev, int filec) {
+    int i, ref = -1, ref_tid = -1, ret = -1, nref = 0;
+    samFile *fp = NULL;
+    sam_hdr_t *header = NULL;
+    bam1_t *b = bam_init1();
+    FILE *ofp = args->out_fp;
+    char sname_[8192], *sname = NULL;
+    amplicons_t *amps = NULL;
+
+    // Report initial SS header.  We gather data from the bed_hash entries
+    // as well as from the first SAM header (with the requirement that all
+    // headers should be compatible).
+    if (filec) {
+        if (!(fp = sam_open_format(filev[0], "r", &args->ga.in))) {
+            print_error_errno("ampliconstats",
+                              "Cannot open input file \"%s\"",
+                              filev[0]);
+            goto err;
+        }
+        if (!(header = sam_hdr_read(fp)))
+            goto err;
+
+        if (!amps) {
+            amps = calloc(nref=sam_hdr_nref(header), sizeof(*amps));
+            if (!amps)
+                goto err;
+            fprintf(ofp, "# Summary statistics, used for scaling the plots.\n");
+            fprintf(ofp, "SS\tSamtools version: %s\n", samtools_version());
+            fprintf(ofp, "SS\tCommand line: %s\n", args->argv);
+            fprintf(ofp, "SS\tNumber of files:\t%d\n", filec);
+
+            // Note: order of hash entries will be different to order of
+            // BED file which may also differ to order of SQ headers.
+            // SQ header is canonical ordering (pos sorted file).
+            khiter_t k;
+            int bam_nref = sam_hdr_nref(header);
+            for (i = 0; i < bam_nref; i++) {
+                k = kh_get(bed_list_hash, bed_hash,
+                           sam_hdr_tid2name(header, i));
+                if (!kh_exist(bed_hash, k))
+                    continue;
+
+                bed_entry_list_t *sites = &kh_value(bed_hash, k);
+
+                ref = i;
+                amps[ref].ref = kh_key(bed_hash, k);
+                amps[ref].sites = sites;
+                amps[ref].namp = count_amplicon(sites);
+                amps[ref].amp  = calloc(sites->length,
+                                        sizeof(*amps[ref].amp));
+                if (!amps[ref].amp)
+                    goto err;
+                if (args->multi_ref)
+                    fprintf(ofp, "SS\tNumber of amplicons:\t%s\t%d\n",
+                            kh_key(bed_hash, k), amps[ref].namp);
+                else
+                    fprintf(ofp, "SS\tNumber of amplicons:\t%d\n",
+                            amps[ref].namp);
+
+                amps[ref].tid = ref;
+                if (ref_tid == -1)
+                    ref_tid = ref;
+
+                int64_t len = get_ref_len(header, kh_key(bed_hash, k));
+                amps[ref].len = len;
+                if (args->multi_ref)
+                    fprintf(ofp, "SS\tReference length:\t%s\t%"PRId64"\n",
+                            kh_key(bed_hash, k), len);
+                else
+                    fprintf(ofp, "SS\tReference length:\t%"PRId64"\n",
+                            len);
+
+                amps[ref].lstats = stats_alloc(len, args->max_amp,
+                                               args->max_amp_len);
+                amps[ref].gstats = stats_alloc(len, args->max_amp,
+                                               args->max_amp_len);
+                if (!amps[ref].lstats || !amps[ref].gstats)
+                    goto err;
+            }
+        }
+
+        sam_hdr_destroy(header);
+        header = NULL;
+        if (sam_close(fp) < 0) {
+            fp = NULL;
+            goto err;
+        }
+        fp = NULL;
+    }
+    fprintf(ofp, "SS\tEnd of summary\n");
+
+    // Extract the bits of amplicon data we need from bed hash and turn
+    // it into a position-to-amplicon lookup table.
+    int offset = 0;
+    for (i = 0; i < nref; i++) {
+        if (!amps[i].sites)
+            continue;
+
+        amps[i].first_amp = offset;
+        if (bed2amplicon(args, amps[i].sites, amps[i].amp,
+                         &amps[i].namp, i==0, amps[i].ref, offset) < 0)
+            goto err;
+
+        offset += amps[i].namp; // cumulative amplicon number across refs
+    }
+
+    // Now iterate over file contents, one at a time.
+    for (i = 0; i < filec; i++) {
+        char *nstart = filev[i];
+
+        fp = sam_open_format(filev[i], "r", &args->ga.in);
+        if (!fp) {
+            print_error_errno("ampliconstats",
+                              "Cannot open input file \"%s\"",
+                              filev[i]);
+            goto err;
+        }
+
+        if (args->ga.nthreads > 0)
+            hts_set_threads(fp, args->ga.nthreads);
+
+        if (!(header = sam_hdr_read(fp)))
+            goto err;
+
+        if (nref != sam_hdr_nref(header)) {
+            print_error_errno("ampliconstats",
+                              "SAM headers are not consistent across input files");
+            goto err;
+        }
+        int r;
+        for (r = 0; r < nref; r++) {
+            if (!amps[r].ref ||
+                strcmp(amps[r].ref, sam_hdr_tid2name(header, r)) != 0 ||
+                amps[r].len != sam_hdr_tid2len(header, r)) {
+                print_error_errno("ampliconstats",
+                                  "SAM headers are not consistent across "
+                                  "input files");
+                goto err;
+            }
+        }
+
+        if (args->use_sample_name)
+            sname = (char *)get_sample_name(header, NULL);
+
+        if (!sname) {
+            sname = sname_;
+            char *nend = filev[i] + strlen(filev[i]), *cp;
+            if ((cp = strrchr(filev[i], '/')))
+                nstart = cp+1;
+            if ((cp = strrchr(nstart, '.')) &&
+                (strcmp(cp, ".bam") == 0 ||
+                 strcmp(cp, ".sam") == 0 ||
+                 strcmp(cp, ".cram") == 0))
+                nend = cp;
+            if (nend - nstart >= 8192) nend = nstart+8191;
+            memcpy(sname, nstart, nend-nstart);
+            sname[nend-nstart] = 0;
+        }
+
+        // Stats local to this sample only
+        amp_stats_reset(amps, nref);
+
+        int last_ref = -9;
+        while ((r = sam_read1(fp, header, b)) >= 0) {
+            // Other filter options useful here?
+            if (b->core.tid < 0)
+                continue;
+
+            if (last_ref != b->core.tid) {
+                last_ref  = b->core.tid;
+                if (initialise_amp_pos_lookup(args, amps, last_ref) < 0)
+                    goto err;
+            }
+
+            if (accumulate_stats(args, amps, b) < 0)
+                goto err;
+        }
+
+        if (r < -1) {
+            print_error_errno("ampliconstats", "Fail reading record");
+            goto err;
+        }
+
+        sam_hdr_destroy(header);
+        if (sam_close(fp) < 0) {
+            fp = NULL;
+            goto err;
+        }
+
+        fp = NULL;
+        header = NULL;
+
+        if (dump_lstats(args, 'F', sname, filec, amps, nref) < 0)
+            goto err;
+
+        if (append_stats(amps, nref) < 0)
+            goto err;
+
+        if (sname && sname != sname_)
+            free(sname);
+        sname = NULL;
+    }
+
+    if (dump_gstats(args, 'C', "COMBINED", filec, amps, nref) < 0)
+        goto err;
+
+    ret = 0;
+ err:
+    bam_destroy1(b);
+    if (ret) {
+        if (header)
+            sam_hdr_destroy(header);
+        if (fp)
+            sam_close(fp);
+    }
+    for (i = 0; i < nref; i++) {
+        stats_free(amps[i].lstats);
+        stats_free(amps[i].gstats);
+        free(amps[i].amp);
+    }
+    free(amps);
+    free(pos2start);
+    free(pos2end);
+    if (ret) {
+        if (sname && sname != sname_)
+            free(sname);
+    }
+
+    return ret;
+}
+
+// Print the ampliconstats help text to fp, taking the bracketed defaults
+// from args where a format argument uses it, and return exit_status so that
+// callers can simply "return usage(...)".
+static int usage(astats_args_t *args, FILE *fp, int exit_status) {
+    fprintf(fp, "\nUsage: samtools ampliconstats [options] primers.bed *.bam > astats.txt\n"
+            "\nOptions:\n");
+    fprintf(fp, "  -f, --required-flag STR|INT\n"
+            "               Only include reads with all of the FLAGs present [0x%X]\n",args->flag_require);
+    fprintf(fp, "  -F, --filter-flag STR|INT\n"
+            "               Only include reads with none of the FLAGs present [0x%X]\n",args->flag_filter & 0xffff);
+    fprintf(fp, "  -a, --max-amplicons INT\n"
+            "               Change the maximum number of amplicons permitted [%d]\n", MAX_AMP);
+    fprintf(fp, "  -l, --max-amplicon-length INT\n"
+            "               Change the maximum length of an individual amplicon [%d]\n", MAX_AMP_LEN);
+    fprintf(fp, "  -d, --min-depth INT[,INT]...\n"
+            "               Minimum base depth(s) to consider position covered [%d]\n", args->min_depth[0]);
+    fprintf(fp, "  -m, --pos-margin INT\n"
+            "               Margin of error for matching primer positions [%d]\n", args->max_delta);
+    fprintf(fp, "  -o, --output FILE\n"
+            "               Specify output file [stdout if unset]\n");
+    fprintf(fp, "  -s, --use-sample-name\n"
+            "               Use the sample name from the first @RG header line\n");
+    fprintf(fp, "  -t, --tlen-adjust INT\n"
+            "               Add/subtract from TLEN; use when clipping but no fixmate step\n");
+    fprintf(fp, "  -b, --tcoord-bin INT\n"
+            "               Bin template start,end positions into multiples of INT[1]\n");
+    fprintf(fp, "  -c, --tcoord-min-count INT\n"
+            "               Minimum template start,end frequency for recording [%d]\n", TCOORD_MIN_COUNT);
+    fprintf(fp, "  -D, --depth-bin FRACTION\n"
+            "               Merge FDP values within +/- FRACTION together\n");
+    fprintf(fp, "  -S, --single-ref\n"
+            "               Force single-ref (<=1.12) output format\n");
+    sam_global_opt_help(fp, "I.--.@");
+
+    return exit_status;
+}
+
+// Entry point for "samtools ampliconstats": parses options, loads the BED
+// file named by the first positional argument and runs amplicon_stats() on
+// the remaining arguments (or on "-" if there are none).  Returns exit status.
+int main_ampliconstats(int argc, char **argv) {
+    astats_args_t args = {
+        .ga = SAM_GLOBAL_ARGS_INIT,
+        .flag_require = 0,
+        .flag_filter = 0x10B04, // 0x10000 = "default not yet replaced by -F"
+        .max_delta = 30, // large enough to cope with alt primers
+        .min_depth = {1},
+        .use_sample_name = 0,
+        .max_amp = MAX_AMP,
+        .max_amp_len = MAX_AMP_LEN,
+        .tlen_adj = 0,
+        .out_fp = samtools_stdout,
+        .tcoord_min_count = TCOORD_MIN_COUNT,
+        .tcoord_bin = 1,
+        .depth_bin = 0.01,
+        .multi_ref = 1
+    }, oargs = args; // oargs: untouched defaults, passed to usage()
+
+    static const struct option loptions[] =
+    {
+        SAM_OPT_GLOBAL_OPTIONS('I', 0, '-', '-', 0, '@'),
+        {"help", no_argument, NULL, 'h'},
+        {"flag-require", required_argument, NULL, 'f'},
+        {"required-flag", required_argument, NULL, 'f'}, // as shown by usage()
+        {"flag-filter", required_argument, NULL, 'F'},
+        {"filter-flag", required_argument, NULL, 'F'}, // as shown by usage()
+        {"min-depth", required_argument, NULL, 'd'},
+        {"output", required_argument, NULL, 'o'},
+        {"pos-margin", required_argument, NULL, 'm'},
+        {"use-sample-name", no_argument, NULL, 's'},
+        {"max-amplicons", required_argument, NULL, 'a'},
+        {"max-amplicon-length", required_argument, NULL, 'l'},
+        {"tlen-adjust", required_argument, NULL, 't'},
+        {"tcoord-min-count", required_argument, NULL, 'c'},
+        {"tcoord-bin", required_argument, NULL, 'b'},
+        {"depth-bin", required_argument, NULL, 'D'},
+        {"single-ref", no_argument, NULL, 'S'},
+        {NULL, 0, NULL, 0}
+    };
+    int opt;
+
+    while ( (opt=getopt_long(argc,argv,"?hf:F:@:p:m:d:sa:l:t:o:c:b:D:S",loptions,NULL))>0 ) {
+        switch (opt) {
+        case 'f': args.flag_require = bam_str2flag(optarg); break;
+        case 'F':
+            if (args.flag_filter & 0x10000)
+                args.flag_filter = 0; // strip default on first -F usage
+            args.flag_filter |= bam_str2flag(optarg); break;
+
+        case 'm': args.max_delta = atoi(optarg); break; // margin
+        case 'D': args.depth_bin = atof(optarg); break; // depth bin fraction
+        case 'd': {
+            int d = 0;
+            char *cp = optarg, *ep;
+            do {
+                long n = strtol(cp, &ep, 10);
+                args.min_depth[d++] = n;
+                if (*ep != ',')
+                    break;
+                cp = ep+1;
+            } while (d < MAX_DEPTH);
+            break;
+        }
+
+        case 'a': args.max_amp = atoi(optarg)+1;break;
+        case 'l': args.max_amp_len = atoi(optarg)+1;break;
+        case 'c': args.tcoord_min_count = atoi(optarg);break;
+        case 'b':
+            args.tcoord_bin = atoi(optarg);
+            if (args.tcoord_bin < 1)
+                args.tcoord_bin = 1;
+            break;
+
+        case 't': args.tlen_adj = atoi(optarg);break;
+        case 's': args.use_sample_name = 1;break;
+        case 'S': args.multi_ref = 0; break;
+
+        case 'o':
+            if (!(args.out_fp = fopen(optarg, "w"))) {
+                perror(optarg);
+                return 1;
+            }
+            break;
+
+        case '?': return usage(&oargs, samtools_stderr, EXIT_FAILURE);
+        case 'h': return usage(&oargs, samtools_stdout, EXIT_SUCCESS);
+
+        default:
+            // A rejected global option is fatal: report usage and stop here.
+            if (parse_sam_global_opt(opt, optarg, loptions, &args.ga) != 0)
+                return usage(&oargs,samtools_stderr, EXIT_FAILURE);
+            break;
+        }
+    }
+
+    if (argc <= optind)
+        return usage(&oargs, samtools_stdout, EXIT_SUCCESS);
+    if (argc <= optind+1 && isatty(STDIN_FILENO))
+        return usage(&oargs, samtools_stderr, EXIT_FAILURE);
+
+    khash_t(bed_list_hash) *bed_hash = kh_init(bed_list_hash);
+    if (load_bed_file_multi_ref(argv[optind], 1, 0, bed_hash)) {
+        print_error_errno("ampliconstats",
+                          "Could not read file \"%s\"", argv[optind]);
+        return 1;
+    }
+
+    khiter_t k, ref_count = 0; // number of references with BED entries
+    for (k = kh_begin(bed_hash); k != kh_end(bed_hash); k++)
+        if (kh_exist(bed_hash, k))
+            ref_count++;
+    if (ref_count == 0) {
+        destroy_bed_hash(bed_hash);
+        return 1;
+    }
+    if (ref_count > 1 && args.multi_ref == 0) {
+        print_error("ampliconstats",
+                    "Single-ref mode is not permitted for BED files\n"
+                    "containing more than one reference.");
+        destroy_bed_hash(bed_hash);
+        return 1;
+    }
+
+    args.argv = stringify_argv(argc, argv);
+    int ret;
+    if (argc == ++optind) {
+        char *av = "-";
+        ret = amplicon_stats(&args, bed_hash, &av, 1);
+    } else {
+        ret = amplicon_stats(&args, bed_hash, &argv[optind], argc-optind);
+    }
+
+    free(args.argv);
+    destroy_bed_hash(bed_hash);
+
+    return ret;
+}
diff --git a/samtools/bam.c b/samtools/bam.c
index 0c1a06b..926062c 100644
--- a/samtools/bam.c
+++ b/samtools/bam.c
@@ -1,6 +1,6 @@
 /*  bam.c -- BAM format.
 
-    Copyright (C) 2008-2013, 2015, 2019 Genome Research Ltd.
+    Copyright (C) 2008-2013, 2015, 2019-2020 Genome Research Ltd.
     Portions copyright (C) 2009-2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -125,21 +125,21 @@ int bam_remove_B(bam1_t *b)
     uint8_t *seq, *qual, *p;
     // test if removal is necessary
     if (b->core.flag & BAM_FUNMAP) return 0; // unmapped; do nothing
-    cigar = bam1_cigar(b);
+    cigar = bam_get_cigar(b);
     for (k = 0; k < b->core.n_cigar; ++k)
         if (bam_cigar_op(cigar[k]) == BAM_CBACK) break;
     if (k == b->core.n_cigar) return 0; // no 'B'
     if (bam_cigar_op(cigar[0]) == BAM_CBACK) goto rmB_err; // cannot be removed
     // allocate memory for the new CIGAR
-    if (b->data_len + (b->core.n_cigar + 1) * 4 > b->m_data) { // not enough memory
-        b->m_data = b->data_len + b->core.n_cigar * 4;
+    if (b->l_data + (b->core.n_cigar + 1) * 4 > b->m_data) { // not enough memory
+        b->m_data = b->l_data + b->core.n_cigar * 4;
         kroundup32(b->m_data);
         b->data = (uint8_t*)realloc(b->data, b->m_data);
-        cigar = bam1_cigar(b); // after realloc, cigar may be changed
+        cigar = bam_get_cigar(b); // after realloc, cigar may be changed
     }
     new_cigar = (uint32_t*)(b->data + (b->m_data - b->core.n_cigar * 4)); // from the end of b->data
     // the core loop
-    seq = bam1_seq(b); qual = bam1_qual(b);
+    seq = bam_get_seq(b); qual = bam_get_qual(b);
     no_qual = (qual[0] == 0xff); // test whether base quality is available
     i = j = 0; end_j = -1;
     for (k = l = 0; k < b->core.n_cigar; ++k) {
@@ -168,9 +168,9 @@ int bam_remove_B(bam1_t *b)
                 if (i != j) { // no need to copy if i == j
                     int u, c, c0;
                     for (u = 0; u < len; ++u) { // construct the consensus
-                        c = bam1_seqi(seq, i+u);
+                        c = bam_seqi(seq, i+u);
                         if (j + u < end_j) { // in an overlap
-                            c0 = bam1_seqi(seq, j+u);
+                            c0 = bam_seqi(seq, j+u);
                             if (c != c0) { // a mismatch; choose the better base
                                 if (qual[j+u] < qual[i+u]) { // the base in the 2nd segment is better
                                     bam1_seq_seti(seq, j+u, c);
@@ -202,9 +202,9 @@ int bam_remove_B(bam1_t *b)
     p = b->data + b->core.l_qname + l * 4;
     memmove(p, seq, (j+1)>>1); p += (j+1)>>1; // set SEQ
     memmove(p, qual, j); p += j; // set QUAL
-    memmove(p, bam1_aux(b), bam_get_l_aux(b)); p += bam_get_l_aux(b); // set optional fields
+    memmove(p, bam_get_aux(b), bam_get_l_aux(b)); p += bam_get_l_aux(b); // set optional fields
     b->core.n_cigar = l, b->core.l_qseq = j; // update CIGAR length and query length
-    b->data_len = p - b->data; // update record length
+    b->l_data = p - b->data; // update record length
     return 0;
 
 rmB_err:
diff --git a/samtools/bam.c.pysam.c b/samtools/bam.c.pysam.c
index 4c41e23..2f40ca6 100644
--- a/samtools/bam.c.pysam.c
+++ b/samtools/bam.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam.c -- BAM format.
 
-    Copyright (C) 2008-2013, 2015, 2019 Genome Research Ltd.
+    Copyright (C) 2008-2013, 2015, 2019-2020 Genome Research Ltd.
     Portions copyright (C) 2009-2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -127,21 +127,21 @@ int bam_remove_B(bam1_t *b)
     uint8_t *seq, *qual, *p;
     // test if removal is necessary
     if (b->core.flag & BAM_FUNMAP) return 0; // unmapped; do nothing
-    cigar = bam1_cigar(b);
+    cigar = bam_get_cigar(b);
     for (k = 0; k < b->core.n_cigar; ++k)
         if (bam_cigar_op(cigar[k]) == BAM_CBACK) break;
     if (k == b->core.n_cigar) return 0; // no 'B'
     if (bam_cigar_op(cigar[0]) == BAM_CBACK) goto rmB_err; // cannot be removed
     // allocate memory for the new CIGAR
-    if (b->data_len + (b->core.n_cigar + 1) * 4 > b->m_data) { // not enough memory
-        b->m_data = b->data_len + b->core.n_cigar * 4;
+    if (b->l_data + (b->core.n_cigar + 1) * 4 > b->m_data) { // not enough memory
+        b->m_data = b->l_data + b->core.n_cigar * 4;
         kroundup32(b->m_data);
         b->data = (uint8_t*)realloc(b->data, b->m_data);
-        cigar = bam1_cigar(b); // after realloc, cigar may be changed
+        cigar = bam_get_cigar(b); // after realloc, cigar may be changed
     }
     new_cigar = (uint32_t*)(b->data + (b->m_data - b->core.n_cigar * 4)); // from the end of b->data
     // the core loop
-    seq = bam1_seq(b); qual = bam1_qual(b);
+    seq = bam_get_seq(b); qual = bam_get_qual(b);
     no_qual = (qual[0] == 0xff); // test whether base quality is available
     i = j = 0; end_j = -1;
     for (k = l = 0; k < b->core.n_cigar; ++k) {
@@ -170,9 +170,9 @@ int bam_remove_B(bam1_t *b)
                 if (i != j) { // no need to copy if i == j
                     int u, c, c0;
                     for (u = 0; u < len; ++u) { // construct the consensus
-                        c = bam1_seqi(seq, i+u);
+                        c = bam_seqi(seq, i+u);
                         if (j + u < end_j) { // in an overlap
-                            c0 = bam1_seqi(seq, j+u);
+                            c0 = bam_seqi(seq, j+u);
                             if (c != c0) { // a mismatch; choose the better base
                                 if (qual[j+u] < qual[i+u]) { // the base in the 2nd segment is better
                                     bam1_seq_seti(seq, j+u, c);
@@ -204,9 +204,9 @@ int bam_remove_B(bam1_t *b)
     p = b->data + b->core.l_qname + l * 4;
     memmove(p, seq, (j+1)>>1); p += (j+1)>>1; // set SEQ
     memmove(p, qual, j); p += j; // set QUAL
-    memmove(p, bam1_aux(b), bam_get_l_aux(b)); p += bam_get_l_aux(b); // set optional fields
+    memmove(p, bam_get_aux(b), bam_get_l_aux(b)); p += bam_get_l_aux(b); // set optional fields
     b->core.n_cigar = l, b->core.l_qseq = j; // update CIGAR length and query length
-    b->data_len = p - b->data; // update record length
+    b->l_data = p - b->data; // update record length
     return 0;
 
 rmB_err:
diff --git a/samtools/bam.h b/samtools/bam.h
index 8c9d33a..804d590 100644
--- a/samtools/bam.h
+++ b/samtools/bam.h
@@ -38,7 +38,7 @@ DEALINGS IN THE SOFTWARE.  */
   @copyright Genome Research Ltd.
  */
 
-#define BAM_VERSION "1.10"
+#define BAM_VERSION "1.13"
 
 #include <stdint.h>
 #include <stdlib.h>
@@ -77,7 +77,7 @@ typedef bam_hdr_t bam_header_t;
 #define BAM_OFHEX          1
 #define BAM_OFSTR          2
 
-/*! @abstract defautl mask for pileup */
+/*! @abstract default mask for pileup */
 #define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)
 
 /*! @typedef
diff --git a/samtools/bam2bcf_indel.c b/samtools/bam2bcf_indel.c
index 104d108..17dedf0 100644
--- a/samtools/bam2bcf_indel.c
+++ b/samtools/bam2bcf_indel.c
@@ -408,6 +408,10 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, hts_pos_t pos, bcf
                 { // do realignment; this is the bottleneck
                     const uint8_t *qual = bam_get_qual(p->b), *bq;
                     uint8_t *qq;
+                    if (qend < qbeg) {
+                        fprintf(stderr, "Impossible data in bcf_call_gap_prep\n");
+                        exit(1);
+                    }
                     qq = calloc(qend - qbeg, 1);
                     bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
                     if (bq) ++bq; // skip type
diff --git a/samtools/bam2bcf_indel.c.pysam.c b/samtools/bam2bcf_indel.c.pysam.c
index 583f99d..6706298 100644
--- a/samtools/bam2bcf_indel.c.pysam.c
+++ b/samtools/bam2bcf_indel.c.pysam.c
@@ -410,6 +410,10 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, hts_pos_t pos, bcf
                 { // do realignment; this is the bottleneck
                     const uint8_t *qual = bam_get_qual(p->b), *bq;
                     uint8_t *qq;
+                    if (qend < qbeg) {
+                        fprintf(samtools_stderr, "Impossible data in bcf_call_gap_prep\n");
+                        samtools_exit(1);
+                    }
                     qq = calloc(qend - qbeg, 1);
                     bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
                     if (bq) ++bq; // skip type
diff --git a/samtools/bam2depth.c b/samtools/bam2depth.c
index 4b537c7..5253dfa 100644
--- a/samtools/bam2depth.c
+++ b/samtools/bam2depth.c
@@ -1,9 +1,11 @@
 /*  bam2depth.c -- depth subcommand.
 
     Copyright (C) 2011, 2012 Broad Institute.
-    Copyright (C) 2012-2016, 2018, 2019 Genome Research Ltd.
+    Copyright (C) 2012-2016, 2018, 2019-2021 Genome Research Ltd.
+
+    Author: Heng Li <lh3@sanger.ac.uk> (to 2020)
+    Author: James Bonfield <jkb@sanger.ac.uk> (2021 rewrite)
 
-    Author: Heng Li <lh3@sanger.ac.uk>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -24,7 +26,7 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.  */
 
 /* This program demonstrates how to generate pileup from multiple BAMs
- * simutaneously, to achieve random access and to use the BED interface.
+ * simultaneously, to achieve random access and to use the BED interface.
  * To compile this program separately, you may:
  *
  *   gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -lhts -lz
@@ -41,355 +43,913 @@ DEALINGS IN THE SOFTWARE.  */
 #include "samtools.h"
 #include "bedidx.h"
 #include "sam_opts.h"
+#include "htslib/khash.h"
 
-#define BAM_FMAX ((BAM_FSUPPLEMENTARY << 1) - 1)
+// From bam_plcmd.c
+int read_file_list(const char *file_list, int *n, char **argv[]);
 
-typedef struct {     // auxiliary data structure
-    samFile *fp;     // the file handle
-    sam_hdr_t *hdr;  // the file header
-    hts_itr_t *iter; // NULL if a region not specified
-    int min_mapQ, min_len; // mapQ filter; length filter
-    uint32_t flags;  // read filtering flags
-} aux_t;
+// We accumulate to hist[pos & (size-1)].  This is a ring-buffer.
+// We track where we last got to in output and what the biggest value
+// we've written to so far (in absolute unmasked coordinates) in
+// "last_output" and "end_pos" respectively.
+// For each new record we just flush anything we haven't written yet
+// already, between "last_output" and this read's start position, and
+// initialise any newly seen positions between "end_pos" and this read's
+// end position.
+typedef struct {
+    size_t size;        // ring-buffer length; slots are indexed pos & (size-1)
+    int **hist;         // hist[nfiles][size]
+    hts_pos_t *end_pos; // end_pos[nfiles]
+    hts_pos_t last_output; // first 0-based position not yet written out
+    int last_ref;       // tid of the reference in progress; negative if none
+    int nfiles;         // number of per-file rows in hist and end_pos
+    const char *ref;    // name of last_ref, from sam_hdr_tid2name()
+    kstring_t ks;       // output line buffer, starts with "<ref>\t"
+    hts_pos_t beg, end; // limit to region; a negative value means no limit
+    int tid;            // reference id used when flushing a region with no reads
+} depth_hist;
 
-// This function reads a BAM alignment from one BAM file.
-static int read_bam(void *data, bam1_t *b) // read level filters better go here to avoid pileup
-{
-    aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure
-    int ret;
-    while (1)
-    {
-        ret = aux->iter? sam_itr_next(aux->fp, aux->iter, b) : sam_read1(aux->fp, aux->hdr, b);
-        if ( ret<0 ) break;
-        if ( b->core.flag & aux->flags) continue;
-        if ( (int)b->core.qual < aux->min_mapQ ) continue;
-        if ( aux->min_len && bam_cigar2qlen(b->core.n_cigar, bam_get_cigar(b)) < aux->min_len ) continue;
-        break;
+typedef struct {
+    int header;
+    int flag;
+    int min_qual;
+    int min_mqual;
+    int min_len;
+    int skip_del;
+    int all_pos;        // non-zero: also print zero-depth positions; > 1 adds unused refs
+    int remove_overlaps;
+    FILE *out;          // destination for the depth rows
+    char *reg;          // region restriction, NULL when none
+    void *bed;          // BED filter handed to bed_overlap(), NULL when none
+} depth_opt;
+
+static void zero_region(depth_opt *opt, depth_hist *dh,
+                        const char *name, hts_pos_t start, hts_pos_t end) {
+    hts_pos_t i;
+    kstring_t *ks = &dh->ks;
+    // Emit one all-zero row per position in [start, end) of reference "name".
+    kputs(name, ks_clear(ks));
+    kputc('\t', ks);
+    size_t cur_l = ks->l; // rewind point just after "name\t"
+    if (dh->beg >= 0 && start < dh->beg) // clip to the region limits when set
+        start = dh->beg;
+    if (dh->end >= 0 && end > dh->end)
+        end = dh->end;
+
+    for (i = start; i < end; i++) {
+        // Could be optimised, but needs better API to skip to next
+        // bed region.
+        if (opt->bed && bed_overlap(opt->bed, name, i, i+1) == 0)
+            continue;
+
+        ks->l = cur_l; // reuse the prefix for every row
+        kputll(i+1,  ks); // positions are printed 1-based
+        int n;
+        for (n = 0; n < dh->nfiles; n++) { // one "0" column per input file
+            kputc_('\t', ks);
+            kputc_('0',  ks);
+        }
+        kputc('\n',  ks);
+        fputs(ks->s, opt->out);
     }
-    return ret;
+    ks->l = cur_l; // leave only the "name\t" prefix in the buffer
 }
 
-int read_file_list(const char *file_list,int *n,char **argv[]);
-
-static int usage() {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n");
-    fprintf(stderr, "Options:\n");
-    fprintf(stderr, "   -a                  output all positions (including zero depth)\n");
-    fprintf(stderr, "   -a -a (or -aa)      output absolutely all positions, including unused ref. sequences\n");
-    fprintf(stderr, "   -b <bed>            list of positions or regions\n");
-    fprintf(stderr, "   -X                  use customized index files\n");
-    fprintf(stderr, "   -f <list>           list of input BAM filenames, one per line [null]\n");
-    fprintf(stderr, "   -H                  print a file header\n");
-    fprintf(stderr, "   -l <int>            read length threshold (ignore reads shorter than <int>) [0]\n");
-    fprintf(stderr, "   -d/-m <int>         maximum coverage depth [8000]. If 0, depth is set to the maximum\n"
-                    "                       integer value, effectively removing any depth limit.\n");  // the htslib's default
-    fprintf(stderr, "   -o FILE             where to write output to [stdout]\n");
-    fprintf(stderr, "   -q <int>            base quality threshold [0]\n");
-    fprintf(stderr, "   -Q <int>            mapping quality threshold [0]\n");
-    fprintf(stderr, "   -r <chr:from-to>    region\n");
-    fprintf(stderr, "   -g <flags>          include reads that have any of the specified flags set [0]\n");
-    fprintf(stderr, "   -G <flags>          filter out reads that have any of the specified flags set"
-                    "                       [UNMAP,SECONDARY,QCFAIL,DUP]\n");
-
-    sam_global_opt_help(stderr, "-.--.--.");
-
-    fprintf(stderr, "\n");
-    fprintf(stderr, "The output is a simple tab-separated table with three columns: reference name,\n");
-    fprintf(stderr, "position, and coverage depth.  Note that positions with zero coverage may be\n");
-    fprintf(stderr, "omitted by default; see the -a option.\n");
-    fprintf(stderr, "\n");
-
-    return EXIT_FAILURE;
+// A variation of bam_cigar2qlen which doesn't count soft-clips in to the
+// equation.  Returns the number of bases in query that are aligned in some
+// way to the reference (including insertions, which are considered to be
+// aligned by dint of being anchored either side).
+hts_pos_t qlen_used(bam1_t *b) {
+    int n_cigar = b->core.n_cigar;
+    const uint32_t *cigar = bam_get_cigar(b);
+    hts_pos_t l;
+
+    if (b->core.l_qseq) {
+        // Known SEQ permits of short cut of l_qseq minus CSOFT_CLIPs.
+        // Full scan not needed, which helps on excessively long CIGARs.
+        // Hard clips lie outside the soft clips (e.g. 5H10S50M) and are
+        // not counted in l_qseq, so step over them rather than stopping.
+        l = b->core.l_qseq;
+        int kl, kr;
+        for (kl = 0; kl < n_cigar; kl++) {
+            int op = bam_cigar_op(cigar[kl]);
+            if (op == BAM_CSOFT_CLIP) l -= bam_cigar_oplen(cigar[kl]);
+            else if (op != BAM_CHARD_CLIP) break;
+        }
+
+        for (kr = n_cigar-1; kr > kl; kr--) {
+            int op = bam_cigar_op(cigar[kr]);
+            if (op == BAM_CSOFT_CLIP) l -= bam_cigar_oplen(cigar[kr]);
+            else if (op != BAM_CHARD_CLIP) break;
+        }
+    } else {
+        // Unknown SEQ ("*") needs a full scan through the CIGAR string.
+        static int query[16] = {
+          //M I D N  S H P =  X B ? ?  ? ? ? ?
+            1,1,0,0, 0,0,0,1, 1,0,0,0, 0,0,0,0
+        };
+        int k;
+        for (k = l = 0; k < n_cigar; k++)
+            if (query[bam_cigar_op(cigar[k])])
+                l += bam_cigar_oplen(cigar[k]);
+    }
+    return l;
 }
 
-int main_depth(int argc, char *argv[])
-{
-    int i, n, tid, reg_tid, *n_plp, baseQ = 0, mapQ = 0, min_len = 0, has_index_file = 0;
-    hts_pos_t beg, end, pos, last_pos = -1;
-    int all = 0, status = EXIT_SUCCESS, nfiles, max_depth = -1;
-    const bam_pileup1_t **plp;
-    char *reg = 0; // specified region
-    void *bed = 0; // BED data structure
-    char *file_list = NULL, **fn = NULL;
-    sam_hdr_t *h = NULL; // BAM header of the 1st input
-    aux_t **data;
-    bam_mplp_t mplp;
-    int last_tid = -1, ret;
-    int print_header = 0;
-    char *output_file = NULL;
-    FILE *file_out = stdout;
-    uint32_t flags = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP);
-    int tflags = 0;
+// Adds the depth for a single read to a depth_hist struct.
+// For just one file, this is easy.  We just have a circular buffer
+// where we increment values for bits that overlap existing data
+// and initialise values for coordinates which we're seeing for the first
+// time.  This is tracked by "end_pos" to know where we've got to.
+//
+// As the input is sorted, we can flush output from "last_output" to
+// b->core.pos.
+//
+// With multiple files, we must feed data in sorted order as if all files
+// are merged, but track depth per file.  This also means "end_pos" is per
+// file too, but "last_output" is global as it corresponds to rows printed.
+static int add_depth(depth_opt *opt, depth_hist *dh, sam_hdr_t *h, bam1_t *b,
+                     int overlap_clip, int file) {
+    hts_pos_t i;
+    size_t hmask = dh->size-1;
+    int n;
 
-    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
-    static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
-        { NULL, 0, NULL, 0 }
-    };
+    if (!b || b->core.tid != dh->last_ref) {
+        // New ref
+        if (dh->last_ref >= 0) {
+            // do end
+            size_t cur_l = dh->ks.l;
+            int nf = dh->nfiles;
+            i = dh->last_output;
+            for (i = dh->last_output; nf; i++) {
+                nf = 0;
+                for (n = 0; n < dh->nfiles; n++) {
+                    if (i < dh->end_pos[n])
+                        nf++;
+                }
+                if (!nf)
+                    break;
+
+                if (opt->bed && bed_overlap(opt->bed, dh->ref, i, i+1) == 0)
+                    continue;
 
-    // parse the command line
-    while ((n = getopt_long(argc, argv, "r:b:Xq:Q:l:f:am:d:Ho:g:G:", lopts, NULL)) >= 0) {
-        switch (n) {
-            case 'l': min_len = atoi(optarg); break; // minimum query length
-            case 'r': reg = strdup(optarg); break;   // parsing a region requires a BAM header
-            case 'b':
-                bed = bed_read(optarg); // BED or position list file can be parsed now
-                if (!bed) {
-                    print_error_errno("depth", "Could not read file \"%s\"", optarg);
-                    return EXIT_FAILURE;
+                dh->ks.l = cur_l;
+                kputll(i+1, &dh->ks);
+                for (n = 0; n < dh->nfiles; n++) {
+                    kputc_('\t', &dh->ks);
+                    int d = i < dh->end_pos[n]
+                        ? dh->hist[n][i & hmask]
+                        : 0;
+                    kputuw(d, &dh->ks);
                 }
-                break;
-            case 'X': has_index_file = 1; break;
-            case 'q': baseQ = atoi(optarg); break;   // base quality threshold
-            case 'Q': mapQ = atoi(optarg); break;    // mapping quality threshold
-            case 'f': file_list = optarg; break;
-            case 'a': all++; break;
-            case 'd': case 'm': max_depth = atoi(optarg); break; // maximum coverage depth
-            case 'H': print_header = 1; break;
-            case 'o': output_file = optarg; break;
-            case 'g':
-                tflags = bam_str2flag(optarg);
-                if (tflags < 0 || tflags > BAM_FMAX) {
-                    print_error_errno("depth", "Flag value \"%s\" is not supported", optarg);
-                    return 1;
+                kputc('\n', &dh->ks);
+                fputs(dh->ks.s, opt->out);
+            }
+            if (opt->all_pos) {
+                // End of last ref
+                zero_region(opt, dh,
+                            sam_hdr_tid2name(h, dh->last_ref),
+                            i, sam_hdr_tid2len(h, dh->last_ref));
+            }
+            dh->ks.l = cur_l;
+        }
+
+        if (opt->all_pos > 1 && !opt->reg) {
+            // Any previous unused refs
+            int lr = dh->last_ref < 0 ? 0 : dh->last_ref+1;
+            int rr = b ? b->core.tid : sam_hdr_nref(h), r;
+            for (r = lr; r < rr; r++)
+                zero_region(opt, dh,
+                            sam_hdr_tid2name(h, r),
+                            0, sam_hdr_tid2len(h, r));
+        }
+
+        if (!b) {
+            // we're just flushing to end of file
+            if (opt->all_pos && opt->reg && dh->last_ref < 0)
+                // -a or -aa without a single read being output yet
+                zero_region(opt, dh, sam_hdr_tid2name(h, dh->tid), dh->beg,
+                            MIN(dh->end, sam_hdr_tid2len(h, dh->tid)));
+
+            return 0;
+        }
+
+        for (n = 0; dh->end_pos && n < dh->nfiles; n++)
+            dh->end_pos[n] = 0;
+        dh->last_output = dh->beg >= 0
+            ? MAX(b->core.pos, dh->beg)
+            : b->core.pos;
+        dh->last_ref = b->core.tid;
+        dh->ref = sam_hdr_tid2name(h, b->core.tid);
+        kputs(dh->ref, ks_clear(&dh->ks));
+        kputc('\t', &dh->ks);
+
+        if (opt->all_pos)
+            // Start of ref
+            zero_region(opt, dh, dh->ref, 0, b->core.pos);
+    } else {
+        if (dh->last_output < b->core.pos) {
+            // Flush any depth outputs up to start of new read
+            size_t cur_l = dh->ks.l;
+            int nf = dh->nfiles;
+            for (i = dh->last_output; i < b->core.pos; i++) {
+                nf = 0;
+                for (n = 0; n < dh->nfiles; n++) {
+                    if (i < dh->end_pos[n])
+                        nf++;
                 }
-                flags &= ~tflags;
-                break;
-            case 'G':
-                tflags = bam_str2flag(optarg);
-                if (tflags < 0 || tflags > BAM_FMAX) {
-                    print_error_errno("depth", "Flag value \"%s\" is not supported", optarg);
-                    return 1;
+                if (!nf)
+                    break;
+
+                if (opt->bed && bed_overlap(opt->bed, dh->ref, i, i+1) == 0)
+                    continue;
+
+                dh->ks.l = cur_l;
+                kputll(i+1, &dh->ks);
+                for (n = 0; n < dh->nfiles; n++) {
+                    kputc_('\t', &dh->ks);
+                    int d = i < dh->end_pos[n]
+                        ? dh->hist[n][i & hmask]
+                        : 0;
+                    kputuw(d, &dh->ks);
                 }
-                flags |= tflags;
-                break;
-            default:  if (parse_sam_global_opt(n, optarg, lopts, &ga) == 0) break;
-                      /* else fall-through */
-            case '?': return usage();
+                kputc('\n', &dh->ks);
+                fputs(dh->ks.s, opt->out);
+            }
+            if (opt->all_pos && i < b->core.pos)
+                // Hole in middle of ref
+                zero_region(opt, dh, dh->ref, i, b->core.pos);
+
+            dh->ks.l = cur_l;
+            dh->last_output = b->core.pos;
         }
     }
-    if (optind == argc && !file_list)
-        return usage();
-
-    /* output file provided by user */
-    if (output_file != NULL && strcmp(output_file,"-")!=0) {
-        file_out = fopen( output_file, "w" );
-        if (file_out == NULL) {
-            print_error_errno("depth", "Cannot open \"%s\" for writing.", output_file);
-            return EXIT_FAILURE;
-        }
+
+    hts_pos_t end_pos = bam_endpos(b); // 0 based, 1 past end.
+    //printf("%d %d\n", (int)b->core.pos+1, (int)end_pos);
+
+    if (b->core.tid < dh->last_ref ||
+        (dh->last_ref == b->core.tid && end_pos < dh->last_output)) {
+        print_error_errno("depth", "Data is not position sorted");
+        return -1;
     }
 
+    // If needed, grow the circular buffer.
+    if (end_pos+1 - b->core.pos >= dh->size) {
+        size_t old_size = dh->size;
+        size_t old_hmask = hmask;
+        while (end_pos+1 - b->core.pos >= dh->size)
+            dh->size = dh->size ? 2*dh->size : 2048;
+        hmask = dh->size-1;
+        if (!dh->hist) {
+            dh->hist = calloc(dh->nfiles, sizeof(*dh->hist));
+            dh->end_pos = calloc(dh->nfiles, sizeof(*dh->end_pos));
+            if (!dh->hist || !dh->end_pos)
+                return -1;
+        }
+        for (n = 0; n < dh->nfiles; n++) {
+            int *hist = calloc(dh->size, sizeof(*dh->hist[n]));
+            if (!hist)
+                return -1;
 
-    // initialize the auxiliary data structures
-    if (file_list)
-    {
-        if (has_index_file) {
-            print_error("depth", "The -f option cannot be combined with -X");
-            return 1;
+            // Simple approach for now; copy over old histogram verbatim.
+            for (i = dh->last_output; i < dh->last_output + old_size; i++)
+                hist[i & hmask] = dh->hist[n][i & old_hmask];
+            free(dh->hist[n]);
+            dh->hist[n] = hist;
         }
-        if ( read_file_list(file_list,&nfiles,&fn) ) return EXIT_FAILURE;
-        n = nfiles;
-        argv = fn;
-        optind = 0;
     }
-    else if (has_index_file) { // Calculate # of input BAM files
-        if ((argc - optind) % 2 != 0) {
-            fprintf(stderr, "Error: Odd number of filenames detected! Each BAM file should have an index file\n");
-            return 1;
-        }
-        n = (argc - optind) / 2;
+
+    // Accumulate depth, based on CIGAR
+    uint32_t *cig = bam_get_cigar(b);
+    int ncig = b->core.n_cigar, j, k, spos = 0;
+
+    // Zero new (previously unseen) coordinates so increment works later.
+    hts_pos_t end = MAX(dh->end_pos[file], b->core.pos);
+    if (end_pos > end && (end & hmask) < (end_pos & hmask)) {
+        memset(&dh->hist[file][end & hmask], 0,
+               sizeof(**dh->hist) * (end_pos - end));
     } else {
-        n = argc - optind;
+        for (i = end; i < end_pos; i++)
+            dh->hist[file][i & hmask] = 0;
     }
-    data = calloc(n, sizeof(aux_t*)); // data[i] for the i-th input
-    reg_tid = 0; beg = 0; end = HTS_POS_MAX;  // set the default region
-
-    for (i = 0; i < n; ++i) {
-        int rf;
-        data[i] = calloc(1, sizeof(aux_t));
-        data[i]->fp = sam_open_format(argv[optind+i], "r", &ga.in); // open BAM
-        if (data[i]->fp == NULL) {
-            print_error_errno("depth", "Could not open \"%s\"", argv[optind+i]);
-            status = EXIT_FAILURE;
-            goto depth_end;
-        }
-        rf = SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR | SAM_SEQ;
-        if (baseQ) rf |= SAM_QUAL;
-        if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
-            print_error_errno("depth", "Failed to set CRAM_OPT_REQUIRED_FIELDS value");
-            status = EXIT_FAILURE;
-            goto depth_end;
-        }
-        if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
-            print_error_errno("depth", "Failed to set CRAM_OPT_DECODE_MD value");
-            status = EXIT_FAILURE;
-            goto depth_end;
-        }
-        data[i]->min_mapQ = mapQ;                    // set the mapQ filter
-        data[i]->min_len  = min_len;                 // set the qlen filter
-        data[i]->hdr = sam_hdr_read(data[i]->fp);    // read the BAM header
-        if (data[i]->hdr == NULL) {
-            print_error_errno("depth", "Couldn't read header for \"%s\"",
-                              argv[optind+i]);
-            status = EXIT_FAILURE;
-            goto depth_end;
-        }
-        if (reg) { // if a region is specified
-            hts_idx_t *idx = NULL;
-            // If index filename has not been specfied, look in BAM folder
-            if (has_index_file) {
-                idx = sam_index_load2(data[i]->fp, argv[optind+i], argv[optind+i+n]);  // load the index
+
+    i = b->core.pos;
+    uint8_t *qual = bam_get_qual(b);
+    int min_qual = opt->min_qual;
+    for (j = 0; j < ncig; j++) {
+        int op    = bam_cigar_op(cig[j]);
+        int oplen = bam_cigar_oplen(cig[j]);
+
+        switch (op) {
+        case BAM_CDEL:
+        case BAM_CREF_SKIP:
+            if (op != BAM_CDEL || opt->skip_del) {
+                // don't increment reference location
+                if (i + oplen >= dh->end_pos[file]) {
+                    for (k = 0; k < oplen; k++, i++) {
+                        if (i >= dh->end_pos[file])
+                            // redundant due to zero new elements above?
+                            dh->hist[file][i & hmask] = 0;
+                    }
+                } else {
+                    i += oplen;
+                }
+            } else { // op == BAM_CDEL and we count them (-J option),
+                // We don't incr spos here, but we still use qual.
+                // This doesn't make much sense, but it's for compatibility
+                // with the old code.  Arguably DEL shouldn't have a min
+                // qual and should always pass (as we've explicitly asked to
+                // include them).
+                int *hist = dh->hist[file];
+                k = 0;
+                if (overlap_clip) {
+                    if (i+oplen < overlap_clip) {
+                        i += oplen;
+                        break;
+                    } else if (i < overlap_clip) {
+                        k = overlap_clip - i;
+                        i = overlap_clip;
+                    }
+                }
+
+                // Question: should we even check quality values for DEL?
+                // We've explicitly asked to include them, and the quality
+                // is wrong anyway (it's the neighbouring base).  We do this
+                // for now for compatibility with the old depth command.
+
+                if (spos < b->core.l_qseq)
+                    for (; k < oplen; k++, i++)
+                        hist[i & hmask]+=qual[spos]>=min_qual;
+                else
+                    for (; k < oplen; k++, i++)
+                        hist[i & hmask]++;
+            }
+            break;
+
+        case BAM_CMATCH:
+        case BAM_CEQUAL:
+        case BAM_CDIFF:
+            if ((i & hmask) < ((i+oplen) & hmask)) {
+                // Optimisation when not wrapping around
+
+                // Unrolling doesn't help clang, but helps gcc,
+                // especially when not using -O3.
+                int *hist = &dh->hist[file][i & hmask];
+                if (min_qual || overlap_clip) {
+                    k = 0;
+                    if (overlap_clip) {
+                        if (i+oplen < overlap_clip) {
+                            i += oplen;
+                            spos += oplen;
+                            break;
+                        } else if (i < overlap_clip) {
+                            oplen -= overlap_clip - i;
+                            spos += overlap_clip - i;
+                            hist += overlap_clip - i;
+                            i = overlap_clip;
+                        }
+                    }
+
+                    // approx 50% of this func cpu time in this loop
+                    for (; k < (oplen & ~7); k+=8) {
+                        hist[k+0]+=qual[spos+0]>=min_qual;
+                        hist[k+1]+=qual[spos+1]>=min_qual;
+                        hist[k+2]+=qual[spos+2]>=min_qual;
+                        hist[k+3]+=qual[spos+3]>=min_qual;
+                        hist[k+4]+=qual[spos+4]>=min_qual;
+                        hist[k+5]+=qual[spos+5]>=min_qual;
+                        hist[k+6]+=qual[spos+6]>=min_qual;
+                        hist[k+7]+=qual[spos+7]>=min_qual;
+                        spos += 8;
+                    }
+                } else {
+                    // easier to vectorize when no min_qual
+                    for (k = 0; k < (oplen & ~7); k+=8) {
+                        hist[k+0]++;
+                        hist[k+1]++;
+                        hist[k+2]++;
+                        hist[k+3]++;
+                        hist[k+4]++;
+                        hist[k+5]++;
+                        hist[k+6]++;
+                        hist[k+7]++;
+                    }
+                    spos += k;
+                }
+                for (; k < oplen && spos < b->core.l_qseq; k++, spos++)
+                    hist[k]+=qual[spos]>=min_qual;
+                for (; k < oplen; k++, spos++)
+                    hist[k]++;
+                i += oplen;
             } else {
-                idx = sam_index_load(data[i]->fp, argv[optind+i]);
+                // Simple to understand case, but slower.
+                // We use this only for reads with wrap-around.
+                int *hist = dh->hist[file];
+                k = 0;
+                if (overlap_clip) {
+                    if (i+oplen < overlap_clip) {
+                        i += oplen;
+                        break;
+                    } else if (i < overlap_clip) {
+                        oplen -= overlap_clip - i;
+                        spos += overlap_clip - i;
+                        i = overlap_clip;
+                    }
+                }
+                for (; k < oplen && spos < b->core.l_qseq; k++, i++, spos++)
+                    hist[i & hmask]+=qual[spos]>=min_qual;
+                for (; k < oplen; k++, i++, spos++)
+                    hist[i & hmask]++;
             }
-            if (idx == NULL) {
-                print_error("depth", "can't load index for \"%s\"", argv[optind+i]);
-                status = EXIT_FAILURE;
-                goto depth_end;
+            break;
+
+        case BAM_CINS:
+        case BAM_CSOFT_CLIP:
+            spos += oplen;
+            break;
+
+        case BAM_CPAD:
+        case BAM_CHARD_CLIP:
+            // ignore
+            break;
+
+        default:
+            print_error("depth", "Unsupported cigar op '%d'", op);
+            return -1;
+        }
+    }
+
+    if (dh->end >= 0 && end_pos > dh->end)
+        end_pos = dh->end;
+    if (dh->end_pos[file] < end_pos)
+        dh->end_pos[file] = end_pos;
+
+    return 0;
+}
+
+// Hash on name -> alignment end pos. This permits a naive overlap removal.
+// Note it cannot analyse the overlapping sequence and qualities, so the
+// interaction of basecalls/qualities and the -Q parameter cannot be
+// applied here (unlike the full mpileup algorithm).
+KHASH_MAP_INIT_STR(olap_hash, hts_pos_t)
+typedef khash_t(olap_hash) olap_hash_t;
+
+// Merge records from all input files in position order and emit per-site depth.
+static int fastdepth_core(depth_opt *opt, uint32_t nfiles, char **fn,
+                          samFile **fp, hts_itr_t **itr, sam_hdr_t **h) {
+    int ret = -1, err = 1, i;
+    olap_hash_t **overlaps = NULL;
+    depth_hist dh = {0};
+
+    // An array of bam structs, one per input file, to hold the next entry
+    bam1_t **b = calloc(nfiles, sizeof(*b));
+    int *finished = calloc(nfiles, sizeof(*finished)), to_go = nfiles;
+    if (!b || !finished)
+        goto err;
+
+    for (i = 0; i < nfiles; i++)
+        if (!(b[i] = bam_init1()))
+            goto err;
+
+    // Do we need one overlap hash per file? Or shared?
+    if (opt->remove_overlaps) {
+        if (!(overlaps = calloc(nfiles, sizeof(*overlaps))))
+            goto err; // release b[] and finished rather than leaking them
+        for (i = 0; i < nfiles; i++) {
+            if (!(overlaps[i] = kh_init(olap_hash)))
+                goto err; // cleanup skips the NULL (not yet created) hashes
+        }
+    }
+
+    // Create the initial histogram
+    dh.nfiles = nfiles;
+    dh.size = 0;
+    dh.hist = NULL;
+    dh.last_ref = -99;
+    dh.end_pos = NULL;
+    dh.last_output = itr && itr[0] ? itr[0]->beg : 0;
+    ks_initialize(&dh.ks);
+
+    // Clip results to region if specified
+    dh.beg = dh.end = -1;
+    dh.tid = 0;
+    if (itr && itr[0]) {
+        dh.tid = itr[0]->tid;
+        dh.beg = itr[0]->beg;
+        dh.end = itr[0]->end;
+    }
+
+    if (opt->header) {
+        fprintf(opt->out, "#CHROM\tPOS");
+        for (i = 0; i < nfiles; i++)
+            fprintf(opt->out, "\t%s", fn[i]);
+        fputc('\n', opt->out);
+    }
+
+    // Populate first record per file
+    for (i = 0; i < nfiles; i++) {
+        for(;;) {
+            ret = itr && itr[i]
+                ? sam_itr_next(fp[i], itr[i], b[i])
+                : sam_read1(fp[i], h[i], b[i]);
+            if (ret < -1)
+                goto err;
+            if (ret == -1) {
+                to_go--;
+                finished[i] = 1;
+                break;
             }
-            data[i]->iter = sam_itr_querys(idx, data[i]->hdr, reg); // set the iterator
-            hts_idx_destroy(idx); // the index is not needed any more; free the memory
-            if (data[i]->iter == NULL) {
-                print_error("depth", "can't parse region \"%s\"", reg);
-                status = EXIT_FAILURE;
-                goto depth_end;
+
+            if (b[i]->core.tid < 0)
+                continue;
+            if (b[i]->core.flag & opt->flag)
+                continue;
+            if (b[i]->core.qual < opt->min_mqual)
+                continue;
+
+            // Original samtools depth used the total sequence (l_qseq)
+            // including soft-clips.  This doesn't feel like a useful metric
+            // to be filtering on.  We now only count sequence bases that
+            // form the used part of the alignment.
+            if (opt->min_len) {
+                if (qlen_used(b[i]) < opt->min_len)
+                    continue;
             }
+
+            break;
         }
-        data[i]->flags = flags;
     }
-    if (print_header) {
-        fputs("#CHROM\tPOS", file_out);
-        for (i = 0; i < n; ++i) {
-            fputc('\t', file_out);
-            fputs(argv[optind+i], file_out);
+
+    // Loop through input files, merging in order so we're
+    // always adding the next record in sequence
+    while (to_go) {
+        // Find next record in file list
+        int best_tid = INT_MAX, best_file = 0;
+        hts_pos_t best_pos = HTS_POS_MAX;
+
+        for (i = 0; i < nfiles; i++) {
+            if (finished[i])
+                continue;
+            if (best_tid > b[i]->core.tid) {
+                best_tid = b[i]->core.tid;
+                best_pos = b[i]->core.pos;
+                best_file = i;
+            } else if (best_tid == b[i]->core.tid &&
+                       best_pos > b[i]->core.pos) {
+                best_pos = b[i]->core.pos;
+                best_file = i;
             }
-        fputc('\n', file_out);
         }
-    h = data[0]->hdr; // easy access to the header of the 1st BAM
-    if (reg) {
-        beg = data[0]->iter->beg; // and to the parsed region coordinates
-        end = data[0]->iter->end;
-        reg_tid = data[0]->iter->tid;
-    }
+        i = best_file;
 
-    // the core multi-pileup loop
-    mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization
-    if (0 < max_depth)
-        bam_mplp_set_maxcnt(mplp,max_depth);  // set maximum coverage depth
-    else if (!max_depth)
-        bam_mplp_set_maxcnt(mplp,INT_MAX);
-    n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM
-    plp = calloc(n, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads (internal in mplp)
-    while ((ret=bam_mplp64_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // come to the next covered position
-        if (pos < beg || pos >= end) continue; // out of range; skip
-        if (tid >= sam_hdr_nref(h)) continue;     // diff number of @SQ lines per file?
-        if (all) {
-            while (tid > last_tid) {
-                if (last_tid >= 0 && !reg) {
-                    // Deal with remainder or entirety of last tid.
-                    while (++last_pos < sam_hdr_tid2len(h, last_tid)) {
-                        // Horribly inefficient, but the bed API is an obfuscated black box.
-                        if (bed && bed_overlap(bed, sam_hdr_tid2name(h, last_tid), last_pos, last_pos + 1) == 0)
-                            continue;
-                        fputs(sam_hdr_tid2name(h, last_tid), file_out);
-                        fprintf(file_out, "\t%"PRIhts_pos, last_pos+1);
-                        for (i = 0; i < n; i++)
-                            fputc('\t', file_out), fputc('0', file_out);
-                        fputc('\n', file_out);
-                    }
+        hts_pos_t clip = 0;
+        if (overlaps && (b[i]->core.flag & BAM_FPAIRED) &&
+            !(b[i]->core.flag & BAM_FMUNMAP)) {
+            khiter_t k = kh_get(olap_hash, overlaps[i], bam_get_qname(b[i]));
+            if (k == kh_end(overlaps[i])) {
+                // not seen before
+                hts_pos_t endpos = bam_endpos(b[i]);
+
+                // Don't add if mate location is known and can't overlap.
+                if (b[i]->core.mpos == -1 ||
+                    (b[i]->core.tid == b[i]->core.mtid &&
+                     b[i]->core.mpos <= endpos)) {
+                    k = kh_put(olap_hash, overlaps[i], bam_get_qname(b[i]),
+                               &ret);
+                    if (ret < 0)
+                        goto err; // negative ret is returned after cleanup
+                    kh_key(overlaps[i], k) = strdup(bam_get_qname(b[i]));
+                    kh_value(overlaps[i], k) = endpos;
                 }
-                last_tid++;
-                last_pos = -1;
-                if (all < 2)
-                    break;
+            } else {
+                // seen before
+                clip = kh_value(overlaps[i], k);
+                free((char *)kh_key(overlaps[i], k));
+                kh_del(olap_hash, overlaps[i], k);
             }
+        }
 
-            // Deal with missing portion of current tid
-            while (++last_pos < pos) {
-                if (last_pos < beg) continue; // out of range; skip
-                if (bed && bed_overlap(bed, sam_hdr_tid2name(h, tid), last_pos, last_pos + 1) == 0)
-                    continue;
-                fputs(sam_hdr_tid2name(h, tid), file_out);
-                fprintf(file_out, "\t%"PRIhts_pos, last_pos+1);
-                for (i = 0; i < n; i++)
-                    fputc('\t', file_out), fputc('0', file_out);
-                fputc('\n', file_out);
+        // Add the next merged BAM record to the depth plot
+        if ((ret = add_depth(opt, &dh, h[i], b[i], clip, i)) < 0) {
+            ret = -1;
+            goto err;
+        }
+
+        // Populate next record from this file
+        for(;!finished[i];) {
+            ret = itr && itr[i]
+                ? sam_itr_next(fp[i], itr[i], b[i])
+                : sam_read1(fp[i], h[i], b[i]);
+            if (ret < -1) {
+                ret = -1;
+                goto err;
+            }
+            if (ret == -1) {
+                to_go--;
+                finished[i] = 1;
+                break;
             }
 
-            last_tid = tid;
-            last_pos = pos;
-        }
-        if (bed && bed_overlap(bed, sam_hdr_tid2name(h, tid), pos, pos + 1) == 0) continue;
-        fputs(sam_hdr_tid2name(h, tid), file_out);
-        fprintf(file_out, "\t%"PRIhts_pos, pos+1); // a customized printf() would be faster
-        for (i = 0; i < n; ++i) { // base level filters have to go here
-            int j, m = 0;
-            for (j = 0; j < n_plp[i]; ++j) {
-                const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know
-                if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos
-                else if (p->qpos < p->b->core.l_qseq &&
-                         bam_get_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality
+            if (b[i]->core.tid < 0)
+                continue;
+            if (b[i]->core.flag & opt->flag)
+                continue;
+            if (b[i]->core.qual < opt->min_mqual)
+                continue;
+
+            if (opt->min_len) {
+                if (qlen_used(b[i]) < opt->min_len)
+                    continue;
             }
-            fprintf(file_out, "\t%d", n_plp[i] - m); // this the depth to output
+
+            break;
         }
-        fputc('\n', file_out);
     }
-    if (ret < 0) status = EXIT_FAILURE;
-    free(n_plp); free(plp);
-    bam_mplp_destroy(mplp);
-
-    if (all) {
-        // Handle terminating region
-        if (last_tid < 0 && reg) {
-            last_tid = reg_tid;
-            last_pos = beg-1;
+
+    // Tidy up end.
+    ret = add_depth(opt, &dh, h[0], NULL, 0, 0);
+    err = 0;
+
+ err:
+    if (ret == 0 && err)
+        ret = -1;
+
+    for (i = 0; i < nfiles; i++) {
+        if (b && b[i]) // b itself is NULL when its calloc failed
+            bam_destroy1(b[i]);
+        if (dh.hist && dh.hist[i])
+            free(dh.hist[i]);
+    }
+    free(b);
+    free(finished);
+    ks_free(&dh.ks);
+    free(dh.hist);
+    free(dh.end_pos);
+    if (overlaps) {
+        khiter_t k;
+        for (i = 0; i < nfiles; i++) {
+            if (!overlaps[i])
+                continue;
+            for (k = kh_begin(overlaps[i]); k < kh_end(overlaps[i]); k++)
+                if (kh_exist(overlaps[i], k))
+                    free((char *)kh_key(overlaps[i], k));
+            kh_destroy(olap_hash, overlaps[i]);
         }
-        while (last_tid >= 0 && last_tid < sam_hdr_nref(h)) {
-            while (++last_pos < sam_hdr_tid2len(h, last_tid)) {
-                if (last_pos >= end) break;
-                if (bed && bed_overlap(bed, sam_hdr_tid2name(h, last_tid), last_pos, last_pos + 1) == 0)
-                    continue;
-                fputs(sam_hdr_tid2name(h, last_tid), file_out);
-                fprintf(file_out, "\t%"PRIhts_pos, last_pos+1);
-                for (i = 0; i < n; i++)
-                    fputc('\t', file_out), fputc('0', file_out);
-                fputc('\n', file_out);
+        free(overlaps);
+    }
+
+    return ret; // 0 on success, negative on any failure
+}
+
+// Print the "samtools depth" usage text to fp, then terminate the process
+// with exit_status.  This function does not return.
+static void usage_exit(FILE *fp, int exit_status) {
+    fprintf(fp, "Usage: samtools depth [options] in.bam [in.bam ...]\n");
+    fprintf(fp, "\nOptions:\n");
+    fprintf(fp, "  -a           Output all positions (including zero depth)\n");
+    fprintf(fp, "  -a -a, -aa   Output absolutely all positions, including unused ref seqs\n");
+    fprintf(fp, "  -r REG       Specify a region in chr or chr:from-to syntax\n");
+    fprintf(fp, "  -b FILE      Use bed FILE for list of regions\n");
+    fprintf(fp, "  -f FILE      Specify list of input BAM/SAM/CRAM filenames\n");
+    fprintf(fp, "  -X           Use custom index files (in -X *.bam *.bam.bai order)\n");
+    fprintf(fp, "  -g INT       Remove specified flags from default flag filter\n");
+    fprintf(fp, "  -G INT       Add specified flags to the default flag filter\n");
+    fprintf(fp, "  -H           Print a file header line\n");
+    fprintf(fp, "  -l INT       Minimum read length [0]\n");
+    fprintf(fp, "  -o FILE      Write output to FILE [stdout]\n");
+    fprintf(fp, "  -q INT       Minimum base quality [0]\n");
+    fprintf(fp, "  -Q INT       Minimum mapping quality [0]\n");
+    fprintf(fp, "  -J           Include reads with deletions in depth computation\n");
+    fprintf(fp, "  -s           Do not count overlapping reads within a template\n");
+    sam_global_opt_help(fp, "-.---@-.");
+    exit(exit_status);
+}
+
+// "samtools depth" entry point: parse options, open inputs, run fastdepth_core.
+int main_depth(int argc, char *argv[])
+{
+    int nfiles, i, c, has_index_file = 0;
+    samFile **fp;
+    sam_hdr_t **header;
+    char *file_list = NULL, **fn = NULL;
+    depth_opt opt = {
+        .flag = BAM_FUNMAP | BAM_FSECONDARY | BAM_FDUP | BAM_FQCFAIL,
+        .min_qual = 0,
+        .min_mqual = 0,
+        .skip_del = 1,
+        .header = 0,
+        .min_len = 0,
+        .out = stdout,
+        .all_pos = 0,
+        .remove_overlaps = 0,
+        .reg = NULL,
+        .bed = NULL,
+    };
+
+    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
+    static const struct option lopts[] = {
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', '-', '@'),
+        {NULL, 0, NULL, 0}
+    };
+
+    while ((c = getopt_long(argc, argv, "@:q:Q:JHd:m:l:g:G:o:ar:Xf:b:s",
+                            lopts, NULL)) >= 0) {
+        switch (c) {
+        case 'a':
+            opt.all_pos++;
+            break;
+
+        case 'b':
+            opt.bed = bed_read(optarg);
+            if (!opt.bed) {
+                print_error_errno("depth", "Could not read file \"%s\"",
+                                  optarg);
+                return 1;
             }
-            last_tid++;
-            last_pos = -1;
-            if (all < 2 || reg)
+            break;
+
+        case 'f':
+            file_list = optarg;
+            break;
+
+        case 'd':
+        case 'm':
+            // depth limit - now ignored
+            break;
+
+        case 'g':
+            opt.flag &= ~bam_str2flag(optarg); // NOTE(review): parse errors (<0) unchecked
+            break;
+        case 'G':
+            opt.flag |= bam_str2flag(optarg); // NOTE(review): parse errors (<0) unchecked
+            break;
+
+        case 'l':
+            opt.min_len = atoi(optarg);
+            break;
+
+        case 'H':
+            opt.header = 1;
+            break;
+
+        case 'q':
+            opt.min_qual = atoi(optarg);
+            break;
+        case 'Q':
+            opt.min_mqual = atoi(optarg);
+            break;
+
+        case 'J':
+            opt.skip_del = 0;
+            break;
+
+        case 'o':
+            if (opt.out != stdout)
                 break;
+            opt.out = fopen(optarg, "w");
+            if (!opt.out) {
+                print_error_errno("depth", "Cannot open \"%s\" for writing.",
+                                  optarg);
+                return EXIT_FAILURE;
+            }
+            break;
+
+        case 'r':
+            opt.reg = optarg;
+            break;
+
+        case 's':
+            opt.remove_overlaps = 1;
+            break;
+
+        case 'X':
+            has_index_file = 1;
+            break;
+
+        default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
+            /* else fall-through */
+        case '?':
+            usage_exit(stderr, EXIT_FAILURE);
         }
     }
 
-depth_end:
-    if (((file_out != stdout)? fclose(file_out) : fflush(file_out)) != 0) {
-        if (status == EXIT_SUCCESS) {
-            if (file_out != stdout)
-                print_error_errno("depth", "error on closing \"%s\"", output_file);
-            else
-                print_error_errno("depth", "error on flushing standard output");
-            status = EXIT_FAILURE;
+    if (argc < optind+1 && !file_list) {
+        if (argc == optind)
+            usage_exit(stdout, EXIT_SUCCESS);
+        else
+            usage_exit(stderr, EXIT_FAILURE);
+    }
+
+    if (file_list) {
+        if (has_index_file) {
+            print_error("depth", "The -f option cannot be combined with -X");
+            return 1;
+        }
+        if (read_file_list(file_list, &nfiles, &fn))
+            return 1;
+        argv = fn;
+        argc = nfiles;
+        optind = 0;
+    } else {
+        nfiles = argc - optind;
+    }
+
+    if (has_index_file) {
+        if (nfiles%2) { // -X takes data files then indices, so count must be even
+            print_error("depth", "-X needs one index specified per bam file");
+            return 1;
         }
+        nfiles /= 2;
+    }
+    fp = malloc(nfiles * sizeof(*fp));
+    header = malloc(nfiles * sizeof(*header));
+    if (!fp || !header) {
+        print_error_errno("depth", "Out of memory");
+        return 1;
     }
 
-    for (i = 0; i < n && data[i]; ++i) {
-        sam_hdr_destroy(data[i]->hdr);
-        if (data[i]->fp) sam_close(data[i]->fp);
-        hts_itr_destroy(data[i]->iter);
-        free(data[i]);
+    hts_itr_t **itr = NULL;
+    if (opt.reg) {
+        itr = calloc(nfiles, sizeof(*itr));
+        if (!itr)
+            return 1;
     }
-    free(data); free(reg);
-    if (bed) bed_destroy(bed);
-    if ( file_list )
-    {
-        for (i=0; i<n; i++) free(fn[i]);
+
+    for (i = 0; i < nfiles; i++, optind++) {
+        fp[i] = sam_open_format(argv[optind], "r", &ga.in);
+        if (fp[i] == NULL) {
+            print_error_errno("depth",
+                              "Cannot open input file \"%s\"", argv[optind]);
+            return 1;
+        }
+
+        if (ga.nthreads > 0)
+            hts_set_threads(fp[i], ga.nthreads);
+
+        if (hts_set_opt(fp[i], CRAM_OPT_REQUIRED_FIELDS,
+                        SAM_FLAG | SAM_RNAME | SAM_POS | SAM_CIGAR
+                        | (opt.remove_overlaps ? SAM_QNAME|SAM_RNEXT|SAM_PNEXT
+                                               : 0)
+                        | (opt.min_mqual       ? SAM_MAPQ  : 0)
+                        | (opt.min_len         ? SAM_SEQ   : 0)
+                        | (opt.min_qual        ? SAM_QUAL  : 0))) {
+            fprintf(stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
+            return 1;
+        }
+
+        if (hts_set_opt(fp[i], CRAM_OPT_DECODE_MD, 0)) {
+            fprintf(stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+            return 1;
+        }
+
+        // FIXME: what if headers differ?
+        header[i] = sam_hdr_read(fp[i]);
+        if (header[i] == NULL) { // test this file's header, not the array
+            fprintf(stderr, "Failed to read header for \"%s\"\n",
+                    argv[optind]);
+            return 1;
+        }
+
+        if (opt.reg) {
+            hts_idx_t *idx = has_index_file
+                ? sam_index_load2(fp[i], argv[optind], argv[optind+nfiles])
+                : sam_index_load(fp[i], argv[optind]);
+            if (!idx) {
+                print_error("depth", "cannot load index for \"%s\"",
+                            argv[optind]);
+                return 1;
+            }
+            if (!(itr[i] = sam_itr_querys(idx, header[i], opt.reg))) {
+                print_error("depth", "cannot parse region \"%s\"", opt.reg);
+                return 1;
+            }
+            hts_idx_destroy(idx);
+        }
+    }
+
+    int ret = fastdepth_core(&opt, nfiles, &argv[optind-nfiles], fp, itr, header)
+        ? 1 : 0; // optind-nfiles is the first data file, even with -X indices
+
+    for (i = 0; i < nfiles; i++) {
+        sam_hdr_destroy(header[i]);
+        sam_close(fp[i]);
+        if (itr && itr[i])
+            hts_itr_destroy(itr[i]);
+    }
+    free(header);
+    free(fp);
+    free(itr);
+    if (file_list) {
+        for (i=0; i<nfiles; i++)
+            free(fn[i]);
         free(fn);
     }
+    if (opt.bed)
+        bed_destroy(opt.bed);
     sam_global_args_free(&ga);
-    return status;
+    if (opt.out != stdout) fclose(opt.out);
+    return ret; // 0 on success, 1 if fastdepth_core reported failure
 }
 
 #ifdef _MAIN_BAM2DEPTH
diff --git a/samtools/bam2depth.c.pysam.c b/samtools/bam2depth.c.pysam.c
index dbd095a..8b36457 100644
--- a/samtools/bam2depth.c.pysam.c
+++ b/samtools/bam2depth.c.pysam.c
@@ -3,9 +3,11 @@
 /*  bam2depth.c -- depth subcommand.
 
     Copyright (C) 2011, 2012 Broad Institute.
-    Copyright (C) 2012-2016, 2018, 2019 Genome Research Ltd.
+    Copyright (C) 2012-2016, 2018, 2019-2021 Genome Research Ltd.
+
+    Author: Heng Li <lh3@sanger.ac.uk> (to 2020)
+    Author: James Bonfield <jkb@sanger.ac.uk> (2021 rewrite)
 
-    Author: Heng Li <lh3@sanger.ac.uk>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -26,7 +28,7 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.  */
 
 /* This program demonstrates how to generate pileup from multiple BAMs
- * simutaneously, to achieve random access and to use the BED interface.
+ * simultaneously, to achieve random access and to use the BED interface.
  * To compile this program separately, you may:
  *
  *   gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -lhts -lz
@@ -43,355 +45,913 @@ DEALINGS IN THE SOFTWARE.  */
 #include "samtools.h"
 #include "bedidx.h"
 #include "sam_opts.h"
+#include "htslib/khash.h"
 
-#define BAM_FMAX ((BAM_FSUPPLEMENTARY << 1) - 1)
+// From bam_plcmd.c
+int read_file_list(const char *file_list, int *n, char **argv[]);
 
-typedef struct {     // auxiliary data structure
-    samFile *fp;     // the file handle
-    sam_hdr_t *hdr;  // the file header
-    hts_itr_t *iter; // NULL if a region not specified
-    int min_mapQ, min_len; // mapQ filter; length filter
-    uint32_t flags;  // read filtering flags
-} aux_t;
+// We accumulate to hist[pos & (size-1)].  This is a ring-buffer.
+// We track where we last got to in output and what the biggest value
+// we've written to so far (in absolute unmasked coordinates) in
+// "last_output" and "end_pos" respectively.
+// For each new record we just flush anything we haven't written yet,
+// between "last_output" and this read's start position, and
+// initialise any newly seen positions between "end_pos" and this read's
+// end position.
+typedef struct {
+    size_t size;        // ring length per file; 0 until first grown, then 2048*2^k
+    int **hist;         // hist[nfiles][size]
+    hts_pos_t *end_pos; // end_pos[nfiles]; 0-based, one past last position held
+    hts_pos_t last_output; // next 0-based position still to be printed
+    int last_ref;       // tid being accumulated; negative before the first read
+    int nfiles;         // number of input files (columns of depth per row)
+    const char *ref;    // name of last_ref, from sam_hdr_tid2name()
+    kstring_t ks;       // output line buffer; normally holds the "ref\t" prefix
+    hts_pos_t beg, end; // limit to region; -1 when no region iterator is used
+    int tid;            // reference id of the region; 0 when there is no region
+} depth_hist;
 
-// This function reads a BAM alignment from one BAM file.
-static int read_bam(void *data, bam1_t *b) // read level filters better go here to avoid pileup
-{
-    aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure
-    int ret;
-    while (1)
-    {
-        ret = aux->iter? sam_itr_next(aux->fp, aux->iter, b) : sam_read1(aux->fp, aux->hdr, b);
-        if ( ret<0 ) break;
-        if ( b->core.flag & aux->flags) continue;
-        if ( (int)b->core.qual < aux->min_mapQ ) continue;
-        if ( aux->min_len && bam_cigar2qlen(b->core.n_cigar, bam_get_cigar(b)) < aux->min_len ) continue;
-        break;
+typedef struct {
+    int header;          // non-zero: print a "#CHROM\tPOS\t<file>..." header line
+    int flag;            // reads with any of these FLAG bits set are skipped
+    int min_qual;        // bases with quality below this are not counted
+    int min_mqual;       // reads with mapping quality below this are skipped
+    int min_len;         // if non-zero, skip reads whose qlen_used() is below it
+    int skip_del;        // non-zero: deleted bases (CIGAR D) are not counted
+    int all_pos;         // >0: also print zero-depth rows; >1: and unused refs
+    int remove_overlaps; // non-zero: enable the per-file read-name overlap hash
+    FILE *out;           // output stream for all rows
+    char *reg;           // region string given to sam_itr_querys(), or NULL
+    void *bed;           // opaque BED handle for bed_overlap(), or NULL
+} depth_opt;
+
+static void zero_region(depth_opt *opt, depth_hist *dh, // print all-zero rows
+                        const char *name, hts_pos_t start, hts_pos_t end) {
+    hts_pos_t i;
+    kstring_t *ks = &dh->ks;
+    // Rows cover 0-based [start,end) of "name", clipped to dh->beg / dh->end.
+    kputs(name, ks_clear(ks));  // note: overwrites whatever dh->ks held
+    kputc('\t', ks);
+    size_t cur_l = ks->l;       // length of the "name\t" prefix reused per row
+    if (dh->beg >= 0 && start < dh->beg)
+        start = dh->beg;
+    if (dh->end >= 0 && end > dh->end)
+        end = dh->end;
+
+    for (i = start; i < end; i++) {
+        // Positions outside the BED regions are skipped.  Could be optimised,
+        // but needs better API to skip to next bed region.
+        if (opt->bed && bed_overlap(opt->bed, name, i, i+1) == 0)
+            continue;
+
+        ks->l = cur_l;          // rewind to just after the prefix
+        kputll(i+1,  ks);       // positions are printed 1-based
+        int n;
+        for (n = 0; n < dh->nfiles; n++) {
+            kputc_('\t', ks);
+            kputc_('0',  ks);   // one "0" depth column per input file
+        }
+        kputc('\n',  ks);
+        fputs(ks->s, opt->out);
     }
-    return ret;
+    ks->l = cur_l;              // leave only the "name\t" prefix in dh->ks
 }
 
-int read_file_list(const char *file_list,int *n,char **argv[]);
-
-static int usage() {
-    fprintf(samtools_stderr, "\n");
-    fprintf(samtools_stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n");
-    fprintf(samtools_stderr, "Options:\n");
-    fprintf(samtools_stderr, "   -a                  output all positions (including zero depth)\n");
-    fprintf(samtools_stderr, "   -a -a (or -aa)      output absolutely all positions, including unused ref. sequences\n");
-    fprintf(samtools_stderr, "   -b <bed>            list of positions or regions\n");
-    fprintf(samtools_stderr, "   -X                  use customized index files\n");
-    fprintf(samtools_stderr, "   -f <list>           list of input BAM filenames, one per line [null]\n");
-    fprintf(samtools_stderr, "   -H                  print a file header\n");
-    fprintf(samtools_stderr, "   -l <int>            read length threshold (ignore reads shorter than <int>) [0]\n");
-    fprintf(samtools_stderr, "   -d/-m <int>         maximum coverage depth [8000]. If 0, depth is set to the maximum\n"
-                    "                       integer value, effectively removing any depth limit.\n");  // the htslib's default
-    fprintf(samtools_stderr, "   -o FILE             where to write output to [samtools_stdout]\n");
-    fprintf(samtools_stderr, "   -q <int>            base quality threshold [0]\n");
-    fprintf(samtools_stderr, "   -Q <int>            mapping quality threshold [0]\n");
-    fprintf(samtools_stderr, "   -r <chr:from-to>    region\n");
-    fprintf(samtools_stderr, "   -g <flags>          include reads that have any of the specified flags set [0]\n");
-    fprintf(samtools_stderr, "   -G <flags>          filter out reads that have any of the specified flags set"
-                    "                       [UNMAP,SECONDARY,QCFAIL,DUP]\n");
-
-    sam_global_opt_help(samtools_stderr, "-.--.--.");
-
-    fprintf(samtools_stderr, "\n");
-    fprintf(samtools_stderr, "The output is a simple tab-separated table with three columns: reference name,\n");
-    fprintf(samtools_stderr, "position, and coverage depth.  Note that positions with zero coverage may be\n");
-    fprintf(samtools_stderr, "omitted by default; see the -a option.\n");
-    fprintf(samtools_stderr, "\n");
-
-    return EXIT_FAILURE;
+// A variation of bam_cigar2qlen which doesn't count soft-clips.  Returns the
+// number of query bases that are aligned in some way to the reference
+// (including insertions, which are considered to be aligned by dint of being
+// anchored either side).
+hts_pos_t qlen_used(bam1_t *b) {
+    int n_cigar = b->core.n_cigar;
+    const uint32_t *cigar = bam_get_cigar(b);
+
+    hts_pos_t l;
+
+    if (b->core.l_qseq) {
+        // Known SEQ: l_qseq minus the soft-clips at either end.  Outer
+        // hard-clips are stepped over (they are not part of SEQ, but may
+        // precede/follow the soft-clip, e.g. 5H3S10M).  No full scan is
+        // needed, which helps on excessively long CIGARs.
+        l = b->core.l_qseq; int kl, kr;
+        for (kl = 0; kl < n_cigar; kl++)
+            if (bam_cigar_op(cigar[kl]) == BAM_CSOFT_CLIP)
+                l -= bam_cigar_oplen(cigar[kl]);
+            else if (bam_cigar_op(cigar[kl]) != BAM_CHARD_CLIP)
+                break;
+
+        for (kr = n_cigar-1; kr > kl; kr--)
+            if (bam_cigar_op(cigar[kr]) == BAM_CSOFT_CLIP)
+                l -= bam_cigar_oplen(cigar[kr]);
+            else if (bam_cigar_op(cigar[kr]) != BAM_CHARD_CLIP)
+                break;
+    } else {
+        // Unknown SEQ ("*") needs a full scan through the CIGAR string.
+        static int query[16] = {
+          //M I D N  S H P =  X B ? ?  ? ? ? ?
+            1,1,0,0, 0,0,0,1, 1,0,0,0, 0,0,0,0
+        };
+        int k;
+        for (k = l = 0; k < n_cigar; k++)
+            if (query[bam_cigar_op(cigar[k])])
+                l += bam_cigar_oplen(cigar[k]);
+    }
+    return l;
 }
 
-int main_depth(int argc, char *argv[])
-{
-    int i, n, tid, reg_tid, *n_plp, baseQ = 0, mapQ = 0, min_len = 0, has_index_file = 0;
-    hts_pos_t beg, end, pos, last_pos = -1;
-    int all = 0, status = EXIT_SUCCESS, nfiles, max_depth = -1;
-    const bam_pileup1_t **plp;
-    char *reg = 0; // specified region
-    void *bed = 0; // BED data structure
-    char *file_list = NULL, **fn = NULL;
-    sam_hdr_t *h = NULL; // BAM header of the 1st input
-    aux_t **data;
-    bam_mplp_t mplp;
-    int last_tid = -1, ret;
-    int print_header = 0;
-    char *output_file = NULL;
-    FILE *file_out = samtools_stdout;
-    uint32_t flags = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP);
-    int tflags = 0;
+// Adds the depth for a single read to a depth_hist struct.  Per file we keep
+// a circular buffer: counts are incremented where the read overlaps existing
+// data and zeroed first for coordinates seen for the first time ("end_pos"
+// tracks how far each file has got).  As the input is sorted, rows from
+// "last_output" up to b->core.pos can be flushed before adding the read.
+// With multiple files, records must arrive in merged sorted order; depth and
+// "end_pos" are per file, "last_output" is global (it tracks rows printed).
+//
+// b == NULL flushes the final reference (plus any -a/-aa zero rows).
+// overlap_clip, if non-zero, is a 0-based reference position below which this
+// read is not counted; "file" indexes hist[] / end_pos[].
+// Returns 0 on success, -1 on unsorted input, allocation failure or bad CIGAR.
+static int add_depth(depth_opt *opt, depth_hist *dh, sam_hdr_t *h, bam1_t *b,
+                     int overlap_clip, int file) {
+    hts_pos_t i;
+    size_t hmask = dh->size-1; // ring index mask; recomputed if buffer grows
+    int n;
 
-    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
-    static const struct option lopts[] = {
-        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
-        { NULL, 0, NULL, 0 }
-    };
+    if (!b || b->core.tid != dh->last_ref) {
+        // New ref
+        if (dh->last_ref >= 0) {
+            // do end
+            size_t cur_l = dh->ks.l;
+            int nf = dh->nfiles;
+            i = dh->last_output;
+            for (i = dh->last_output; nf; i++) {
+                nf = 0;
+                for (n = 0; n < dh->nfiles; n++) {
+                    if (i < dh->end_pos[n])
+                        nf++;
+                }
+                if (!nf)
+                    break;
+
+                if (opt->bed && bed_overlap(opt->bed, dh->ref, i, i+1) == 0)
+                    continue;
 
-    // parse the command line
-    while ((n = getopt_long(argc, argv, "r:b:Xq:Q:l:f:am:d:Ho:g:G:", lopts, NULL)) >= 0) {
-        switch (n) {
-            case 'l': min_len = atoi(optarg); break; // minimum query length
-            case 'r': reg = strdup(optarg); break;   // parsing a region requires a BAM header
-            case 'b':
-                bed = bed_read(optarg); // BED or position list file can be parsed now
-                if (!bed) {
-                    print_error_errno("depth", "Could not read file \"%s\"", optarg);
-                    return EXIT_FAILURE;
+                dh->ks.l = cur_l;
+                kputll(i+1, &dh->ks);
+                for (n = 0; n < dh->nfiles; n++) {
+                    kputc_('\t', &dh->ks);
+                    int d = i < dh->end_pos[n]
+                        ? dh->hist[n][i & hmask]
+                        : 0;
+                    kputuw(d, &dh->ks);
                 }
-                break;
-            case 'X': has_index_file = 1; break;
-            case 'q': baseQ = atoi(optarg); break;   // base quality threshold
-            case 'Q': mapQ = atoi(optarg); break;    // mapping quality threshold
-            case 'f': file_list = optarg; break;
-            case 'a': all++; break;
-            case 'd': case 'm': max_depth = atoi(optarg); break; // maximum coverage depth
-            case 'H': print_header = 1; break;
-            case 'o': output_file = optarg; break;
-            case 'g':
-                tflags = bam_str2flag(optarg);
-                if (tflags < 0 || tflags > BAM_FMAX) {
-                    print_error_errno("depth", "Flag value \"%s\" is not supported", optarg);
-                    return 1;
+                kputc('\n', &dh->ks);
+                fputs(dh->ks.s, opt->out);
+            }
+            if (opt->all_pos) {
+                // End of last ref
+                zero_region(opt, dh,
+                            sam_hdr_tid2name(h, dh->last_ref),
+                            i, sam_hdr_tid2len(h, dh->last_ref));
+            }
+            dh->ks.l = cur_l;
+        }
+
+        if (opt->all_pos > 1 && !opt->reg) {
+            // Any previous unused refs
+            int lr = dh->last_ref < 0 ? 0 : dh->last_ref+1;
+            int rr = b ? b->core.tid : sam_hdr_nref(h), r;
+            for (r = lr; r < rr; r++)
+                zero_region(opt, dh,
+                            sam_hdr_tid2name(h, r),
+                            0, sam_hdr_tid2len(h, r));
+        }
+
+        if (!b) {
+            // we're just flushing to end of file
+            if (opt->all_pos && opt->reg && dh->last_ref < 0)
+                // -a or -aa without a single read being output yet
+                zero_region(opt, dh, sam_hdr_tid2name(h, dh->tid), dh->beg,
+                            MIN(dh->end, sam_hdr_tid2len(h, dh->tid)));
+
+            return 0;
+        }
+
+        for (n = 0; dh->end_pos && n < dh->nfiles; n++)
+            dh->end_pos[n] = 0;
+        dh->last_output = dh->beg >= 0
+            ? MAX(b->core.pos, dh->beg)
+            : b->core.pos;
+        dh->last_ref = b->core.tid;
+        dh->ref = sam_hdr_tid2name(h, b->core.tid);
+        kputs(dh->ref, ks_clear(&dh->ks));
+        kputc('\t', &dh->ks);
+
+        if (opt->all_pos)
+            // Start of ref
+            zero_region(opt, dh, dh->ref, 0, b->core.pos);
+    } else {
+        if (dh->last_output < b->core.pos) {
+            // Flush any depth outputs up to start of new read
+            size_t cur_l = dh->ks.l;
+            int nf = dh->nfiles;
+            for (i = dh->last_output; i < b->core.pos; i++) {
+                nf = 0;
+                for (n = 0; n < dh->nfiles; n++) {
+                    if (i < dh->end_pos[n])
+                        nf++;
                 }
-                flags &= ~tflags;
-                break;
-            case 'G':
-                tflags = bam_str2flag(optarg);
-                if (tflags < 0 || tflags > BAM_FMAX) {
-                    print_error_errno("depth", "Flag value \"%s\" is not supported", optarg);
-                    return 1;
+                if (!nf)
+                    break;
+
+                if (opt->bed && bed_overlap(opt->bed, dh->ref, i, i+1) == 0)
+                    continue;
+
+                dh->ks.l = cur_l;
+                kputll(i+1, &dh->ks);
+                for (n = 0; n < dh->nfiles; n++) {
+                    kputc_('\t', &dh->ks);
+                    int d = i < dh->end_pos[n]
+                        ? dh->hist[n][i & hmask]
+                        : 0;
+                    kputuw(d, &dh->ks);
                 }
-                flags |= tflags;
-                break;
-            default:  if (parse_sam_global_opt(n, optarg, lopts, &ga) == 0) break;
-                      /* else fall-through */
-            case '?': return usage();
+                kputc('\n', &dh->ks);
+                fputs(dh->ks.s, opt->out);
+            }
+            if (opt->all_pos && i < b->core.pos)
+                // Hole in middle of ref
+                zero_region(opt, dh, dh->ref, i, b->core.pos);
+
+            dh->ks.l = cur_l;
+            dh->last_output = b->core.pos;
         }
     }
-    if (optind == argc && !file_list)
-        return usage();
-
-    /* output file provided by user */
-    if (output_file != NULL && strcmp(output_file,"-")!=0) {
-        file_out = fopen( output_file, "w" );
-        if (file_out == NULL) {
-            print_error_errno("depth", "Cannot open \"%s\" for writing.", output_file);
-            return EXIT_FAILURE;
-        }
+
+    hts_pos_t end_pos = bam_endpos(b); // 0 based, 1 past end.
+    // NOTE(review): last_ref == b->core.tid by now; only the pos test can fire.
+
+    if (b->core.tid < dh->last_ref ||
+        (dh->last_ref == b->core.tid && end_pos < dh->last_output)) {
+        print_error("depth", "Data is not position sorted"); // errno not relevant
+        return -1;
     }
 
+    // If needed, grow the circular buffer.
+    if (end_pos+1 - b->core.pos >= dh->size) {
+        size_t old_size = dh->size;
+        size_t old_hmask = hmask;
+        while (end_pos+1 - b->core.pos >= dh->size)
+            dh->size = dh->size ? 2*dh->size : 2048;
+        hmask = dh->size-1;
+        if (!dh->hist) {
+            dh->hist = calloc(dh->nfiles, sizeof(*dh->hist));
+            dh->end_pos = calloc(dh->nfiles, sizeof(*dh->end_pos));
+            if (!dh->hist || !dh->end_pos)
+                return -1;
+        }
+        for (n = 0; n < dh->nfiles; n++) {
+            int *hist = calloc(dh->size, sizeof(*dh->hist[n]));
+            if (!hist)
+                return -1;
 
-    // initialize the auxiliary data structures
-    if (file_list)
-    {
-        if (has_index_file) {
-            print_error("depth", "The -f option cannot be combined with -X");
-            return 1;
+            // Simple approach for now; copy over old histogram verbatim.
+            for (i = dh->last_output; i < dh->last_output + old_size; i++)
+                hist[i & hmask] = dh->hist[n][i & old_hmask];
+            free(dh->hist[n]);
+            dh->hist[n] = hist;
         }
-        if ( read_file_list(file_list,&nfiles,&fn) ) return EXIT_FAILURE;
-        n = nfiles;
-        argv = fn;
-        optind = 0;
     }
-    else if (has_index_file) { // Calculate # of input BAM files
-        if ((argc - optind) % 2 != 0) {
-            fprintf(samtools_stderr, "Error: Odd number of filenames detected! Each BAM file should have an index file\n");
-            return 1;
-        }
-        n = (argc - optind) / 2;
+
+    // Accumulate depth, based on CIGAR
+    uint32_t *cig = bam_get_cigar(b);
+    int ncig = b->core.n_cigar, j, k, spos = 0; // spos: index into qual[]
+
+    // Zero new (previously unseen) coordinates so increment works later.
+    hts_pos_t end = MAX(dh->end_pos[file], b->core.pos);
+    if (end_pos > end && (end & hmask) < (end_pos & hmask)) {
+        memset(&dh->hist[file][end & hmask], 0,
+               sizeof(**dh->hist) * (end_pos - end));
     } else {
-        n = argc - optind;
+        for (i = end; i < end_pos; i++)
+            dh->hist[file][i & hmask] = 0;
     }
-    data = calloc(n, sizeof(aux_t*)); // data[i] for the i-th input
-    reg_tid = 0; beg = 0; end = HTS_POS_MAX;  // set the default region
-
-    for (i = 0; i < n; ++i) {
-        int rf;
-        data[i] = calloc(1, sizeof(aux_t));
-        data[i]->fp = sam_open_format(argv[optind+i], "r", &ga.in); // open BAM
-        if (data[i]->fp == NULL) {
-            print_error_errno("depth", "Could not open \"%s\"", argv[optind+i]);
-            status = EXIT_FAILURE;
-            goto depth_end;
-        }
-        rf = SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR | SAM_SEQ;
-        if (baseQ) rf |= SAM_QUAL;
-        if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
-            print_error_errno("depth", "Failed to set CRAM_OPT_REQUIRED_FIELDS value");
-            status = EXIT_FAILURE;
-            goto depth_end;
-        }
-        if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
-            print_error_errno("depth", "Failed to set CRAM_OPT_DECODE_MD value");
-            status = EXIT_FAILURE;
-            goto depth_end;
-        }
-        data[i]->min_mapQ = mapQ;                    // set the mapQ filter
-        data[i]->min_len  = min_len;                 // set the qlen filter
-        data[i]->hdr = sam_hdr_read(data[i]->fp);    // read the BAM header
-        if (data[i]->hdr == NULL) {
-            print_error_errno("depth", "Couldn't read header for \"%s\"",
-                              argv[optind+i]);
-            status = EXIT_FAILURE;
-            goto depth_end;
-        }
-        if (reg) { // if a region is specified
-            hts_idx_t *idx = NULL;
-            // If index filename has not been specfied, look in BAM folder
-            if (has_index_file) {
-                idx = sam_index_load2(data[i]->fp, argv[optind+i], argv[optind+i+n]);  // load the index
+
+    i = b->core.pos;
+    uint8_t *qual = bam_get_qual(b);
+    int min_qual = opt->min_qual;
+    for (j = 0; j < ncig; j++) {
+        int op    = bam_cigar_op(cig[j]);
+        int oplen = bam_cigar_oplen(cig[j]);
+
+        switch (op) {
+        case BAM_CDEL:
+        case BAM_CREF_SKIP:
+            if (op != BAM_CDEL || opt->skip_del) {
+                // don't increment reference location
+                if (i + oplen >= dh->end_pos[file]) {
+                    for (k = 0; k < oplen; k++, i++) {
+                        if (i >= dh->end_pos[file])
+                            // redundant due to zero new elements above?
+                            dh->hist[file][i & hmask] = 0;
+                    }
+                } else {
+                    i += oplen;
+                }
+            } else { // op == BAM_CDEL and we count them (-J option),
+                // We don't incr spos here, but we still use qual.
+                // This doesn't make much sense, but it's for compatibility
+                // with the old code.  Arguably DEL shouldn't have a min
+                // qual and should always pass (as we've explicitly asked to
+                // include them).
+                int *hist = dh->hist[file];
+                k = 0;
+                if (overlap_clip) {
+                    if (i+oplen < overlap_clip) {
+                        i += oplen;
+                        break;
+                    } else if (i < overlap_clip) {
+                        k = overlap_clip - i;
+                        i = overlap_clip;
+                    }
+                }
+
+                // Question: should we even check quality values for DEL?
+                // We've explicitly asked to include them, and the quality
+                // is wrong anyway (it's the neighbouring base).  We do this
+                // for now for compatibility with the old depth command.
+
+                if (spos < b->core.l_qseq)
+                    for (; k < oplen; k++, i++)
+                        hist[i & hmask]+=qual[spos]>=min_qual;
+                else
+                    for (; k < oplen; k++, i++)
+                        hist[i & hmask]++;
+            }
+            break;
+
+        case BAM_CMATCH:
+        case BAM_CEQUAL:
+        case BAM_CDIFF:
+            if ((i & hmask) < ((i+oplen) & hmask)) {
+                // Optimisation when not wrapping around
+
+                // Unrolling doesn't help clang, but helps gcc,
+                // especially when not using -O3.
+                int *hist = &dh->hist[file][i & hmask];
+                if (min_qual || overlap_clip) {
+                    k = 0;
+                    if (overlap_clip) {
+                        if (i+oplen < overlap_clip) {
+                            i += oplen;
+                            spos += oplen;
+                            break;
+                        } else if (i < overlap_clip) {
+                            oplen -= overlap_clip - i;
+                            spos += overlap_clip - i;
+                            hist += overlap_clip - i;
+                            i = overlap_clip;
+                        }
+                    }
+
+                    // approx 50% of this func cpu time in this loop
+                    for (; k < (oplen & ~7); k+=8) {
+                        hist[k+0]+=qual[spos+0]>=min_qual;
+                        hist[k+1]+=qual[spos+1]>=min_qual;
+                        hist[k+2]+=qual[spos+2]>=min_qual;
+                        hist[k+3]+=qual[spos+3]>=min_qual;
+                        hist[k+4]+=qual[spos+4]>=min_qual;
+                        hist[k+5]+=qual[spos+5]>=min_qual;
+                        hist[k+6]+=qual[spos+6]>=min_qual;
+                        hist[k+7]+=qual[spos+7]>=min_qual;
+                        spos += 8;
+                    }
+                } else {
+                    // easier to vectorize when no min_qual
+                    for (k = 0; k < (oplen & ~7); k+=8) {
+                        hist[k+0]++;
+                        hist[k+1]++;
+                        hist[k+2]++;
+                        hist[k+3]++;
+                        hist[k+4]++;
+                        hist[k+5]++;
+                        hist[k+6]++;
+                        hist[k+7]++;
+                    }
+                    spos += k;
+                }
+                for (; k < oplen && spos < b->core.l_qseq; k++, spos++)
+                    hist[k]+=qual[spos]>=min_qual;
+                for (; k < oplen; k++, spos++)
+                    hist[k]++;
+                i += oplen;
             } else {
-                idx = sam_index_load(data[i]->fp, argv[optind+i]);
+                // Simple to understand case, but slower.
+                // We use this only for reads with wrap-around.
+                int *hist = dh->hist[file];
+                k = 0;
+                if (overlap_clip) {
+                    if (i+oplen < overlap_clip) {
+                        i += oplen; spos += oplen; // op consumes query too
+                        break;
+                    } else if (i < overlap_clip) {
+                        oplen -= overlap_clip - i;
+                        spos += overlap_clip - i;
+                        i = overlap_clip;
+                    }
+                }
+                for (; k < oplen && spos < b->core.l_qseq; k++, i++, spos++)
+                    hist[i & hmask]+=qual[spos]>=min_qual;
+                for (; k < oplen; k++, i++, spos++)
+                    hist[i & hmask]++;
             }
-            if (idx == NULL) {
-                print_error("depth", "can't load index for \"%s\"", argv[optind+i]);
-                status = EXIT_FAILURE;
-                goto depth_end;
+            break;
+
+        case BAM_CINS:
+        case BAM_CSOFT_CLIP:
+            spos += oplen;
+            break;
+
+        case BAM_CPAD:
+        case BAM_CHARD_CLIP:
+            // ignore
+            break;
+
+        default:
+            print_error("depth", "Unsupported cigar op '%d'", op);
+            return -1;
+        }
+    }
+
+    if (dh->end >= 0 && end_pos > dh->end)
+        end_pos = dh->end;
+    if (dh->end_pos[file] < end_pos)
+        dh->end_pos[file] = end_pos;
+
+    return 0;
+}
+
+// Hash on name -> alignment end pos. This permits a naive overlap removal.
+// Note it cannot analyse the overlapping sequence and qualities, so the
+// interaction of basecalls/qualities and the -Q parameter cannot be
+// applied here (unlike the full mpileup algorithm).
+KHASH_MAP_INIT_STR(olap_hash, hts_pos_t) // key: read name; value: bam_endpos()
+typedef khash_t(olap_hash) olap_hash_t;  // fastdepth_core keeps one per file
+
+static int fastdepth_core(depth_opt *opt, uint32_t nfiles, char **fn,
+                          samFile **fp, hts_itr_t **itr, sam_hdr_t **h) {
+    int ret = -1, err = 1, i;
+    olap_hash_t **overlaps = NULL;
+    depth_hist dh = {0};
+
+    // An array of bam structs, one per input file, to hold the next entry
+    bam1_t **b = calloc(nfiles, sizeof(*b));
+    int *finished = calloc(nfiles, sizeof(*finished)), to_go = nfiles;
+    if (!b || !finished)
+        goto err;
+
+    for (i = 0; i < nfiles; i++)
+        if (!(b[i] = bam_init1()))
+            goto err;
+
+    // Do we need one overlap hash per file? Or shared?
+    if (opt->remove_overlaps) {
+        if (!(overlaps = calloc(nfiles, sizeof(*overlaps))))
+            return -1;
+        for (i = 0; i < nfiles; i++) {
+            if (!(overlaps[i] = kh_init(olap_hash)))
+                return -1;
+        }
+    }
+
+    // Create the initial histogram
+    dh.nfiles = nfiles;
+    dh.size = 0;
+    dh.hist = NULL;
+    dh.last_ref = -99;
+    dh.end_pos = NULL;
+    dh.last_output = itr && itr[0] ? itr[0]->beg : 0;
+    ks_initialize(&dh.ks);
+
+    // Clip results to region if specified
+    dh.beg = -1;
+    dh.end = -1;
+    dh.tid = 0;
+    if (itr && itr[0]) {
+        dh.tid = itr[0]->tid;
+        dh.beg = itr[0]->beg;
+        dh.end = itr[0]->end;
+    }
+
+    if (opt->header) {
+        fprintf(opt->out, "#CHROM\tPOS");
+        for (i = 0; i < nfiles; i++)
+            fprintf(opt->out, "\t%s", fn[i]);
+        fputc('\n', opt->out);
+    }
+
+    // Populate first record per file
+    for (i = 0; i < nfiles; i++) {
+        for(;;) {
+            ret = itr && itr[i]
+                ? sam_itr_next(fp[i], itr[i], b[i])
+                : sam_read1(fp[i], h[i], b[i]);
+            if (ret < -1)
+                goto err;
+            if (ret == -1) {
+                to_go--;
+                finished[i] = 1;
+                break;
             }
-            data[i]->iter = sam_itr_querys(idx, data[i]->hdr, reg); // set the iterator
-            hts_idx_destroy(idx); // the index is not needed any more; free the memory
-            if (data[i]->iter == NULL) {
-                print_error("depth", "can't parse region \"%s\"", reg);
-                status = EXIT_FAILURE;
-                goto depth_end;
+
+            if (b[i]->core.tid < 0)
+                continue;
+            if (b[i]->core.flag & opt->flag)
+                continue;
+            if (b[i]->core.qual < opt->min_mqual)
+                continue;
+
+            // Original samtools depth used the total sequence (l_qseq)
+            // including soft-clips.  This doesn't feel like a useful metric
+            // to be filtering on.  We now only count sequence bases that
+            // form the used part of the alignment.
+            if (opt->min_len) {
+                if (qlen_used(b[i]) < opt->min_len)
+                    continue;
             }
+
+            break;
         }
-        data[i]->flags = flags;
     }
-    if (print_header) {
-        fputs("#CHROM\tPOS", file_out);
-        for (i = 0; i < n; ++i) {
-            fputc('\t', file_out);
-            fputs(argv[optind+i], file_out);
+
+    // Loop through input files, merging in order so we're
+    // always adding the next record in sequence
+    while (to_go) {
+        // Find next record in file list
+        int best_tid = INT_MAX, best_file = 0;
+        hts_pos_t best_pos = HTS_POS_MAX;
+
+        for (i = 0; i < nfiles; i++) {
+            if (finished[i])
+                continue;
+            if (best_tid > b[i]->core.tid) {
+                best_tid = b[i]->core.tid;
+                best_pos = b[i]->core.pos;
+                best_file = i;
+            } else if (best_tid == b[i]->core.tid &&
+                       best_pos > b[i]->core.pos) {
+                best_pos = b[i]->core.pos;
+                best_file = i;
             }
-        fputc('\n', file_out);
         }
-    h = data[0]->hdr; // easy access to the header of the 1st BAM
-    if (reg) {
-        beg = data[0]->iter->beg; // and to the parsed region coordinates
-        end = data[0]->iter->end;
-        reg_tid = data[0]->iter->tid;
-    }
+        i = best_file;
 
-    // the core multi-pileup loop
-    mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization
-    if (0 < max_depth)
-        bam_mplp_set_maxcnt(mplp,max_depth);  // set maximum coverage depth
-    else if (!max_depth)
-        bam_mplp_set_maxcnt(mplp,INT_MAX);
-    n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM
-    plp = calloc(n, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads (internal in mplp)
-    while ((ret=bam_mplp64_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // come to the next covered position
-        if (pos < beg || pos >= end) continue; // out of range; skip
-        if (tid >= sam_hdr_nref(h)) continue;     // diff number of @SQ lines per file?
-        if (all) {
-            while (tid > last_tid) {
-                if (last_tid >= 0 && !reg) {
-                    // Deal with remainder or entirety of last tid.
-                    while (++last_pos < sam_hdr_tid2len(h, last_tid)) {
-                        // Horribly inefficient, but the bed API is an obfuscated black box.
-                        if (bed && bed_overlap(bed, sam_hdr_tid2name(h, last_tid), last_pos, last_pos + 1) == 0)
-                            continue;
-                        fputs(sam_hdr_tid2name(h, last_tid), file_out);
-                        fprintf(file_out, "\t%"PRIhts_pos, last_pos+1);
-                        for (i = 0; i < n; i++)
-                            fputc('\t', file_out), fputc('0', file_out);
-                        fputc('\n', file_out);
-                    }
+        hts_pos_t clip = 0;
+        if (overlaps && (b[i]->core.flag & BAM_FPAIRED) &&
+            !(b[i]->core.flag & BAM_FMUNMAP)) {
+            khiter_t k = kh_get(olap_hash, overlaps[i], bam_get_qname(b[i]));
+            if (k == kh_end(overlaps[i])) {
+                // not seen before
+                hts_pos_t endpos = bam_endpos(b[i]);
+
+                // Don't add if mate location is known and can't overlap.
+                if (b[i]->core.mpos == -1 ||
+                    (b[i]->core.tid == b[i]->core.mtid &&
+                     b[i]->core.mpos <= endpos)) {
+                    k = kh_put(olap_hash, overlaps[i], bam_get_qname(b[i]),
+                               &ret);
+                    if (ret < 0)
+                        return -1;
+                    kh_key(overlaps[i], k) = strdup(bam_get_qname(b[i]));
+                    kh_value(overlaps[i], k) = endpos;
                 }
-                last_tid++;
-                last_pos = -1;
-                if (all < 2)
-                    break;
+            } else {
+                // seen before
+                clip = kh_value(overlaps[i], k);
+                free((char *)kh_key(overlaps[i], k));
+                kh_del(olap_hash, overlaps[i], k);
             }
+        }
 
-            // Deal with missing portion of current tid
-            while (++last_pos < pos) {
-                if (last_pos < beg) continue; // out of range; skip
-                if (bed && bed_overlap(bed, sam_hdr_tid2name(h, tid), last_pos, last_pos + 1) == 0)
-                    continue;
-                fputs(sam_hdr_tid2name(h, tid), file_out);
-                fprintf(file_out, "\t%"PRIhts_pos, last_pos+1);
-                for (i = 0; i < n; i++)
-                    fputc('\t', file_out), fputc('0', file_out);
-                fputc('\n', file_out);
+        // Add the next merged BAM record to the depth plot
+        if ((ret = add_depth(opt, &dh, h[i], b[i], clip, i)) < 0) {
+            ret = -1;
+            goto err;
+        }
+
+        // Populate next record from this file
+        for(;!finished[i];) {
+            ret = itr && itr[i]
+                ? sam_itr_next(fp[i], itr[i], b[i])
+                : sam_read1(fp[i], h[i], b[i]);
+            if (ret < -1) {
+                ret = -1;
+                goto err;
+            }
+            if (ret == -1) {
+                to_go--;
+                finished[i] = 1;
+                break;
             }
 
-            last_tid = tid;
-            last_pos = pos;
-        }
-        if (bed && bed_overlap(bed, sam_hdr_tid2name(h, tid), pos, pos + 1) == 0) continue;
-        fputs(sam_hdr_tid2name(h, tid), file_out);
-        fprintf(file_out, "\t%"PRIhts_pos, pos+1); // a customized fprintf(samtools_stdout, ) would be faster
-        for (i = 0; i < n; ++i) { // base level filters have to go here
-            int j, m = 0;
-            for (j = 0; j < n_plp[i]; ++j) {
-                const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know
-                if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos
-                else if (p->qpos < p->b->core.l_qseq &&
-                         bam_get_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality
+            if (b[i]->core.tid < 0)
+                continue;
+            if (b[i]->core.flag & opt->flag)
+                continue;
+            if (b[i]->core.qual < opt->min_mqual)
+                continue;
+
+            if (opt->min_len) {
+                if (qlen_used(b[i]) < opt->min_len)
+                    continue;
             }
-            fprintf(file_out, "\t%d", n_plp[i] - m); // this the depth to output
+
+            break;
         }
-        fputc('\n', file_out);
     }
-    if (ret < 0) status = EXIT_FAILURE;
-    free(n_plp); free(plp);
-    bam_mplp_destroy(mplp);
-
-    if (all) {
-        // Handle terminating region
-        if (last_tid < 0 && reg) {
-            last_tid = reg_tid;
-            last_pos = beg-1;
+
+    // Tidy up end.
+    ret = add_depth(opt, &dh, h[0], NULL, 0, 0);
+    err = 0;
+
+ err:
+    if (ret == 0 && err)
+        ret = -1;
+
+    for (i = 0; i < nfiles; i++) {
+        if (b[i])
+            bam_destroy1(b[i]);
+        if (dh.hist && dh.hist[i])
+            free(dh.hist[i]);
+    }
+    free(b);
+    free(finished);
+    ks_free(&dh.ks);
+    free(dh.hist);
+    free(dh.end_pos);
+    if (overlaps) {
+        khiter_t k;
+        for (i = 0; i < nfiles; i++) {
+            if (!overlaps[i])
+                continue;
+            for (k = kh_begin(overlaps[i]); k < kh_end(overlaps[i]); k++)
+                if (kh_exist(overlaps[i], k))
+                    free((char *)kh_key(overlaps[i], k));
+            kh_destroy(olap_hash, overlaps[i]);
         }
-        while (last_tid >= 0 && last_tid < sam_hdr_nref(h)) {
-            while (++last_pos < sam_hdr_tid2len(h, last_tid)) {
-                if (last_pos >= end) break;
-                if (bed && bed_overlap(bed, sam_hdr_tid2name(h, last_tid), last_pos, last_pos + 1) == 0)
-                    continue;
-                fputs(sam_hdr_tid2name(h, last_tid), file_out);
-                fprintf(file_out, "\t%"PRIhts_pos, last_pos+1);
-                for (i = 0; i < n; i++)
-                    fputc('\t', file_out), fputc('0', file_out);
-                fputc('\n', file_out);
+        free(overlaps);
+    }
+
+    return ret;
+}
+
+// Print the "samtools depth" usage text to fp, then pass exit_status to samtools_exit().
+static void usage_exit(FILE *fp, int exit_status)
+{
+    fprintf(fp, "Usage: samtools depth [options] in.bam [in.bam ...]\n");
+    fprintf(fp, "\nOptions:\n");
+    fprintf(fp, "  -a           Output all positions (including zero depth)\n");
+    fprintf(fp, "  -a -a, -aa   Output absolutely all positions, including unused ref seqs\n");
+    fprintf(fp, "  -r REG       Specify a region in chr or chr:from-to syntax\n");
+    fprintf(fp, "  -b FILE      Use bed FILE for list of regions\n");
+    fprintf(fp, "  -f FILE      Specify list of input BAM/SAM/CRAM filenames\n");
+    fprintf(fp, "  -X           Use custom index files (in -X *.bam *.bam.bai order)\n");
+    fprintf(fp, "  -g INT       Remove specified flags from default flag filter\n");
+    fprintf(fp, "  -G INT       Add specified flags to the default flag filter\n");
+    fprintf(fp, "  -H           Print a file header line\n");
+    fprintf(fp, "  -l INT       Minimum read length [0]\n");
+    fprintf(fp, "  -o FILE      Write output to FILE [samtools_stdout]\n");
+    fprintf(fp, "  -q INT       Minimum base quality [0]\n");
+    fprintf(fp, "  -Q INT       Minimum mapping quality [0]\n");
+    fprintf(fp, "  -J           Include reads with deletions in depth computation\n");
+    fprintf(fp, "  -s           Do not count overlapping reads within a template\n");
+    sam_global_opt_help(fp, "-.---@-.");
+    samtools_exit(exit_status);
+}
+
+// "samtools depth" entry point; returns 0 on success, non-zero on failure.
+int main_depth(int argc, char *argv[])
+{
+    int nfiles, i;
+    samFile **fp;
+    sam_hdr_t **header;
+    int c, has_index_file = 0;
+    char *file_list = NULL, **fn = NULL;
+    depth_opt opt = {
+        .flag = BAM_FUNMAP | BAM_FSECONDARY | BAM_FDUP | BAM_FQCFAIL,
+        .min_qual = 0,
+        .min_mqual = 0,
+        .skip_del = 1,
+        .header = 0,
+        .min_len = 0,
+        .out = samtools_stdout,
+        .all_pos = 0,
+        .remove_overlaps = 0,
+        .reg = NULL,
+        .bed = NULL,
+    };
+    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
+    static const struct option lopts[] = {
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', '-', '@'),
+        {NULL, 0, NULL, 0}
+    };
+
+    while ((c = getopt_long(argc, argv, "@:q:Q:JHd:m:l:g:G:o:ar:Xf:b:s",
+                            lopts, NULL)) >= 0) {
+        switch (c) {
+        case 'a':
+            opt.all_pos++;
+            break;
+
+        case 'b':
+            opt.bed = bed_read(optarg);
+            if (!opt.bed) {
+                print_error_errno("depth", "Could not read file \"%s\"",
+                                  optarg);
+                return 1;
             }
-            last_tid++;
-            last_pos = -1;
-            if (all < 2 || reg)
+            break;
+
+        case 'f':
+            file_list = optarg;
+            break;
+
+        case 'd':
+        case 'm':
+            // depth limit - now ignored
+            break;
+
+        case 'g':
+            opt.flag &= ~bam_str2flag(optarg);
+            break;
+        case 'G':
+            opt.flag |= bam_str2flag(optarg);
+            break;
+
+        case 'l':
+            opt.min_len = atoi(optarg);
+            break;
+
+        case 'H':
+            opt.header = 1;
+            break;
+
+        case 'q':
+            opt.min_qual = atoi(optarg);
+            break;
+        case 'Q':
+            opt.min_mqual = atoi(optarg);
+            break;
+
+        case 'J':
+            opt.skip_del = 0;
+            break;
+
+        case 'o':
+            if (opt.out != samtools_stdout)
                 break;
+            opt.out = fopen(optarg, "w");
+            if (!opt.out) {
+                print_error_errno("depth", "Cannot open \"%s\" for writing.",
+                                  optarg);
+                return EXIT_FAILURE;
+            }
+            break;
+
+        case 'r':
+            opt.reg = optarg;
+            break;
+
+        case 's':
+            opt.remove_overlaps = 1;
+            break;
+
+        case 'X':
+            has_index_file = 1;
+            break;
+
+        default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
+            /* else fall-through */
+        case '?':
+            usage_exit(samtools_stderr, EXIT_FAILURE);
         }
     }
 
-depth_end:
-    if (((file_out != samtools_stdout)? fclose(file_out) : fflush(file_out)) != 0) {
-        if (status == EXIT_SUCCESS) {
-            if (file_out != samtools_stdout)
-                print_error_errno("depth", "error on closing \"%s\"", output_file);
-            else
-                print_error_errno("depth", "error on flushing standard output");
-            status = EXIT_FAILURE;
+    if (argc < optind+1 && !file_list) {
+        if (argc == optind)
+            usage_exit(samtools_stdout, EXIT_SUCCESS);
+        else
+            usage_exit(samtools_stderr, EXIT_FAILURE);
+    }
+
+    if (file_list) {
+        if (has_index_file) {
+            print_error("depth", "The -f option cannot be combined with -X");
+            return 1;
+        }
+        if (read_file_list(file_list, &nfiles, &fn))
+            return 1;
+        argv = fn;
+        argc = nfiles;
+        optind = 0;
+    } else {
+        nfiles = argc - optind;
+    }
+
+    if (has_index_file) {
+        if (nfiles%2) { // -X takes alignment/index pairs, so the count must be even
+            print_error("depth", "-X needs one index specified per bam file");
+            return 1;
         }
+        nfiles /= 2; // first half are alignment files, second half their indexes
+    }
+    fp = malloc(nfiles * sizeof(*fp));
+    header = malloc(nfiles * sizeof(*header));
+    if (!fp || !header) {
+        print_error_errno("depth", "Out of memory");
+        return 1;
     }
 
-    for (i = 0; i < n && data[i]; ++i) {
-        sam_hdr_destroy(data[i]->hdr);
-        if (data[i]->fp) sam_close(data[i]->fp);
-        hts_itr_destroy(data[i]->iter);
-        free(data[i]);
+    hts_itr_t **itr = NULL;
+    if (opt.reg) {
+        itr = calloc(nfiles, sizeof(*itr));
+        if (!itr)
+            return 1;
     }
-    free(data); free(reg);
-    if (bed) bed_destroy(bed);
-    if ( file_list )
-    {
-        for (i=0; i<n; i++) free(fn[i]);
+
+    for (i = 0; i < nfiles; i++, optind++) {
+        fp[i] = sam_open_format(argv[optind], "r", &ga.in);
+        if (fp[i] == NULL) {
+            print_error_errno("depth",
+                              "Cannot open input file \"%s\"", argv[optind]);
+            return 1;
+        }
+
+        if (ga.nthreads > 0)
+            hts_set_threads(fp[i], ga.nthreads);
+
+        if (hts_set_opt(fp[i], CRAM_OPT_REQUIRED_FIELDS,
+                        SAM_FLAG | SAM_RNAME | SAM_POS | SAM_CIGAR
+                        | (opt.remove_overlaps ? SAM_QNAME|SAM_RNEXT|SAM_PNEXT
+                                               : 0)
+                        | (opt.min_mqual       ? SAM_MAPQ  : 0)
+                        | (opt.min_len         ? SAM_SEQ   : 0)
+                        | (opt.min_qual        ? SAM_QUAL  : 0))) {
+            fprintf(samtools_stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
+            return 1;
+        }
+
+        if (hts_set_opt(fp[i], CRAM_OPT_DECODE_MD, 0)) {
+            fprintf(samtools_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+            return 1;
+        }
+
+        // FIXME: what if headers differ?
+        header[i] = sam_hdr_read(fp[i]);
+        if (header[i] == NULL) {
+            fprintf(samtools_stderr, "Failed to read header for \"%s\"\n",
+                    argv[optind]);
+            return 1;
+        }
+
+        if (opt.reg) {
+            hts_idx_t *idx = has_index_file
+                ? sam_index_load2(fp[i], argv[optind], argv[optind+nfiles])
+                : sam_index_load(fp[i], argv[optind]);
+            if (!idx) {
+                print_error("depth", "cannot load index for \"%s\"",
+                            argv[optind]);
+                return 1;
+            }
+            if (!(itr[i] = sam_itr_querys(idx, header[i], opt.reg))) {
+                print_error("depth", "cannot parse region \"%s\"", opt.reg);
+                return 1;
+            }
+            hts_idx_destroy(idx);
+        }
+    }
+
+    int ret = fastdepth_core(&opt, nfiles, &argv[optind-nfiles], fp, itr, header)
+        ? 1 : 0;
+
+    for (i = 0; i < nfiles; i++) {
+        sam_hdr_destroy(header[i]);
+        sam_close(fp[i]);
+        if (itr && itr[i])
+            hts_itr_destroy(itr[i]);
+    }
+    free(header);
+    free(fp);
+    free(itr);
+    if (file_list) {
+        for (i=0; i<nfiles; i++)
+            free(fn[i]);
         free(fn);
     }
+    if (opt.bed)
+        bed_destroy(opt.bed);
     sam_global_args_free(&ga);
-    return status;
+    if (opt.out != samtools_stdout) fclose(opt.out);
+    return ret;
 }
 
 #ifdef _MAIN_BAM2DEPTH
diff --git a/samtools/bam_addrprg.c b/samtools/bam_addrprg.c
index 58c712f..06c3147 100644
--- a/samtools/bam_addrprg.c
+++ b/samtools/bam_addrprg.c
@@ -1,6 +1,6 @@
 /* bam_addrprg.c -- samtools command to add or replace readgroups.
 
-   Copyright (c) 2013, 2015-2017, 2019 Genome Research Limited.
+   Copyright (c) 2013, 2015-2017, 2019-2021 Genome Research Limited.
 
    Author: Martin O. Pollard <mp15@sanger.ac.uk>
 
@@ -51,6 +51,8 @@ struct parsed_opts {
     rg_mode mode;
     sam_global_args ga;
     htsThreadPool p;
+    int uncompressed;
+    int overwrite_hdr_rg;
 };
 
 struct state;
@@ -164,13 +166,15 @@ static char* get_rg_id(const char *line)
 static void usage(FILE *fp)
 {
     fprintf(fp,
-            "Usage: samtools addreplacerg [options] [-r <@RG line> | -R <existing id>] [-o <output.bam>] <input.bam>\n"
+            "Usage: samtools addreplacerg [options] [-r <@RG line> | -R <existing id>] [-m orphan_only|overwrite_all] [-o <output.bam>] <input.bam>\n"
             "\n"
             "Options:\n"
             "  -m MODE   Set the mode of operation from one of overwrite_all, orphan_only [overwrite_all]\n"
             "  -o FILE   Where to write output to [stdout]\n"
             "  -r STRING @RG line text\n"
             "  -R STRING ID of @RG line in existing header to use\n"
+            "  -u        Output uncompressed data\n"
+            "  -w        Overwrite an existing @RG line\n"
             "  --no-PG   Do not add a PG line\n"
             );
     sam_global_opt_help(fp, "..O..@..");
@@ -198,7 +202,7 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
     };
     kstring_t rg_line = {0,0,NULL};
 
-    while ((n = getopt_long(argc, argv, "r:R:m:o:O:l:h@:", lopts, NULL)) >= 0) {
+    while ((n = getopt_long(argc, argv, "r:R:m:o:O:h@:uw", lopts, NULL)) >= 0) {
         switch (n) {
             case 'r':
                 // Are we adding to existing rg line?
@@ -235,6 +239,12 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
             case 1:
                 retval->no_pg = 1;
                 break;
+            case 'u':
+                retval->uncompressed = 1;
+                break;
+            case 'w':
+                retval->overwrite_hdr_rg = 1;
+                break;
             case '?':
                 usage(stderr);
                 free(retval);
@@ -314,7 +324,7 @@ static void orphan_only_func(const state_t* state, bam1_t* file_read)
 }
 
 static bool init(const parsed_opts_t* opts, state_t** state_out) {
-    char output_mode[8] = "w";
+    char output_mode[9] = "w";
     state_t* retval = (state_t*) calloc(1, sizeof(state_t));
 
     if (retval == NULL) {
@@ -332,8 +342,12 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
     retval->input_header = sam_hdr_read(retval->input_file);
 
     retval->output_header = sam_hdr_dup(retval->input_header);
+
+    if (opts->uncompressed)
+        strcat(output_mode, "0");
     if (opts->output_name) // File format auto-detection
-        sam_open_mode(output_mode + 1, opts->output_name, NULL);
+        sam_open_mode(output_mode + strlen(output_mode),
+                      opts->output_name, NULL);
     retval->output_file = sam_open_format(opts->output_name == NULL?"-":opts->output_name, output_mode, &opts->ga.out);
 
     if (retval->output_file == NULL) {
@@ -351,10 +365,20 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
         // Check does not already exist
         kstring_t hdr_line = { 0, 0, NULL };
         if (sam_hdr_find_line_id(retval->output_header, "RG", "ID", opts->rg_id, &hdr_line) == 0) {
-            fprintf(stderr, "[init] ID of new RG line specified conflicts with that of an existing header RG line. Overwrite not yet implemented.\n");
-            free(hdr_line.s);
-            return false;
+            if (opts->overwrite_hdr_rg) {
+                if(-1 == sam_hdr_remove_line_id(retval->output_header, "RG", "ID", opts->rg_id)) {
+                    fprintf(stderr, "[init] Error removing the RG line with ID:%s from the output header.\n", opts->rg_id);
+                    ks_free(&hdr_line);
+                    return false;
+                }
+            } else {
+                fprintf(stderr, "[init] RG line with ID:%s already present in the header. Use -w to overwrite.\n", opts->rg_id);
+                ks_free(&hdr_line);
+                return false;
+            }
         }
+        ks_free(&hdr_line);
+
         if (-1 == sam_hdr_add_lines(retval->output_header, opts->rg_line, strlen(opts->rg_line))) {
             fprintf(stderr, "[init] Error adding RG line with ID:%s to the output header.\n", opts->rg_id);
             return false;
@@ -374,7 +398,7 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
                 return false;
             }
             retval->rg_id = strdup(opts->rg_id);
-            free(hdr_line.s);
+            ks_free(&hdr_line);
         } else {
             kstring_t rg_id = { 0, 0, NULL };
             if (sam_hdr_find_tag_id(retval->output_header, "RG", NULL, NULL, "ID", &rg_id) < 0) {
diff --git a/samtools/bam_addrprg.c.pysam.c b/samtools/bam_addrprg.c.pysam.c
index ba1cb08..88ce7e3 100644
--- a/samtools/bam_addrprg.c.pysam.c
+++ b/samtools/bam_addrprg.c.pysam.c
@@ -2,7 +2,7 @@
 
 /* bam_addrprg.c -- samtools command to add or replace readgroups.
 
-   Copyright (c) 2013, 2015-2017, 2019 Genome Research Limited.
+   Copyright (c) 2013, 2015-2017, 2019-2021 Genome Research Limited.
 
    Author: Martin O. Pollard <mp15@sanger.ac.uk>
 
@@ -53,6 +53,8 @@ struct parsed_opts {
     rg_mode mode;
     sam_global_args ga;
     htsThreadPool p;
+    int uncompressed;
+    int overwrite_hdr_rg;
 };
 
 struct state;
@@ -166,13 +168,15 @@ static char* get_rg_id(const char *line)
 static void usage(FILE *fp)
 {
     fprintf(fp,
-            "Usage: samtools addreplacerg [options] [-r <@RG line> | -R <existing id>] [-o <output.bam>] <input.bam>\n"
+            "Usage: samtools addreplacerg [options] [-r <@RG line> | -R <existing id>] [-m orphan_only|overwrite_all] [-o <output.bam>] <input.bam>\n"
             "\n"
             "Options:\n"
             "  -m MODE   Set the mode of operation from one of overwrite_all, orphan_only [overwrite_all]\n"
             "  -o FILE   Where to write output to [samtools_stdout]\n"
             "  -r STRING @RG line text\n"
             "  -R STRING ID of @RG line in existing header to use\n"
+            "  -u        Output uncompressed data\n"
+            "  -w        Overwrite an existing @RG line\n"
             "  --no-PG   Do not add a PG line\n"
             );
     sam_global_opt_help(fp, "..O..@..");
@@ -200,7 +204,7 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
     };
     kstring_t rg_line = {0,0,NULL};
 
-    while ((n = getopt_long(argc, argv, "r:R:m:o:O:l:h@:", lopts, NULL)) >= 0) {
+    while ((n = getopt_long(argc, argv, "r:R:m:o:O:h@:uw", lopts, NULL)) >= 0) {
         switch (n) {
             case 'r':
                 // Are we adding to existing rg line?
@@ -237,6 +241,12 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
             case 1:
                 retval->no_pg = 1;
                 break;
+            case 'u':
+                retval->uncompressed = 1;
+                break;
+            case 'w':
+                retval->overwrite_hdr_rg = 1;
+                break;
             case '?':
                 usage(samtools_stderr);
                 free(retval);
@@ -316,7 +326,7 @@ static void orphan_only_func(const state_t* state, bam1_t* file_read)
 }
 
 static bool init(const parsed_opts_t* opts, state_t** state_out) {
-    char output_mode[8] = "w";
+    char output_mode[9] = "w";
     state_t* retval = (state_t*) calloc(1, sizeof(state_t));
 
     if (retval == NULL) {
@@ -334,8 +344,12 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
     retval->input_header = sam_hdr_read(retval->input_file);
 
     retval->output_header = sam_hdr_dup(retval->input_header);
+
+    if (opts->uncompressed)
+        strcat(output_mode, "0");
     if (opts->output_name) // File format auto-detection
-        sam_open_mode(output_mode + 1, opts->output_name, NULL);
+        sam_open_mode(output_mode + strlen(output_mode),
+                      opts->output_name, NULL);
     retval->output_file = sam_open_format(opts->output_name == NULL?"-":opts->output_name, output_mode, &opts->ga.out);
 
     if (retval->output_file == NULL) {
@@ -353,10 +367,20 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
         // Check does not already exist
         kstring_t hdr_line = { 0, 0, NULL };
         if (sam_hdr_find_line_id(retval->output_header, "RG", "ID", opts->rg_id, &hdr_line) == 0) {
-            fprintf(samtools_stderr, "[init] ID of new RG line specified conflicts with that of an existing header RG line. Overwrite not yet implemented.\n");
-            free(hdr_line.s);
-            return false;
+            if (opts->overwrite_hdr_rg) {
+                if(-1 == sam_hdr_remove_line_id(retval->output_header, "RG", "ID", opts->rg_id)) {
+                    fprintf(samtools_stderr, "[init] Error removing the RG line with ID:%s from the output header.\n", opts->rg_id);
+                    ks_free(&hdr_line);
+                    return false;
+                }
+            } else {
+                fprintf(samtools_stderr, "[init] RG line with ID:%s already present in the header. Use -w to overwrite.\n", opts->rg_id);
+                ks_free(&hdr_line);
+                return false;
+            }
         }
+        ks_free(&hdr_line);
+
         if (-1 == sam_hdr_add_lines(retval->output_header, opts->rg_line, strlen(opts->rg_line))) {
             fprintf(samtools_stderr, "[init] Error adding RG line with ID:%s to the output header.\n", opts->rg_id);
             return false;
@@ -376,7 +400,7 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
                 return false;
             }
             retval->rg_id = strdup(opts->rg_id);
-            free(hdr_line.s);
+            ks_free(&hdr_line);
         } else {
             kstring_t rg_id = { 0, 0, NULL };
             if (sam_hdr_find_tag_id(retval->output_header, "RG", NULL, NULL, "ID", &rg_id) < 0) {
diff --git a/samtools/bam_ampliconclip.c b/samtools/bam_ampliconclip.c
new file mode 100644
index 0000000..f3fe2bc
--- /dev/null
+++ b/samtools/bam_ampliconclip.c
@@ -0,0 +1,1079 @@
+/*  bam_ampliconclip.c -- loads amplicon primers from a BED file and cuts reads
+                          from the 5' end.
+
+    Copyright (C) 2020-2021 Genome Research Ltd.
+
+    Authors: Andrew Whitwham <aw7@sanger.ac.uk>
+             Rob Davies <rmd+git@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+*/
+
+#include <config.h>
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include "htslib/thread_pool.h"
+#include "sam_opts.h"
+#include <htslib/hts.h>
+#include "htslib/hfile.h"
+#include "htslib/kstring.h"
+#include "htslib/sam.h"
+#include "samtools.h"
+#include "bam_ampliconclip.h"
+
+typedef enum {
+    soft_clip, // NOTE(review): presumably CIGAR soft ('S') clipping -- confirm at use sites
+    hard_clip  // NOTE(review): presumably CIGAR hard ('H') clipping -- confirm at use sites
+} clipping_type;
+
+typedef struct { // run-time parameters; matching_clip_site() receives a pointer to one
+    int add_pg;
+    int use_strand;
+    int write_clipped;
+    int mark_fail;
+    int both;
+    int fail_len;
+    int filter_len;
+    int unmapped;
+    int oa_tag;
+    int del_tag;
+    int tol; // read by matching_clip_site() as the slack applied around site boundaries
+    char *arg_list;
+    char *stats_file;
+    char *rejects_file;
+} cl_param_t;
+
+
+/* qsort() comparator: orders bed_entry_t records by ascending right coordinate. */
+static int bed_entry_sort(const void *av, const void *bv) {
+    const bed_entry_t *a = av, *b = bv;
+    return (a->right > b->right) - (a->right < b->right);
+}
+
+
+/* Read amplicon intervals from the BED file infile into bed_lists, keyed by ref name.
+ * With get_strand, column 6 must be '+' or '-' and sets each entry's rev flag (0 or 1).
+ * With sort_by_pos, every ref's entries are sorted by ascending right coordinate.
+ * Empty lines, '#' comments and "track "/"browser " lines are skipped.
+ * Returns 0 on success; 1 if the file cannot be opened or parsed, an allocation
+ * or hash insert fails, or bed_lists holds no refs after reading. */
+int load_bed_file_multi_ref(char *infile, int get_strand, int sort_by_pos, khash_t(bed_list_hash) *bed_lists) {
+    hFILE *fp;
+    int line_count = 0, ret;
+    int64_t left, right;
+    kstring_t line = KS_INITIALIZE;
+    bed_entry_list_t *list;
+    khiter_t bed_itr;
+
+    if ((fp = hopen(infile, "r")) == NULL) {
+        print_error_errno("amplicon", "unable to open file %s.", infile);
+        return 1;
+    }
+
+    char ref[1024];
+
+    while (line.l = 0, kgetline(&line, (kgets_func *)hgets, fp) >= 0) {
+        line_count++;
+        int hret;
+        char strand;
+
+        if (line.l == 0 || *line.s == '#') continue;
+        if (strncmp(line.s, "track ", 6) == 0) continue;
+        if (strncmp(line.s, "browser ", 8) == 0) continue;
+
+        if (get_strand) {
+            if (sscanf(line.s, "%1023s %"SCNd64" %"SCNd64" %*s %*s %c",
+                       ref, &left, &right, &strand) != 4) {
+                fprintf(stderr, "[amplicon] error: bad bed file format in line %d of %s.\n"
+                                "(N.B. ref/chrom name limited to 1023 characters.)\n",
+                                    line_count, infile);
+                ret = 1;
+                goto error;
+            }
+        } else {
+            if (sscanf(line.s, "%1023s %"SCNd64" %"SCNd64,
+                       ref, &left, &right) != 3) {
+                fprintf(stderr, "[amplicon] error: bad bed file format in line %d of %s\n"
+                                "(N.B. ref/chrom name limited to 1023 characters.)\n",
+                                    line_count, infile);
+                ret = 1;
+                goto error;
+            }
+        }
+
+        bed_itr = kh_get(bed_list_hash, bed_lists, ref);
+        if (bed_itr == kh_end(bed_lists)) { // new ref entry
+            char *ref_name = strdup(ref); // need a copy for the hash key
+            if (!ref_name) {
+                fprintf(stderr, "[amplicon] error: unable to allocate memory for ref name.\n");
+                ret = 1;
+                goto error;
+            }
+
+            bed_itr = kh_put(bed_list_hash, bed_lists, ref_name, &hret);
+            if (hret > 0) {
+                list = &kh_val(bed_lists, bed_itr);
+                // initialise the new hash entry
+                list->longest = 0;
+                list->size = 0;
+                list->length = 0;
+                list->bp = NULL;
+            } else {
+                free(ref_name); // key was not stored in the hash, so it is still ours to release
+                fprintf(stderr, "[amplicon] error: ref hashing failure.\n");
+                ret = 1;
+                goto error;
+            }
+        } else { // existing ref
+            list = &kh_val(bed_lists, bed_itr);
+        }
+
+        if (list->length == list->size) {
+           bed_entry_t *tmp;
+           list->size += list->size / 2 + 256;
+           if ((tmp = realloc(list->bp, list->size * sizeof(bed_entry_t))) == NULL) {
+               fprintf(stderr, "[amplicon] error: unable to allocate more memory for bed data.\n");
+               ret = 1;
+               goto error;
+           }
+           list->bp = tmp;
+        }
+
+        list->bp[list->length].left  = left;
+        list->bp[list->length].right = right;
+        list->bp[list->length].rev   = 0; // defined value even when no strand column is read
+        if (get_strand) {
+            if (strand == '+') {
+                list->bp[list->length].rev = 0;
+            } else if (strand == '-') {
+                list->bp[list->length].rev = 1;
+            } else {
+                fprintf(stderr, "[amplicon] error: bad strand value in line %d, expecting '+' or '-', found '%c'.\n",
+                            line_count, strand);
+                ret = 1;
+                goto error;
+            }
+        }
+
+        if (right - left > list->longest)
+            list->longest = right - left;
+
+        list->length++;
+    }
+
+    if (sort_by_pos) {
+        for (bed_itr = kh_begin(bed_lists); bed_itr != kh_end(bed_lists); ++bed_itr) {
+            if (kh_exist(bed_lists, bed_itr)) {
+                list = &kh_val(bed_lists, bed_itr);
+                qsort(list->bp, list->length, sizeof(list->bp[0]), bed_entry_sort);
+            }
+        }
+    }
+
+    if (kh_size(bed_lists) > 0) {// any entries
+        ret = 0;
+    } else {
+        ret = 1;
+    }
+
+error:
+    ks_free(&line);
+
+    if (hclose(fp) != 0) {
+        fprintf(stderr, "[amplicon] warning: failed to close %s\n", infile);
+    }
+
+    return ret;
+}
+
+// Free every per-reference entry array and key string, then the hash itself.
+void destroy_bed_hash(khash_t(bed_list_hash) *hash) {
+    khiter_t slot = kh_begin(hash);
+    while (slot != kh_end(hash)) {
+        // only occupied buckets hold a key and an entry array to release
+        if (kh_exist(hash, slot)) {
+            free((char *)kh_key(hash, slot));
+            kh_key(hash, slot) = NULL;
+            free(kh_val(hash, slot).bp);
+        }
+        ++slot;
+    }
+    kh_destroy(bed_list_hash, hash);
+}
+
+// Return the number of reference bases to clip for a read end at pos: the largest
+// overlap with a site matching within param->tol bases (same strand if
+// use_strand), or 0 when nothing matches.  is_rev selects right-end matching.
+static int matching_clip_site(bed_entry_list_t *sites, hts_pos_t pos,
+                              int is_rev, int use_strand, int64_t longest,
+                              cl_param_t *param) {
+    int i, size = 0;
+    int tol = param->tol;
+    int l = 0, mid = sites->length / 2, r = sites->length;
+    // hts_pos_t, not int: 64-bit positions must not be narrowed before the search
+    hts_pos_t pos_tol = is_rev ? (pos > tol ? pos - tol : 0) : pos;
+    // narrow l towards the last site whose right edge is at or before pos_tol
+    while (r - l > 1) {
+        if (sites->bp[mid].right <= pos_tol) {
+            l = mid;
+        } else {
+            r = mid;
+        }
+        mid = (l + r) / 2;
+    }
+
+    for (i = l; i < sites->length; i++) {
+        hts_pos_t mod_left, mod_right;
+
+        if (use_strand && is_rev != sites->bp[i].rev)
+            continue;
+
+        // widen the site by tol on the side the read end is expected to fall
+        if (is_rev) {
+            mod_left = sites->bp[i].left;
+            mod_right = sites->bp[i].right + tol;
+        } else {
+            mod_left = sites->bp[i].left > tol ? sites->bp[i].left - tol : 0;
+            mod_right = sites->bp[i].right;
+        }
+
+        // stop scanning; presumably relies on sites being sorted - TODO confirm
+        if (pos + longest + tol < mod_right)
+            break;
+
+        if (pos >= mod_left && pos <= mod_right) {
+            // keep the deepest clip among all the sites that match
+            if (is_rev) {
+                if (size < pos - sites->bp[i].left) {
+                    size = pos - sites->bp[i].left;
+                }
+            } else {
+                if (size < sites->bp[i].right - pos) {
+                    size = sites->bp[i].right - pos;
+                }
+            }
+        }
+    }
+
+    return size;
+}
+
+// Clip "bases" reference bases off the left of rec into rec_out; returns 1 if out of memory, else 0.
+static int bam_trim_left(bam1_t *rec, bam1_t *rec_out, uint32_t bases,
+                         clipping_type clipping) {
+    uint32_t *orig_cigar = bam_get_cigar(rec);
+    uint8_t *orig_seq = bam_get_seq(rec);
+    uint8_t *orig_qual = bam_get_qual(rec);
+    uint8_t *orig_aux = bam_get_aux(rec);
+    uint32_t *new_cigar;
+    uint8_t *new_qual;
+    size_t orig_l_aux = bam_get_l_aux(rec);
+    uint32_t i, j, odd_base = 0;
+    uint32_t ref_remove = bases, qry_removed = 0, hardclip = 0;
+    hts_pos_t new_pos = rec->core.pos;
+    uint32_t cig_type, cig_op;
+    // grow rec_out if needed; the extra 8 bytes presumably allow for two added CIGAR ops
+    if (rec->l_data + 8 > rec_out->m_data) {
+        uint8_t *new_data = realloc(rec_out->data, rec->l_data + 8);
+        if (!new_data) {
+            fprintf(stderr, "[ampliconclip] error: could not allocate memoy for new bam record\n");
+            return 1;
+        }
+        rec_out->data = new_data;
+        rec_out->m_data = rec->l_data + 8;
+    }
+
+    // Copy core data & name
+    memcpy(&rec_out->core, &rec->core, sizeof(rec->core));
+    memcpy(rec_out->data, rec->data, rec->core.l_qname);
+    // hard clipping the whole read leaves no sequence or CIGAR, only the aux data
+    if (clipping == hard_clip && bases >= rec->core.l_qseq) {
+        rec_out->core.l_qseq = 0;
+        rec_out->core.n_cigar = 0;
+
+        if (orig_l_aux)
+            memcpy(bam_get_aux(rec_out), orig_aux, orig_l_aux);
+
+        rec_out->l_data = bam_get_aux(rec_out) - rec_out->data + orig_l_aux;
+
+        return 0;
+    }
+
+    // Modify CIGAR: walk ops from the left until the reference bases to remove run out
+    new_cigar = bam_get_cigar(rec_out);
+
+    for (i = 0;  i < rec->core.n_cigar; i++) {
+        cig_op = bam_cigar_op(orig_cigar[i]);
+        cig_type = bam_cigar_type(cig_op);
+
+        if (cig_op == BAM_CHARD_CLIP) {
+            hardclip += bam_cigar_oplen(orig_cigar[i]);
+        } else {
+            if (cig_type & 2) {  // op consumes reference
+                if (bam_cigar_oplen(orig_cigar[i]) <= ref_remove) {
+                    ref_remove -= bam_cigar_oplen(orig_cigar[i]);
+                } else {
+                    break;  // this op is only partly removed
+                }
+                new_pos += bam_cigar_oplen(orig_cigar[i]);
+            }
+            if (cig_type & 1) {  // op consumes query
+                qry_removed += bam_cigar_oplen(orig_cigar[i]);
+            }
+        }
+    }
+
+    if (i < rec->core.n_cigar) {
+        cig_type = bam_cigar_type(bam_cigar_op(orig_cigar[i]));
+
+        // account for the last operation
+        if (cig_type & 2) {
+            new_pos += ref_remove;
+        }
+        if (cig_type & 1) {
+            qry_removed += ref_remove;
+        }
+    } else {
+        qry_removed = rec->core.l_qseq;  // every op was used up: remove the whole query
+    }
+    // rebuild the CIGAR: clip op(s) first, then what is left of the original ops
+    j = 0;
+    if (clipping == hard_clip && hardclip + qry_removed > 0) {
+        new_cigar[j++] = bam_cigar_gen(hardclip + qry_removed, BAM_CHARD_CLIP);
+    }
+    if (clipping == soft_clip) {
+        if (hardclip > 0) {
+            new_cigar[j++] = bam_cigar_gen(hardclip, BAM_CHARD_CLIP);
+        }
+        if (qry_removed > 0) {
+            new_cigar[j++] = bam_cigar_gen(qry_removed, BAM_CSOFT_CLIP);
+        }
+    }
+
+    if (i < rec->core.n_cigar
+        && bam_cigar_oplen(orig_cigar[i]) > ref_remove) {
+        new_cigar[j++] = bam_cigar_gen(bam_cigar_oplen(orig_cigar[i]) - ref_remove, bam_cigar_op(orig_cigar[i]));
+
+        // fill in the rest of the cigar
+        i++;
+
+        for (; i < rec->core.n_cigar; i++) {
+            new_cigar[j++] = orig_cigar[i];
+        }
+    }
+
+    rec_out->core.n_cigar = j;
+
+    if (clipping == soft_clip) {
+        qry_removed = 0; // Copy all the sequence and confidence values
+        odd_base = 1; // account for an odd number of bases
+    }
+    // qualities are placed straight after the packed sequence (two bases per byte)
+    new_qual = bam_get_seq(rec_out) + (rec->core.l_qseq - qry_removed + 1) / 2;
+    // Copy remaining SEQ
+    if ((qry_removed & 1) == 0) {  // starts on a byte boundary: copy as is
+        memcpy(bam_get_seq(rec_out), orig_seq + (qry_removed / 2),
+                (rec->core.l_qseq - qry_removed + odd_base) / 2);
+    } else {  // odd offset: shift every base up by one nibble
+        uint8_t *in = orig_seq + qry_removed / 2;
+        uint8_t *out = bam_get_seq(rec_out);
+        uint32_t i;
+        for (i = qry_removed; i < rec->core.l_qseq - 1; i += 2) {
+            *out++ = ((in[0] & 0x0f) << 4) | ((in[1] & 0xf0) >> 4);
+            in++;
+        }
+        if (i < rec->core.l_qseq) {
+            *out++ = (in[0] & 0x0f) << 4;
+        }
+        assert(out == new_qual);
+    }
+
+    // Copy remaining QUAL
+    memmove(new_qual, orig_qual, rec->core.l_qseq - qry_removed);
+
+    // Set new l_qseq
+    rec_out->core.l_qseq -= qry_removed;
+
+    // Move AUX
+    if (orig_l_aux)
+        memcpy(bam_get_aux(rec_out), orig_aux, orig_l_aux);
+
+    // Set new l_data
+    rec_out->l_data = bam_get_aux(rec_out) - rec_out->data + orig_l_aux;
+
+    // put in new pos
+    rec_out->core.pos = new_pos;
+
+    return 0;
+}
+
+// Clip "bases" reference bases off the right of rec into rec_out; returns 1 if out of memory, else 0.
+static int bam_trim_right(bam1_t *rec, bam1_t *rec_out, uint32_t bases,
+                          clipping_type clipping) {
+    uint32_t *orig_cigar = bam_get_cigar(rec);
+    uint8_t *orig_seq = bam_get_seq(rec);
+    uint8_t *orig_qual = bam_get_qual(rec);
+    uint8_t *orig_aux = bam_get_aux(rec);
+    uint32_t *new_cigar;
+    uint32_t new_n_cigar = 0;
+    uint8_t *new_qual;
+    size_t orig_l_aux = bam_get_l_aux(rec);
+    int32_t i;
+    int32_t j;
+    uint32_t ref_remove = bases, qry_removed = 0, hardclip = 0;
+    uint32_t cig_type, cig_op;
+    // grow rec_out if needed; the extra 8 bytes presumably allow for two added CIGAR ops
+    if (rec->l_data + 8 > rec_out->m_data) {
+        uint8_t *new_data = realloc(rec_out->data, rec->l_data + 8);
+        if (!new_data) {
+            fprintf(stderr, "[ampliconclip] error: could not allocate memoy for new bam record\n");
+            return 1;
+        }
+        rec_out->data = new_data;
+        rec_out->m_data = rec->l_data + 8;
+    }
+
+    // Copy core data & name
+    memcpy(&rec_out->core, &rec->core, sizeof(rec->core));
+    memcpy(rec_out->data, rec->data, rec->core.l_qname);
+    // hard clipping the whole read leaves no sequence or CIGAR, only the aux data
+    if (clipping == hard_clip && bases >= rec->core.l_qseq) {
+        rec_out->core.l_qseq = 0;
+        rec_out->core.n_cigar = 0;
+
+        if (orig_l_aux)
+            memcpy(bam_get_aux(rec_out), orig_aux, orig_l_aux);
+
+        rec_out->l_data = bam_get_aux(rec_out) - rec_out->data + orig_l_aux;
+        return 0;
+    }
+
+    // Modify CIGAR here: walk ops from the right until the reference bases to remove run out
+    new_cigar = bam_get_cigar(rec_out);
+
+    for (i = rec->core.n_cigar - 1;  i >= 0; --i) {
+        cig_op = bam_cigar_op(orig_cigar[i]);
+        cig_type = bam_cigar_type(cig_op);
+
+        if (cig_op == BAM_CHARD_CLIP) {
+            hardclip += bam_cigar_oplen(orig_cigar[i]);
+        } else {
+            if (cig_type & 2) {  // op consumes reference
+                if (bam_cigar_oplen(orig_cigar[i]) <= ref_remove) {
+                    ref_remove -= bam_cigar_oplen(orig_cigar[i]);
+                } else {
+                    break;  // this op is only partly removed
+                }
+            }
+            if (cig_type & 1) {  // op consumes query
+                qry_removed += bam_cigar_oplen(orig_cigar[i]);
+            }
+        }
+    }
+    // j is set to the slot for the outermost clip op; the CIGAR is then filled in backwards
+    if (i >= 0) {
+        cig_type = bam_cigar_type(bam_cigar_op(orig_cigar[i]));
+        if (cig_type & 1) {
+            qry_removed += ref_remove;
+        }
+        j = i;
+        if (qry_removed > 0) j++;
+        if (hardclip > 0 && (clipping == soft_clip || qry_removed == 0)) j++;
+    } else {
+        qry_removed = rec->core.l_qseq;  // every op was used up: remove the whole query
+        j = 0;
+        if (hardclip > 0 && clipping == soft_clip) j++;
+    }
+
+    if (clipping == hard_clip && hardclip + qry_removed > 0) {
+        new_cigar[j] = bam_cigar_gen(hardclip + qry_removed, BAM_CHARD_CLIP);
+        new_n_cigar++;
+    }
+    if (clipping == soft_clip) {
+        if (hardclip > 0) {
+            new_cigar[j] = bam_cigar_gen(hardclip, BAM_CHARD_CLIP);
+            new_n_cigar++;
+            if (qry_removed > 0) --j;  // the soft clip goes just inside the hard clip
+        }
+        if (qry_removed > 0) {
+            new_cigar[j] = bam_cigar_gen(qry_removed, BAM_CSOFT_CLIP);
+            new_n_cigar++;
+        }
+    }
+
+    if (j > 0) {
+        new_cigar[--j] = bam_cigar_gen(bam_cigar_oplen(orig_cigar[i]) - ref_remove, bam_cigar_op(orig_cigar[i]));
+        new_n_cigar++;
+    }
+
+    // fill in the rest of the cigar
+    while (j > 0) {
+        new_cigar[--j] = orig_cigar[--i];
+        new_n_cigar++;
+    }
+
+    rec_out->core.n_cigar = new_n_cigar;
+
+    if (clipping == soft_clip)
+        qry_removed = 0; // Copy all the sequence and confidence values
+    // qualities are placed straight after the packed sequence (two bases per byte)
+    new_qual = bam_get_seq(rec_out) + (rec->core.l_qseq - qry_removed + 1) / 2;
+    // Copy remaining SEQ
+    memcpy(bam_get_seq(rec_out), orig_seq, (rec->core.l_qseq - qry_removed + 1) / 2);
+
+    // Copy remaining QUAL
+    memcpy(new_qual, orig_qual, rec->core.l_qseq - qry_removed);
+
+    // Set new l_qseq
+    rec_out->core.l_qseq -= qry_removed;
+
+    // Copy AUX
+    if (orig_l_aux)
+        memcpy(bam_get_aux(rec_out), orig_aux, orig_l_aux);
+
+    // Set new l_data
+    rec_out->l_data = bam_get_aux(rec_out) - rec_out->data + orig_l_aux;
+
+    return 0;
+}
+
+// Count the query bases of b that are not soft clipped.
+static hts_pos_t active_query_len(bam1_t *b) {
+    const uint32_t *op = bam_get_cigar(b);
+    const uint32_t *end = op + b->core.n_cigar;
+    hts_pos_t total = 0;
+
+    for (; op != end; ++op) {
+        uint32_t kind = bam_cigar_op(*op);
+
+        // skip soft clips and anything that does not consume query bases
+        if (kind == BAM_CSOFT_CLIP || !(bam_cigar_type(kind) & 1))
+            continue;
+
+        total += bam_cigar_oplen(*op);
+    }
+
+    return total;
+}
+
+// Exchange the record pointers held in *a and *b.
+static inline void swap_bams(bam1_t **a, bam1_t **b) {
+    bam1_t *first = *a, *second = *b;
+    *a = second;
+    *b = first;
+}
+
+
+// Build the OA tag value for orig in oa_tag (after any OA text it already has).
+// Format OA:Z:(RNAME,POS,strand,CIGAR,MAPQ,NM;)+ with NM left empty if absent.
+// Returns 0 on success, non-zero if appending to oa_tag failed.
+// NOTE(review): the first field is written from bam_get_qname(), i.e. the read
+// name, not RNAME; fixing that needs the header, which is not passed in here.
+static inline int tag_original_data(bam1_t *orig, kstring_t *oa_tag) {
+    char strand = (orig->core.flag & BAM_FREVERSE) ? '-' : '+';
+    uint8_t *nm_tag, *old_oa_tag;
+    uint32_t *cigar;
+    int64_t nm = 0;
+    int i, res = 0;
+
+    ks_clear(oa_tag);
+
+    // if there is an existing OA tag the new one gets appended to it
+    if ((old_oa_tag = bam_aux_get(orig, "OA"))) {
+        res |= ksprintf(oa_tag, "%s", bam_aux2Z(old_oa_tag)) < 0;
+    }
+
+    if ((nm_tag = bam_aux_get(orig, "NM"))) {
+        nm = bam_aux2i(nm_tag);
+    }
+
+    res |= ksprintf(oa_tag, "%s,%"PRIhts_pos",%c,", bam_get_qname(orig), orig->core.pos + 1, strand) < 0;
+
+    for (i = 0, cigar = bam_get_cigar(orig); i < orig->core.n_cigar && res == 0; ++i) {
+        res |= kputw(bam_cigar_oplen(cigar[i]), oa_tag) < 0;
+        res |= kputc(bam_cigar_opchr(cigar[i]), oa_tag) < 0;
+    }
+
+    // MAPQ always needs its leading comma to separate it from the CIGAR
+    if (nm_tag) {
+        res |= ksprintf(oa_tag, ",%d,%"PRId64";", orig->core.qual, nm) < 0;
+    } else {
+        res |= ksprintf(oa_tag, ",%d,;", orig->core.qual) < 0;
+    }
+
+    return res;
+}
+
+// Read every record from in, clip the ends that match a primer site loaded from
+// bedfile and write it to out (or to reject, if open, when filtered), then print
+// the run statistics.  Returns 0 on success and 1 on any error.
+static int bam_clip(samFile *in, samFile *out, samFile *reject, char *bedfile,
+                    clipping_type clipping, cl_param_t *param) {
+    int ret = 1, r;
+    bam_hdr_t *header = NULL;
+    bam1_t *b = NULL, *b_tmp = NULL;
+    long f_count = 0, r_count = 0, n_count = 0, l_count = 0, l_exclude = 0, b_count = 0;
+    long filtered = 0, written = 0, failed = 0;
+    kstring_t str = KS_INITIALIZE;
+    kstring_t oat = KS_INITIALIZE;
+    bed_entry_list_t *sites = NULL;
+    FILE *stats_fp = stderr;
+    khash_t(bed_list_hash) *bed_hash = kh_init(bed_list_hash);
+
+    if (load_bed_file_multi_ref(bedfile, param->use_strand, 1, bed_hash)) {
+        fprintf(stderr, "[ampliconclip] error: unable to load bed file.\n");
+        goto fail;
+    }
+
+    if ((header = sam_hdr_read(in)) == NULL) {
+        fprintf(stderr, "[ampliconclip] error: could not read header\n");
+        goto fail;
+    }
+
+    // changing pos can ruin coordinate sort order
+    if (sam_hdr_find_tag_hd(header, "SO", &str) == 0 && str.s && strcmp(str.s, "coordinate") == 0) {
+        const char *new_order = "unknown";
+
+        if (sam_hdr_update_hd(header, "SO", new_order) == -1) {
+            fprintf(stderr, "[ampliconclip] error: unable to change sort order to 'SO:%s'\n", new_order);
+            goto fail;
+        }
+    }
+
+    // str is freed under fail: so the early exit above cannot leak it
+    if (param->add_pg && sam_hdr_add_pg(header, "samtools", "VN", samtools_version(),
+                        param->arg_list ? "CL" : NULL,
+                        param->arg_list ? param->arg_list : NULL,
+                        NULL) != 0) {
+        fprintf(stderr, "[ampliconclip] warning: unable to add @PG line to header.\n");
+    }
+    if (sam_hdr_write(out, header) < 0) {
+        fprintf(stderr, "[ampliconclip] error: could not write header.\n");
+        goto fail;
+    }
+
+    if (reject) {
+       if (sam_hdr_write(reject, header) < 0) {
+           fprintf(stderr, "[ampliconclip] error: could not write header to rejects file.\n");
+           goto fail;
+       }
+    }
+
+    b = bam_init1();
+    b_tmp = bam_init1();
+    if (!b || !b_tmp) {
+        fprintf(stderr, "[ampliconclip] error: out of memory when trying to create record.\n");
+        goto fail;
+    }
+    // the site list is looked up again only when the reference (tid) changes
+    int32_t last_tid = -1;
+    int ref_found = 0;
+
+    while ((r = sam_read1(in, header, b)) >= 0) {
+        hts_pos_t pos;
+        int is_rev;
+        int p_size;
+        int been_clipped  = 0, filter = 0;
+        int exclude = (BAM_FUNMAP | BAM_FQCFAIL);
+        khiter_t itr;
+
+        l_count++;
+
+        if (b->core.tid != last_tid) {
+            const char *ref_name;
+
+            ref_found = 0;
+            last_tid = b->core.tid;
+
+            if ((ref_name = sam_hdr_tid2name(header, b->core.tid)) != NULL) {
+                itr = kh_get(bed_list_hash, bed_hash, ref_name);
+
+                if (itr != kh_end(bed_hash)) {
+                    sites = &kh_val(bed_hash, itr);
+                    ref_found = 1;
+                }
+            }
+        }
+        // unmapped or QC-fail reads, and reads on references without sites, are not clipped
+        if (!(b->core.flag & exclude) && ref_found) {
+            if (param->oa_tag)
+                if (tag_original_data(b, &oat))
+                    goto fail;
+            // default: test one end only (alignment end for reverse reads, else the start)
+            if (!param->both) {
+                if (bam_is_rev(b)) {
+                    pos = bam_endpos(b);
+                    is_rev = 1;
+                } else {
+                    pos = b->core.pos;
+                    is_rev = 0;
+                }
+
+                if ((p_size = matching_clip_site(sites, pos, is_rev, param->use_strand, sites->longest, param))) {
+                    if (is_rev) {
+                        if (bam_trim_right(b, b_tmp, p_size, clipping) != 0)
+                            goto fail;
+
+                        swap_bams(&b, &b_tmp);
+                        r_count++;
+                    } else {
+                        if (bam_trim_left(b, b_tmp, p_size, clipping) != 0)
+                            goto fail;
+
+                        swap_bams(&b, &b_tmp);
+                        f_count++;
+                    }
+
+                    if (param->oa_tag) {
+                        if (bam_aux_update_str(b, "OA", oat.l + 1, (const char *)oat.s))
+                            goto fail;
+                    }
+
+                    if (param->del_tag) {
+                        uint8_t *tag;
+
+                        if ((tag = bam_aux_get(b, "NM")))
+                            bam_aux_del(b, tag);
+
+                        if ((tag = bam_aux_get(b, "MD")))
+                            bam_aux_del(b, tag);
+                    }
+
+                    been_clipped = 1;
+                } else {
+                    if (param->mark_fail) {
+                        b->core.flag |= BAM_FQCFAIL;
+                    }
+
+                    n_count++;
+                }
+            } else {
+                int left = 0, right = 0;
+
+                // left first
+                pos = b->core.pos;
+                is_rev = 0;
+
+                if ((p_size = matching_clip_site(sites, pos, is_rev, param->use_strand, sites->longest, param))) {
+                    if (bam_trim_left(b, b_tmp, p_size, clipping) != 0)
+                        goto fail;
+
+                    swap_bams(&b, &b_tmp);
+                    f_count++;
+                    left = 1;
+                    been_clipped = 1;
+                }
+
+                // the right
+                pos = bam_endpos(b);
+                is_rev = 1;
+
+                if ((p_size = matching_clip_site(sites, pos, is_rev, param->use_strand, sites->longest, param))) {
+                    if (bam_trim_right(b, b_tmp, p_size, clipping) != 0)
+                        goto fail;
+
+                    swap_bams(&b, &b_tmp);
+                    r_count++;
+                    right = 1;
+                    been_clipped = 1;
+                }
+
+                if (left || right) {
+                    uint8_t *tag;
+
+                    if (param->oa_tag) {
+                        if (bam_aux_update_str(b, "OA", oat.l + 1, (const char *)oat.s))
+                            goto fail;
+                    }
+
+                    if (param->del_tag) {
+                        if ((tag = bam_aux_get(b, "NM")))
+                            bam_aux_del(b, tag);
+
+                        if ((tag = bam_aux_get(b, "MD")))
+                            bam_aux_del(b, tag);
+                    }
+                }
+
+                if (left && right) {
+                    b_count++;
+                } else if (!left && !right) {
+                    if (param->mark_fail) {
+                        b->core.flag |= BAM_FQCFAIL;
+                    }
+
+                    n_count++;
+                }
+            }
+            // the length rules are applied to what is left unclipped after trimming
+            if (param->fail_len >= 0 || param->filter_len >= 0) {
+               hts_pos_t aql = active_query_len(b);
+
+               if (param->fail_len >= 0 && aql <= param->fail_len) {
+                   b->core.flag |= BAM_FQCFAIL;
+               }
+
+               if (param->filter_len >= 0 && aql <= param->filter_len) {
+                   filter = 1;
+               }
+           }
+
+           if (b->core.flag & BAM_FQCFAIL) {
+               failed++;
+           }
+
+           if (param->write_clipped && !been_clipped) {
+               filter = 1;
+           }
+
+        } else {
+            l_exclude++;
+
+            if (param->unmapped) {
+                filter = 1;
+            }
+        }
+
+        if (!filter) {
+            if (sam_write1(out, header, b) < 0) {
+                fprintf(stderr, "[ampliconclip] error: could not write line %ld.\n", l_count);
+                goto fail;
+            }
+
+            written++;
+        } else {
+            if (reject) {
+                if (sam_write1(reject, header, b) < 0) {
+                    fprintf(stderr, "[ampliconclip] error: could not write to reject file %s\n",
+                            param->rejects_file);
+                    goto fail;
+                }
+            }
+
+            filtered++;
+        }
+    }
+
+    if (r < -1) {
+        fprintf(stderr, "[ampliconclip] error: failed to read input.\n");
+        goto fail;
+    }
+
+    // keep stderr for the stats unless the requested file really opened
+    if (param->stats_file) {
+        FILE *stats_file = fopen(param->stats_file, "w");
+        if (stats_file == NULL) {
+            fprintf(stderr, "[ampliconclip] warning: cannot write stats to %s.\n", param->stats_file);
+        } else {
+            stats_fp = stats_file;
+        }
+    }
+
+    fprintf(stats_fp, "COMMAND: %s\n"
+                    "TOTAL READS: %ld\n"
+                    "TOTAL CLIPPED: %ld\n"
+                    "FORWARD CLIPPED: %ld\n"
+                    "REVERSE CLIPPED: %ld\n"
+                    "BOTH CLIPPED: %ld\n"
+                    "NOT CLIPPED: %ld\n"
+                    "EXCLUDED: %ld\n"
+                    "FILTERED: %ld\n"
+                    "FAILED: %ld\n"
+                    "WRITTEN: %ld\n", param->arg_list, l_count, f_count + r_count,
+                                    f_count, r_count, b_count, n_count, l_exclude,
+                                    filtered, failed, written);
+
+    if (stats_fp != stderr) fclose(stats_fp);
+
+    ret = 0;
+
+fail:
+    destroy_bed_hash(bed_hash);
+    ks_free(&str);
+    ks_free(&oat);
+    sam_hdr_destroy(header);
+    bam_destroy1(b);
+    bam_destroy1(b_tmp);
+    return ret;
+}
+
+// Print the ampliconclip usage text, including the shared global options, to stderr.
+static void usage(void) {
+    fputs("Usage: samtools ampliconclip -b BED file <input.bam> -o <output.bam>\n\n"
+          "Option: \n"
+          " -b  FILE            BED file of regions (eg amplicon primers) to be removed.\n"
+          " -o  FILE            output file name (default stdout).\n"
+          " -f  FILE            write stats to file name (default stderr)\n"
+          " -u                  Output uncompressed data\n"
+          " --soft-clip         soft clip amplicon primers from reads (default)\n"
+          " --hard-clip         hard clip amplicon primers from reads.\n"
+          " --both-ends         clip on both 5' and 3' ends.\n"
+          " --strand            use strand data from BED file to match read direction.\n"
+          " --clipped           only output clipped reads.\n"
+          " --fail              mark unclipped, mapped reads as QCFAIL.\n"
+          " --filter-len INT    do not output reads INT size or shorter.\n"
+          " --fail-len   INT    mark as QCFAIL reads INT size or shorter.\n"
+          " --no-excluded       do not write excluded reads (unmapped or QCFAIL).\n"
+          " --rejects-file FILE file to write filtered reads.\n"
+          " --original          for clipped entries add an OA tag with original data.\n"
+          " --keep-tag          for clipped entries keep the old NM and MD tags.\n"
+          " --tolerance         match region within this number of bases, default 5.\n"
+          " --no-PG             do not add an @PG line.\n", stderr);
+    sam_global_opt_help(stderr, "-.O..@-.");
+    fputs("\nAbout: Soft clips read alignments where they match BED file defined regions.\n"
+          "Default clipping is only on the 5' end.\n\n", stderr);
+}
+
+// Entry point for "samtools ampliconclip": parse options, open files, run bam_clip(); returns 0 or 1.
+int amplicon_clip_main(int argc, char **argv) {
+    int c, ret = 1;
+    char wmode[4] = {'w', 'b', 0, 0};
+    char *bedfile = NULL, *fnout = "-";
+    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
+    htsThreadPool p = {NULL, 0};
+    samFile *in = NULL, *out = NULL, *reject = NULL;
+    clipping_type clipping = soft_clip;
+    cl_param_t param = {1, 0, 0, 0, 0, -1, -1, 0, 0, 1, 5, NULL, NULL, NULL};
+
+    static const struct option lopts[] = {
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
+        {"no-PG", no_argument, NULL, 1002},
+        {"soft-clip", no_argument, NULL, 1003},
+        {"hard-clip", no_argument, NULL, 1004},
+        {"strand", no_argument, NULL, 1005},
+        {"clipped", no_argument, NULL, 1006},
+        {"fail", no_argument, NULL, 1007},
+        {"both-ends", no_argument, NULL, 1008},
+        {"filter-len", required_argument, NULL, 1009},
+        {"fail-len", required_argument, NULL, 1010},
+        {"no-excluded", no_argument, NULL, 1011},
+        {"rejects-file", required_argument, NULL, 1012},
+        {"original", no_argument, NULL, 1013},
+        {"keep-tag", no_argument, NULL, 1014},
+        {"tolerance", required_argument, NULL, 1015},
+        {NULL, 0, NULL, 0}
+    };
+
+    while ((c = getopt_long(argc, argv, "b:@:o:O:f:u", lopts, NULL)) >= 0) {
+        switch (c) {
+            case 'b': bedfile = optarg; break;
+            case 'o': fnout = optarg; break;
+            case 'f': param.stats_file = optarg; break;
+            case 'u': wmode[2] = '0'; break;
+            case 1002: param.add_pg = 0; break;
+            case 1003: clipping = soft_clip; break;
+            case 1004: clipping = hard_clip; break;
+            case 1005: param.use_strand = 1; break;
+            case 1006: param.write_clipped = 1; break;
+            case 1007: param.mark_fail = 1; break;
+            case 1008: param.both = 1; break;
+            case 1009: param.filter_len = atoi(optarg); break;
+            case 1010: param.fail_len = atoi(optarg); break;
+            case 1011: param.unmapped = 1; break;
+            case 1012: param.rejects_file = optarg; break;
+            case 1013: param.oa_tag = 1; break;
+            case 1014: param.del_tag = 0; break;
+            case 1015: param.tol = atoi(optarg); break;
+            default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
+                      /* else fall-through */
+            case '?': usage(); exit(1);
+        }
+    }
+
+    if (!bedfile) {
+        usage();
+        goto cleanup;
+    }
+
+    if (optind + 1 > argc) {
+        usage();
+        goto cleanup;
+    }
+
+    if (param.tol < 0) {
+        fprintf(stderr, "[ampliconclip] warning: invalid tolerance of %d,"
+                        " reseting tolerance to default of 5.\n", param.tol);
+        param.tol = 5;
+    }
+
+    if ((in = sam_open_format(argv[optind], "rb", &ga.in)) == NULL) {
+        print_error_errno("ampliconclip", "cannot open input file");
+        goto cleanup;
+    }
+
+    sam_open_mode(wmode+1, fnout, NULL);
+
+    if ((out = sam_open_format(fnout, wmode, &ga.out)) == NULL) {
+        print_error_errno("ampliconclip", "cannot open output file");
+        goto cleanup;
+    }
+
+    if (param.rejects_file) {
+        sam_open_mode(wmode+1, param.rejects_file, NULL);
+
+        if ((reject = sam_open_format(param.rejects_file, wmode, &ga.out)) == NULL) {
+            print_error_errno("ampliconclip", "cannot open rejects file");
+            goto cleanup;
+        }
+    }
+
+    if (ga.nthreads > 0) {
+        if (!(p.pool = hts_tpool_init(ga.nthreads))) {
+            fprintf(stderr, "[ampliconclip] error: cannot create thread pool.\n");
+            goto cleanup;
+        }
+        hts_set_opt(in,  HTS_OPT_THREAD_POOL, &p);
+        hts_set_opt(out, HTS_OPT_THREAD_POOL, &p);
+
+        if (reject) {
+           hts_set_opt(reject,  HTS_OPT_THREAD_POOL, &p);
+        }
+    }
+    // presumably argv was advanced past the program name by the caller - TODO confirm
+    param.arg_list = stringify_argv(argc + 1, argv - 1);
+
+    ret = bam_clip(in, out, reject, bedfile, clipping, &param);
+
+cleanup:  // every exit after option parsing releases whatever was opened so far
+    if (in) sam_close(in);
+
+    if (out && sam_close(out) < 0) {
+        fprintf(stderr, "[ampliconclip] error: error while closing output file %s.\n", fnout);
+        ret = 1;
+    }
+
+    if (reject) {
+        if (sam_close(reject) < 0) {
+            fprintf(stderr, "[ampliconclip] error: error while closing reject file %s.\n", param.rejects_file);
+            ret = 1;
+        }
+    }
+
+    if (p.pool) hts_tpool_destroy(p.pool);
+
+    sam_global_args_free(&ga);
+    free(param.arg_list);
+
+    return ret;
+}
+
diff --git a/samtools/bam_ampliconclip.c.pysam.c b/samtools/bam_ampliconclip.c.pysam.c
new file mode 100644
index 0000000..3b2ed29
--- /dev/null
+++ b/samtools/bam_ampliconclip.c.pysam.c
@@ -0,0 +1,1081 @@
+#include "samtools.pysam.h"
+
+/*  bam_ampliconclip.c -- loads amplicon primers from a BED file and cuts reads
+                          from the 5' end.
+
+    Copyright (C) 2020-2021 Genome Research Ltd.
+
+    Authors: Andrew Whitwham <aw7@sanger.ac.uk>
+             Rob Davies <rmd+git@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE
+*/
+
+#include <config.h>
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include "htslib/thread_pool.h"
+#include "sam_opts.h"
+#include <htslib/hts.h>
+#include "htslib/hfile.h"
+#include "htslib/kstring.h"
+#include "htslib/sam.h"
+#include "samtools.h"
+#include "bam_ampliconclip.h"
+/* How the bases matched to a primer site are represented in the output record. */
+typedef enum {
+    soft_clip,  // bases become a BAM_CSOFT_CLIP op; SEQ and QUAL are copied in full
+    hard_clip   // bases become a BAM_CHARD_CLIP op and are dropped from SEQ and QUAL
+} clipping_type;
+/* Run-time options for ampliconclip, filled in from the command line. */
+typedef struct {
+    int add_pg;         // add an @PG header line (cleared by --no-PG)
+    int use_strand;     // match BED strand against read direction (--strand)
+    int write_clipped;  // filter out reads that were not clipped (--clipped)
+    int mark_fail;      // flag unclipped reads as QCFAIL (--fail)
+    int both;           // try to clip at both ends of each read (--both-ends)
+    int fail_len;       // QCFAIL reads this length or shorter; < 0 disables (--fail-len)
+    int filter_len;     // filter reads this length or shorter; < 0 disables (--filter-len)
+    int unmapped;       // filter excluded reads instead of writing them (--no-excluded)
+    int oa_tag;         // add an OA tag to clipped reads (--original)
+    int del_tag;        // delete NM and MD tags from clipped reads (cleared by --keep-tag)
+    int tol;            // position matching tolerance in bases (--tolerance)
+    char *arg_list;     // command line as one string, used for @PG and the stats report
+    char *stats_file;   // file to write the stats to instead of the default stream (-f)
+    char *rejects_file; // file that receives filtered reads (--rejects-file)
+} cl_param_t;
+
+/* qsort() comparator: orders bed entries by ascending right coordinate. */
+static int bed_entry_sort(const void *av, const void *bv) {
+    const int64_t right_a = ((bed_entry_t *) av)->right;
+    const int64_t right_b = ((bed_entry_t *) bv)->right;
+    return (right_a > right_b) - (right_a < right_b);
+}
+
+/* Read BED file infile into bed_lists, one entry list per reference name.
+ * With get_strand set, column 6 must be '+' or '-' and is stored as rev;
+ * with sort_by_pos set, each list is sorted by right coordinate after loading.
+ * Returns 0 on success, 1 on any error or if the hash ends up empty. */
+int load_bed_file_multi_ref(char *infile, int get_strand, int sort_by_pos, khash_t(bed_list_hash) *bed_lists) {
+    hFILE *fp;
+    int line_count = 0, ret;
+    int64_t left, right;
+    kstring_t line = KS_INITIALIZE;
+    bed_entry_list_t *list;
+    khiter_t bed_itr;
+
+    if ((fp = hopen(infile, "r")) == NULL) {
+        print_error_errno("amplicon", "unable to open file %s.", infile);
+        return 1;
+    }
+
+    char ref[1024];
+
+    while (line.l = 0, kgetline(&line, (kgets_func *)hgets, fp) >= 0) {
+        line_count++;
+        int hret;
+        char strand;
+        // skip empty lines, '#' comments and "track "/"browser " header lines
+        if (line.l == 0 || *line.s == '#') continue;
+        if (strncmp(line.s, "track ", 6) == 0) continue;
+        if (strncmp(line.s, "browser ", 8) == 0) continue;
+
+        if (get_strand) {
+            if (sscanf(line.s, "%1023s %"SCNd64" %"SCNd64" %*s %*s %c",
+                       ref, &left, &right, &strand) != 4) {
+                fprintf(samtools_stderr, "[amplicon] error: bad bed file format in line %d of %s.\n"
+                                "(N.B. ref/chrom name limited to 1023 characters.)\n",
+                                    line_count, infile);
+                ret = 1;
+                goto error;
+            }
+        } else {
+            if (sscanf(line.s, "%1023s %"SCNd64" %"SCNd64,
+                       ref, &left, &right) != 3) {
+                fprintf(samtools_stderr, "[amplicon] error: bad bed file format in line %d of %s\n"
+                                "(N.B. ref/chrom name limited to 1023 characters.)\n",
+                                    line_count, infile);
+                ret = 1;
+                goto error;
+            }
+        }
+
+        bed_itr = kh_get(bed_list_hash, bed_lists, ref);
+
+        if (bed_itr == kh_end(bed_lists)) { // new ref entry
+            char *ref_name = strdup(ref); // need a copy for the hash key
+
+            if (!ref_name) {
+                fprintf(samtools_stderr, "[amplicon] error: unable to allocate memory for ref name.\n");
+                ret = 1;
+                goto error;
+            }
+
+            bed_itr = kh_put(bed_list_hash, bed_lists, ref_name, &hret);
+
+            if (hret > 0) {
+                list = &kh_val(bed_lists, bed_itr);
+
+                // initialise the new hash entry
+                list->longest = 0;
+                list->size = 0;
+                list->length = 0;
+                list->bp = NULL;
+            } else {
+                free(ref_name); // the key was not stored, so nothing else would free it
+                fprintf(samtools_stderr, "[amplicon] error: ref hashing failure.\n");
+                ret = 1;
+                goto error;
+            }
+        } else { // existing ref
+            list = &kh_val(bed_lists, bed_itr);
+        }
+        // grow the entry array by half its size plus 256 whenever it is full
+        if (list->length == list->size) {
+           bed_entry_t *tmp;
+
+           list->size += list->size / 2 + 256;
+
+           if ((tmp = realloc(list->bp, list->size * sizeof(bed_entry_t))) == NULL) {
+               fprintf(samtools_stderr, "[amplicon] error: unable to allocate more memory for bed data.\n");
+               ret = 1;
+               goto error;
+           }
+
+           list->bp = tmp;
+        }
+
+        list->bp[list->length].left  = left;
+        list->bp[list->length].right = right;
+        list->bp[list->length].rev   = 0; // defined default; replaced below when strand is read
+        if (get_strand) {
+            if (strand == '+') {
+                list->bp[list->length].rev = 0;
+            } else if (strand == '-') {
+                list->bp[list->length].rev = 1;
+            } else {
+                fprintf(samtools_stderr, "[amplicon] error: bad strand value in line %d, expecting '+' or '-', found '%c'.\n",
+                            line_count, strand);
+                ret = 1;
+                goto error;
+            }
+        }
+
+        if (right - left > list->longest)
+            list->longest = right - left;
+
+        list->length++;
+    }
+
+    if (sort_by_pos) {
+        for (bed_itr = kh_begin(bed_lists); bed_itr != kh_end(bed_lists); ++bed_itr) {
+            if (kh_exist(bed_lists, bed_itr)) {
+                list = &kh_val(bed_lists, bed_itr);
+                qsort(list->bp, list->length, sizeof(list->bp[0]), bed_entry_sort);
+            }
+        }
+    }
+
+    ret = kh_size(bed_lists) > 0 ? 0 : 1; // a file with no entries counts as a failure
+
+error:
+    ks_free(&line);
+
+    if (hclose(fp) != 0) {
+        fprintf(samtools_stderr, "[amplicon] warning: failed to close %s\n", infile);
+    }
+
+    return ret;
+}
+
+/* Free every entry array and key string held in hash, then the hash itself. */
+void destroy_bed_hash(khash_t(bed_list_hash) *hash) {
+    khiter_t itr;
+
+    if (!hash) return; // accept NULL like free() does; kh_end() would dereference it
+    for (itr = kh_begin(hash); itr != kh_end(hash); ++itr) {
+        if (!kh_exist(hash, itr)) continue;
+        free(kh_val(hash, itr).bp);
+        free((char *)kh_key(hash, itr)); // keys are heap copies owned by the hash
+        kh_key(hash, itr) = NULL;
+    }
+
+    kh_destroy(bed_list_hash, hash);
+}
+
+/* Return the number of bases to clip for a read end at pos, or 0 if pos matches
+ * no site. sites must be sorted by right; tol extends each site leftwards for
+ * forward ends and rightwards for reverse ends. The largest overlap wins. */
+static int matching_clip_site(bed_entry_list_t *sites, hts_pos_t pos,
+                              int is_rev, int use_strand, int64_t longest,
+                              cl_param_t *param) {
+    int i, size = 0;  // may need this to be variable
+    int tol = param->tol;
+    int l = 0, mid = sites->length / 2, r = sites->length;
+    hts_pos_t pos_tol = is_rev ? (pos > tol ? pos - tol : 0) : pos; // 64-bit: an int would truncate large positions
+    // binary search for where to start scanning: entries before l have right <= pos_tol
+    while (r - l > 1) {
+        if (sites->bp[mid].right <= pos_tol) {
+            l = mid;
+        } else {
+            r = mid;
+        }
+        mid = (l + r) / 2;
+    }
+
+    for (i = l; i < sites->length; i++) {
+        hts_pos_t mod_left, mod_right;
+
+        if (use_strand && is_rev != sites->bp[i].rev)
+            continue;
+
+        if (is_rev) {
+            mod_left = sites->bp[i].left;
+            mod_right = sites->bp[i].right + tol;
+        } else {
+            if (sites->bp[i].left > tol) {
+                mod_left = sites->bp[i].left - tol;
+            } else {
+                mod_left = 0;
+            }
+            mod_right = sites->bp[i].right;
+        }
+        // sorted by right, so once this holds no later site can contain pos
+        if (pos + longest + tol < mod_right)
+            break;
+
+        if (pos >= mod_left && pos <= mod_right) {
+            if (is_rev) {
+                if (size < pos - sites->bp[i].left) {
+                    size = pos - sites->bp[i].left;
+                }
+            } else {
+                if (size < sites->bp[i].right - pos) {
+                    size = sites->bp[i].right - pos;
+                }
+            }
+        }
+    }
+
+    return size;
+}
+
+// Clip `bases` reference bases off the left end of rec into rec_out. Returns 0, or 1 if out of memory.
+static int bam_trim_left(bam1_t *rec, bam1_t *rec_out, uint32_t bases,
+                         clipping_type clipping) {
+    uint32_t *orig_cigar = bam_get_cigar(rec);
+    uint8_t *orig_seq = bam_get_seq(rec);
+    uint8_t *orig_qual = bam_get_qual(rec);
+    uint8_t *orig_aux = bam_get_aux(rec);
+    uint32_t *new_cigar;
+    uint8_t *new_qual;
+    size_t orig_l_aux = bam_get_l_aux(rec);
+    uint32_t i, j, odd_base = 0;
+    uint32_t ref_remove = bases, qry_removed = 0, hardclip = 0;
+    hts_pos_t new_pos = rec->core.pos;
+    uint32_t cig_type, cig_op;
+    // make sure rec_out can hold all of rec's data plus 8 spare bytes
+    if (rec->l_data + 8 > rec_out->m_data) {
+        uint8_t *new_data = realloc(rec_out->data, rec->l_data + 8);
+        if (!new_data) {
+            fprintf(samtools_stderr, "[ampliconclip] error: could not allocate memoy for new bam record\n");
+            return 1;
+        }
+        rec_out->data = new_data;
+        rec_out->m_data = rec->l_data + 8;
+    }
+
+    // Copy core data & name
+    memcpy(&rec_out->core, &rec->core, sizeof(rec->core));
+    memcpy(rec_out->data, rec->data, rec->core.l_qname);
+    // hard clip covering the whole read: the output keeps only core, name and aux data
+    if (clipping == hard_clip && bases >= rec->core.l_qseq) {
+        rec_out->core.l_qseq = 0;
+        rec_out->core.n_cigar = 0;
+
+        if (orig_l_aux)
+            memcpy(bam_get_aux(rec_out), orig_aux, orig_l_aux);
+
+        rec_out->l_data = bam_get_aux(rec_out) - rec_out->data + orig_l_aux;
+
+        return 0;
+    }
+
+    // Modify CIGAR
+    new_cigar = bam_get_cigar(rec_out);
+    // use up whole ops from the left until ref_remove reference bases are consumed
+    for (i = 0;  i < rec->core.n_cigar; i++) {
+        cig_op = bam_cigar_op(orig_cigar[i]);
+        cig_type = bam_cigar_type(cig_op);
+
+        if (cig_op == BAM_CHARD_CLIP) {
+            hardclip += bam_cigar_oplen(orig_cigar[i]);
+        } else {
+            if (cig_type & 2) { // op consumes reference bases
+                if (bam_cigar_oplen(orig_cigar[i]) <= ref_remove) {
+                    ref_remove -= bam_cigar_oplen(orig_cigar[i]);
+                } else {
+                    break;
+                }
+                new_pos += bam_cigar_oplen(orig_cigar[i]);
+            }
+            if (cig_type & 1) { // op consumes query bases
+                qry_removed += bam_cigar_oplen(orig_cigar[i]);
+            }
+        }
+    }
+    // i < n_cigar: op i is only partly removed; otherwise every op was used up
+    if (i < rec->core.n_cigar) {
+        cig_type = bam_cigar_type(bam_cigar_op(orig_cigar[i]));
+
+        // account for the last operation
+        if (cig_type & 2) {
+            new_pos += ref_remove;
+        }
+        if (cig_type & 1) {
+            qry_removed += ref_remove;
+        }
+    } else {
+        qry_removed = rec->core.l_qseq;
+    }
+    // the new CIGAR starts with the clip op(s); existing hard clips are folded in
+    j = 0;
+    if (clipping == hard_clip && hardclip + qry_removed > 0) {
+        new_cigar[j++] = bam_cigar_gen(hardclip + qry_removed, BAM_CHARD_CLIP);
+    }
+    if (clipping == soft_clip) {
+        if (hardclip > 0) {
+            new_cigar[j++] = bam_cigar_gen(hardclip, BAM_CHARD_CLIP);
+        }
+        if (qry_removed > 0) {
+            new_cigar[j++] = bam_cigar_gen(qry_removed, BAM_CSOFT_CLIP);
+        }
+    }
+    // then whatever is left of the partly removed op
+    if (i < rec->core.n_cigar
+        && bam_cigar_oplen(orig_cigar[i]) > ref_remove) {
+        new_cigar[j++] = bam_cigar_gen(bam_cigar_oplen(orig_cigar[i]) - ref_remove, bam_cigar_op(orig_cigar[i]));
+
+        // fill in the rest of the cigar
+        i++;
+
+        for (; i < rec->core.n_cigar; i++) {
+            new_cigar[j++] = orig_cigar[i];
+        }
+    }
+
+    rec_out->core.n_cigar = j;
+
+    if (clipping == soft_clip) {
+        qry_removed = 0; // Copy all the sequence and confidence values
+        odd_base = 1; // account for an odd number of bases
+    }
+    // SEQ holds two bases per byte, so QUAL follows (length + 1) / 2 bytes of SEQ
+    new_qual = bam_get_seq(rec_out) + (rec->core.l_qseq - qry_removed + 1) / 2;
+    // Copy remaining SEQ
+    if ((qry_removed & 1) == 0) {
+        memcpy(bam_get_seq(rec_out), orig_seq + (qry_removed / 2),
+                (rec->core.l_qseq - qry_removed + odd_base) / 2);
+    } else { // odd offset: every kept base has to move up by one nibble
+        uint8_t *in = orig_seq + qry_removed / 2;
+        uint8_t *out = bam_get_seq(rec_out);
+        uint32_t i;
+        for (i = qry_removed; i < rec->core.l_qseq - 1; i += 2) {
+            *out++ = ((in[0] & 0x0f) << 4) | ((in[1] & 0xf0) >> 4);
+            in++;
+        }
+        if (i < rec->core.l_qseq) {
+            *out++ = (in[0] & 0x0f) << 4;
+        }
+        assert(out == new_qual);
+    }
+
+    // Copy remaining QUAL
+    memmove(new_qual, orig_qual, rec->core.l_qseq - qry_removed);
+
+    // Set new l_qseq
+    rec_out->core.l_qseq -= qry_removed;
+
+    // Move AUX
+    if (orig_l_aux)
+        memcpy(bam_get_aux(rec_out), orig_aux, orig_l_aux);
+
+    // Set new l_data
+    rec_out->l_data = bam_get_aux(rec_out) - rec_out->data + orig_l_aux;
+
+    // put in new pos
+    rec_out->core.pos = new_pos;
+
+    return 0;
+}
+
+// Clip `bases` reference bases off the right end of rec into rec_out. Returns 0, or 1 if out of memory.
+static int bam_trim_right(bam1_t *rec, bam1_t *rec_out, uint32_t bases,
+                          clipping_type clipping) {
+    uint32_t *orig_cigar = bam_get_cigar(rec);
+    uint8_t *orig_seq = bam_get_seq(rec);
+    uint8_t *orig_qual = bam_get_qual(rec);
+    uint8_t *orig_aux = bam_get_aux(rec);
+    uint32_t *new_cigar;
+    uint32_t new_n_cigar = 0;
+    uint8_t *new_qual;
+    size_t orig_l_aux = bam_get_l_aux(rec);
+    int32_t i;
+    int32_t j;
+    uint32_t ref_remove = bases, qry_removed = 0, hardclip = 0;
+    uint32_t cig_type, cig_op;
+    // make sure rec_out can hold all of rec's data plus 8 spare bytes
+    if (rec->l_data + 8 > rec_out->m_data) {
+        uint8_t *new_data = realloc(rec_out->data, rec->l_data + 8);
+        if (!new_data) {
+            fprintf(samtools_stderr, "[ampliconclip] error: could not allocate memoy for new bam record\n");
+            return 1;
+        }
+        rec_out->data = new_data;
+        rec_out->m_data = rec->l_data + 8;
+    }
+
+    // Copy core data & name
+    memcpy(&rec_out->core, &rec->core, sizeof(rec->core));
+    memcpy(rec_out->data, rec->data, rec->core.l_qname);
+    // hard clip covering the whole read: the output keeps only core, name and aux data
+    if (clipping == hard_clip && bases >= rec->core.l_qseq) {
+        rec_out->core.l_qseq = 0;
+        rec_out->core.n_cigar = 0;
+
+        if (orig_l_aux)
+            memcpy(bam_get_aux(rec_out), orig_aux, orig_l_aux);
+
+        rec_out->l_data = bam_get_aux(rec_out) - rec_out->data + orig_l_aux;
+        return 0;
+    }
+
+    // Modify CIGAR here
+    new_cigar = bam_get_cigar(rec_out);
+    // use up whole ops from the right until ref_remove reference bases are consumed
+    for (i = rec->core.n_cigar - 1;  i >= 0; --i) {
+        cig_op = bam_cigar_op(orig_cigar[i]);
+        cig_type = bam_cigar_type(cig_op);
+
+        if (cig_op == BAM_CHARD_CLIP) {
+            hardclip += bam_cigar_oplen(orig_cigar[i]);
+        } else {
+            if (cig_type & 2) { // op consumes reference bases
+                if (bam_cigar_oplen(orig_cigar[i]) <= ref_remove) {
+                    ref_remove -= bam_cigar_oplen(orig_cigar[i]);
+                } else {
+                    break;
+                }
+            }
+            if (cig_type & 1) { // op consumes query bases
+                qry_removed += bam_cigar_oplen(orig_cigar[i]);
+            }
+        }
+    }
+    // j: new_cigar index of the last clip op to be written (when there is one); the CIGAR is then filled right to left
+    if (i >= 0) {
+        cig_type = bam_cigar_type(bam_cigar_op(orig_cigar[i]));
+        if (cig_type & 1) {
+            qry_removed += ref_remove;
+        }
+        j = i;
+        if (qry_removed > 0) j++;
+        if (hardclip > 0 && (clipping == soft_clip || qry_removed == 0)) j++;
+    } else {
+        qry_removed = rec->core.l_qseq;
+        j = 0;
+        if (hardclip > 0 && clipping == soft_clip) j++;
+    }
+    // write the clip op(s) at the right-hand end; existing hard clips are folded in
+    if (clipping == hard_clip && hardclip + qry_removed > 0) {
+        new_cigar[j] = bam_cigar_gen(hardclip + qry_removed, BAM_CHARD_CLIP);
+        new_n_cigar++;
+    }
+    if (clipping == soft_clip) {
+        if (hardclip > 0) {
+            new_cigar[j] = bam_cigar_gen(hardclip, BAM_CHARD_CLIP);
+            new_n_cigar++;
+            if (qry_removed > 0) --j;
+        }
+        if (qry_removed > 0) {
+            new_cigar[j] = bam_cigar_gen(qry_removed, BAM_CSOFT_CLIP);
+            new_n_cigar++;
+        }
+    }
+    // then whatever is left of the partly removed op
+    if (j > 0) {
+        new_cigar[--j] = bam_cigar_gen(bam_cigar_oplen(orig_cigar[i]) - ref_remove, bam_cigar_op(orig_cigar[i]));
+        new_n_cigar++;
+    }
+
+    // fill in the rest of the cigar
+    while (j > 0) {
+        new_cigar[--j] = orig_cigar[--i];
+        new_n_cigar++;
+    }
+
+    rec_out->core.n_cigar = new_n_cigar;
+
+    if (clipping == soft_clip)
+        qry_removed = 0; // Copy all the sequence and confidence values
+    // SEQ holds two bases per byte, so QUAL follows (length + 1) / 2 bytes of SEQ
+    new_qual = bam_get_seq(rec_out) + (rec->core.l_qseq - qry_removed + 1) / 2;
+    // Copy remaining SEQ
+    memcpy(bam_get_seq(rec_out), orig_seq, (rec->core.l_qseq - qry_removed + 1) / 2);
+
+    // Copy remaining QUAL
+    memcpy(new_qual, orig_qual, rec->core.l_qseq - qry_removed);
+
+    // Set new l_qseq
+    rec_out->core.l_qseq -= qry_removed;
+
+    // Copy AUX
+    if (orig_l_aux)
+        memcpy(bam_get_aux(rec_out), orig_aux, orig_l_aux);
+
+    // Set new l_data
+    rec_out->l_data = bam_get_aux(rec_out) - rec_out->data + orig_l_aux;
+
+    return 0;
+}
+
+/* Sum the lengths of the query-consuming CIGAR ops of b, leaving out soft clips. */
+static hts_pos_t active_query_len(bam1_t *b) {
+    const uint32_t *op = bam_get_cigar(b);
+    const uint32_t *end = op + b->core.n_cigar;
+    hts_pos_t total = 0;
+
+    for (; op < end; op++) {
+        uint32_t code = bam_cigar_op(*op);
+
+        if (code == BAM_CSOFT_CLIP)
+            continue;
+
+        if (bam_cigar_type(code) & 1) // bit 0 set: the op consumes query bases
+            total += bam_cigar_oplen(*op);
+    }
+
+    return total;
+}
+
+/* Exchange two record pointers so that each refers to the other's record. */
+static inline void swap_bams(bam1_t **a, bam1_t **b) {
+    bam1_t *held = *b;
+    *b = *a;
+    *a = held;
+}
+
+// Build the OA tag value for orig in oa_tag, after any OA value it already has.
+// Format OA:Z:(RNAME,POS,strand,CIGAR,MAPQ,NM; Returns 0 on success, non-zero on failure.
+static inline int tag_original_data(bam1_t *orig, kstring_t *oa_tag) {
+    char strand;
+    uint8_t *nm_tag, *old_oa_tag;
+    uint32_t *cigar;
+    int64_t nm = 0;
+    int i, res = 0;
+
+    ks_clear(oa_tag);
+
+    // if there is an existing OA tag the new one gets appended to it
+    if ((old_oa_tag = bam_aux_get(orig, "OA"))) {
+        res |= ksprintf(oa_tag, "%s", bam_aux2Z(old_oa_tag)) < 0;
+    }
+
+    if (orig->core.flag & BAM_FREVERSE)
+        strand = '-';
+    else
+        strand = '+';
+
+    if ((nm_tag = bam_aux_get(orig, "NM"))) {
+        nm = bam_aux2i(nm_tag);
+    }
+    // NOTE(review): the first field is filled with the read name, though the format above names it RNAME - confirm
+    res |= ksprintf(oa_tag, "%s,%"PRIhts_pos",%c,", bam_get_qname(orig), orig->core.pos + 1, strand) < 0;
+
+    for (i = 0, cigar = bam_get_cigar(orig); i < orig->core.n_cigar && res == 0; ++i) {
+        res |= kputw(bam_cigar_oplen(cigar[i]), oa_tag) < 0;
+        res |= kputc(bam_cigar_opchr(cigar[i]), oa_tag) < 0;
+    }
+
+    if (nm_tag) {
+        res |= ksprintf(oa_tag, ",%d,%"PRId64";", orig->core.qual, nm) < 0;
+    } else { // no NM: leave that field empty but still separate CIGAR from MAPQ
+        res |= ksprintf(oa_tag, ",%d,;", orig->core.qual) < 0;
+    }
+
+    return res;
+}
+
+/* Clip primer sites listed in bedfile from the eligible reads of in. Kept reads
+ * go to out, filtered ones to reject (when non-NULL), and a stats summary is
+ * printed at the end. Returns 0 on success and 1 on failure. */
+static int bam_clip(samFile *in, samFile *out, samFile *reject, char *bedfile,
+                    clipping_type clipping, cl_param_t *param) {
+    int ret = 1, r, file_open = 0;
+    bam_hdr_t *header = NULL;
+    bam1_t *b = NULL, *b_tmp = NULL;
+    long f_count = 0, r_count = 0, n_count = 0, l_count = 0, l_exclude = 0, b_count = 0;
+    long filtered = 0, written = 0, failed = 0;
+    kstring_t str = KS_INITIALIZE;
+    kstring_t oat = KS_INITIALIZE;
+    bed_entry_list_t *sites = NULL;
+    FILE *stats_fp = samtools_stderr;
+    khash_t(bed_list_hash) *bed_hash = kh_init(bed_list_hash);
+
+    if (load_bed_file_multi_ref(bedfile, param->use_strand, 1, bed_hash)) {
+        fprintf(samtools_stderr, "[ampliconclip] error: unable to load bed file.\n");
+        goto fail;
+    }
+
+    if ((header = sam_hdr_read(in)) == NULL) {
+        fprintf(samtools_stderr, "[ampliconclip] error: could not read header\n");
+        goto fail;
+    }
+
+    // changing pos can ruin coordinate sort order
+    if (sam_hdr_find_tag_hd(header, "SO", &str) == 0 && str.s && strcmp(str.s, "coordinate") == 0) {
+        const char *new_order = "unknown";
+
+        if (sam_hdr_update_hd(header, "SO", new_order) == -1) {
+            fprintf(samtools_stderr, "[ampliconclip] error: unable to change sort order to 'SO:%s'\n", new_order);
+            goto fail;
+        }
+    }
+
+    ks_free(&str);
+
+    if (param->add_pg && sam_hdr_add_pg(header, "samtools", "VN", samtools_version(),
+                        param->arg_list ? "CL" : NULL,
+                        param->arg_list ? param->arg_list : NULL,
+                        NULL) != 0) {
+        fprintf(samtools_stderr, "[ampliconclip] warning: unable to add @PG line to header.\n");
+    }
+    if (sam_hdr_write(out, header) < 0) {
+        fprintf(samtools_stderr, "[ampliconclip] error: could not write header.\n");
+        goto fail;
+    }
+
+    if (reject) {
+       if (sam_hdr_write(reject, header) < 0) {
+           fprintf(samtools_stderr, "[ampliconclip] error: could not write header to rejects file.\n");
+           goto fail;
+       }
+    }
+
+    b = bam_init1();
+    b_tmp = bam_init1();
+    if (!b || !b_tmp) {
+        fprintf(samtools_stderr, "[ampliconclip] error: out of memory when trying to create record.\n");
+        goto fail;
+    }
+
+    int32_t last_tid = -1;
+    int ref_found = 0;
+
+    while ((r = sam_read1(in, header, b)) >= 0) {
+        hts_pos_t pos;
+        int is_rev;
+        int p_size;
+        int been_clipped  = 0, filter = 0;
+        int exclude = (BAM_FUNMAP | BAM_FQCFAIL);
+        khiter_t itr;
+
+        l_count++;
+        // on a change of reference, look up the site list for the new one
+        if (b->core.tid != last_tid) {
+            const char *ref_name;
+
+            ref_found = 0;
+            last_tid = b->core.tid;
+
+            if ((ref_name = sam_hdr_tid2name(header, b->core.tid)) != NULL) {
+                itr = kh_get(bed_list_hash, bed_hash, ref_name);
+
+                if (itr != kh_end(bed_hash)) {
+                    sites = &kh_val(bed_hash, itr);
+                    ref_found = 1;
+                }
+            }
+        }
+
+        if (!(b->core.flag & exclude) && ref_found) {
+            if (param->oa_tag)
+                if (tag_original_data(b, &oat))
+                    goto fail;
+
+            if (!param->both) {
+                if (bam_is_rev(b)) {
+                    pos = bam_endpos(b);
+                    is_rev = 1;
+                } else {
+                    pos = b->core.pos;
+                    is_rev = 0;
+                }
+
+                if ((p_size = matching_clip_site(sites, pos, is_rev, param->use_strand, sites->longest, param))) {
+                    if (is_rev) {
+                        if (bam_trim_right(b, b_tmp, p_size, clipping) != 0)
+                            goto fail;
+
+                        swap_bams(&b, &b_tmp);
+                        r_count++;
+                    } else {
+                        if (bam_trim_left(b, b_tmp, p_size, clipping) != 0)
+                            goto fail;
+
+                        swap_bams(&b, &b_tmp);
+                        f_count++;
+                    }
+
+                    if (param->oa_tag) {
+                        if (bam_aux_update_str(b, "OA", oat.l + 1, (const char *)oat.s))
+                            goto fail;
+                    }
+
+                    if (param->del_tag) {
+                        uint8_t *tag;
+                        if ((tag = bam_aux_get(b, "NM")))
+                            bam_aux_del(b, tag);
+
+                        if ((tag = bam_aux_get(b, "MD")))
+                            bam_aux_del(b, tag);
+                    }
+
+                    been_clipped = 1;
+                } else {
+                    if (param->mark_fail) {
+                        b->core.flag |= BAM_FQCFAIL;
+                    }
+
+                    n_count++;
+                }
+            } else {
+                int left = 0, right = 0;
+
+                // left first
+                pos = b->core.pos;
+                is_rev = 0;
+
+                if ((p_size = matching_clip_site(sites, pos, is_rev, param->use_strand, sites->longest, param))) {
+                    if (bam_trim_left(b, b_tmp, p_size, clipping) != 0)
+                        goto fail;
+
+                    swap_bams(&b, &b_tmp);
+                    f_count++;
+                    left = 1;
+                    been_clipped = 1;
+                }
+
+                // the right
+                pos = bam_endpos(b);
+                is_rev = 1;
+
+                if ((p_size = matching_clip_site(sites, pos, is_rev, param->use_strand, sites->longest, param))) {
+                    if (bam_trim_right(b, b_tmp, p_size, clipping) != 0)
+                        goto fail;
+
+                    swap_bams(&b, &b_tmp);
+                    r_count++;
+                    right = 1;
+                    been_clipped = 1;
+                }
+
+                if (left || right) {
+                    uint8_t *tag;
+
+                    if (param->oa_tag) {
+                        if (bam_aux_update_str(b, "OA", oat.l + 1, (const char *)oat.s))
+                            goto fail;
+                    }
+
+                    if (param->del_tag) {
+                        if ((tag = bam_aux_get(b, "NM")))
+                            bam_aux_del(b, tag);
+
+                        if ((tag = bam_aux_get(b, "MD")))
+                            bam_aux_del(b, tag);
+                    }
+                }
+
+                if (left && right) {
+                    b_count++;
+                } else if (!left && !right) {
+                    if (param->mark_fail) {
+                        b->core.flag |= BAM_FQCFAIL;
+                    }
+
+                    n_count++;
+                }
+            }
+
+            if (param->fail_len >= 0 || param->filter_len >= 0) {
+               hts_pos_t aql = active_query_len(b);
+
+               if (param->fail_len >= 0 && aql <= param->fail_len) {
+                   b->core.flag |= BAM_FQCFAIL;
+               }
+
+               if (param->filter_len >= 0 && aql <= param->filter_len) {
+                   filter = 1;
+               }
+           }
+
+           if (b->core.flag & BAM_FQCFAIL) {
+               failed++;
+           }
+
+           if (param->write_clipped && !been_clipped) {
+               filter = 1;
+           }
+        } else {
+            l_exclude++;
+
+            if (param->unmapped) {
+                filter = 1;
+            }
+        }
+
+        if (!filter) {
+            if (sam_write1(out, header, b) < 0) {
+                fprintf(samtools_stderr, "[ampliconclip] error: could not write line %ld.\n", l_count);
+                goto fail;
+            }
+
+            written++;
+        } else {
+            if (reject) {
+                if (sam_write1(reject, header, b) < 0) {
+                    fprintf(samtools_stderr, "[ampliconclip] error: could not write to reject file %s\n",
+                            param->rejects_file);
+                    goto fail;
+                }
+            }
+
+            filtered++;
+        }
+    }
+
+    if (r < -1) {
+        fprintf(samtools_stderr, "[ampliconclip] error: failed to read input.\n");
+        goto fail;
+    }
+
+    if (param->stats_file) {
+        if ((stats_fp = fopen(param->stats_file, "w")) == NULL) {
+            fprintf(samtools_stderr, "[ampliconclip] warning: cannot write stats to %s.\n", param->stats_file);
+            stats_fp = samtools_stderr; // fall back to the default stream; fprintf must not get NULL
+        } else {
+            file_open = 1;
+        }
+    }
+
+    fprintf(stats_fp, "COMMAND: %s\n"
+                    "TOTAL READS: %ld\n"
+                    "TOTAL CLIPPED: %ld\n"
+                    "FORWARD CLIPPED: %ld\n"
+                    "REVERSE CLIPPED: %ld\n"
+                    "BOTH CLIPPED: %ld\n"
+                    "NOT CLIPPED: %ld\n"
+                    "EXCLUDED: %ld\n"
+                    "FILTERED: %ld\n"
+                    "FAILED: %ld\n"
+                    "WRITTEN: %ld\n", param->arg_list, l_count, f_count + r_count,
+                                    f_count, r_count, b_count, n_count, l_exclude,
+                                    filtered, failed, written);
+    if (file_open) {
+        fclose(stats_fp);
+    }
+
+    ret = 0;
+
+fail:
+    destroy_bed_hash(bed_hash);
+    ks_free(&str); // still holds the SO value if the sort order update failed
+    ks_free(&oat);
+    sam_hdr_destroy(header);
+    bam_destroy1(b);
+    bam_destroy1(b_tmp);
+    return ret;
+}
+
+/* Print the ampliconclip help text to samtools_stderr. */
+static void usage(void) {
+    fputs("Usage: samtools ampliconclip -b BED file <input.bam> -o <output.bam>\n\n"
+          "Option: \n"
+          " -b  FILE            BED file of regions (eg amplicon primers) to be removed.\n"
+          " -o  FILE            output file name (default samtools_stdout).\n"
+          " -f  FILE            write stats to file name (default samtools_stderr)\n"
+          " -u                  Output uncompressed data\n"
+          " --soft-clip         soft clip amplicon primers from reads (default)\n"
+          " --hard-clip         hard clip amplicon primers from reads.\n"
+          " --both-ends         clip on both 5' and 3' ends.\n"
+          " --strand            use strand data from BED file to match read direction.\n"
+          " --clipped           only output clipped reads.\n"
+          " --fail              mark unclipped, mapped reads as QCFAIL.\n"
+          " --filter-len INT    do not output reads INT size or shorter.\n"
+          " --fail-len   INT    mark as QCFAIL reads INT size or shorter.\n"
+          " --no-excluded       do not write excluded reads (unmapped or QCFAIL).\n"
+          " --rejects-file FILE file to write filtered reads.\n"
+          " --original          for clipped entries add an OA tag with original data.\n"
+          " --keep-tag          for clipped entries keep the old NM and MD tags.\n"
+          " --tolerance         match region within this number of bases, default 5.\n"
+          " --no-PG             do not add an @PG line.\n", samtools_stderr);
+    sam_global_opt_help(samtools_stderr, "-.O..@-.");
+    fputs("\nAbout: Soft clips read alignments where they match BED file defined regions.\n"
+          "Default clipping is only on the 5' end.\n\n", samtools_stderr);
+}
+
+/* Entry point for "samtools ampliconclip": parses the options, opens the input,
+ * output and optional rejects files, runs bam_clip() and releases everything.
+ * Returns 0 on success and 1 on any error. */
+int amplicon_clip_main(int argc, char **argv) {
+    int c, ret = 1;
+    char wmode[4] = {'w', 'b', 0, 0};
+    char *bedfile = NULL, *fnout = "-";
+    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
+    htsThreadPool p = {NULL, 0};
+    samFile *in = NULL, *out = NULL, *reject = NULL;
+    clipping_type clipping = soft_clip;
+    cl_param_t param = {1, 0, 0, 0, 0, -1, -1, 0, 0, 1, 5, NULL, NULL, NULL};
+
+    static const struct option lopts[] = {
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
+        {"no-PG", no_argument, NULL, 1002},
+        {"soft-clip", no_argument, NULL, 1003},
+        {"hard-clip", no_argument, NULL, 1004},
+        {"strand", no_argument, NULL, 1005},
+        {"clipped", no_argument, NULL, 1006},
+        {"fail", no_argument, NULL, 1007},
+        {"both-ends", no_argument, NULL, 1008},
+        {"filter-len", required_argument, NULL, 1009},
+        {"fail-len", required_argument, NULL, 1010},
+        {"no-excluded", no_argument, NULL, 1011},
+        {"rejects-file", required_argument, NULL, 1012},
+        {"original", no_argument, NULL, 1013},
+        {"keep-tag", no_argument, NULL, 1014},
+        {"tolerance", required_argument, NULL, 1015},
+        {NULL, 0, NULL, 0}
+    };
+
+    while ((c = getopt_long(argc, argv, "b:@:o:O:f:u", lopts, NULL)) >= 0) {
+        switch (c) {
+            case 'b': bedfile = optarg; break;
+            case 'o': fnout = optarg; break;
+            case 'f': param.stats_file = optarg; break;
+            case 'u': wmode[2] = '0'; break;
+            case 1002: param.add_pg = 0; break;
+            case 1003: clipping = soft_clip; break;
+            case 1004: clipping = hard_clip; break;
+            case 1005: param.use_strand = 1; break;
+            case 1006: param.write_clipped = 1; break;
+            case 1007: param.mark_fail = 1; break;
+            case 1008: param.both = 1; break;
+            case 1009: param.filter_len = atoi(optarg); break;
+            case 1010: param.fail_len = atoi(optarg); break;
+            case 1011: param.unmapped = 1; break;
+            case 1012: param.rejects_file = optarg; break;
+            case 1013: param.oa_tag = 1; break;
+            case 1014: param.del_tag = 0; break;
+            case 1015: param.tol = atoi(optarg); break;
+            default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
+                      /* else fall-through */
+            case '?': usage(); samtools_exit(1);
+        }
+    }
+
+    // a BED file and an input file are both mandatory
+    if (!bedfile || optind + 1 > argc) {
+        usage();
+        goto cleanup;
+    }
+
+    if (param.tol < 0) {
+        fprintf(samtools_stderr, "[ampliconclip] warning: invalid tolerance of %d,"
+                        " reseting tolerance to default of 5.\n", param.tol);
+        param.tol = 5;
+    }
+
+    if ((in = sam_open_format(argv[optind], "rb", &ga.in)) == NULL) {
+        print_error_errno("ampliconclip", "cannot open input file");
+        goto cleanup;
+    }
+
+    // let the file name extension choose the format letter that follows 'w' in wmode
+    sam_open_mode(wmode+1, fnout, NULL);
+
+    if ((out = sam_open_format(fnout, wmode, &ga.out)) == NULL) {
+        print_error_errno("ampliconclip", "cannot open output file");
+        goto cleanup;
+    }
+
+    if (param.rejects_file) {
+        sam_open_mode(wmode+1, param.rejects_file, NULL);
+
+        if ((reject = sam_open_format(param.rejects_file, wmode, &ga.out)) == NULL) {
+            print_error_errno("ampliconclip", "cannot open rejects file");
+            goto cleanup;
+        }
+    }
+
+    // all open files share a single thread pool
+    if (ga.nthreads > 0) {
+        if (!(p.pool = hts_tpool_init(ga.nthreads))) {
+            fprintf(samtools_stderr, "[ampliconclip] error: cannot create thread pool.\n");
+            goto cleanup;
+        }
+        hts_set_opt(in,  HTS_OPT_THREAD_POOL, &p);
+        hts_set_opt(out, HTS_OPT_THREAD_POOL, &p);
+
+        if (reject) {
+           hts_set_opt(reject,  HTS_OPT_THREAD_POOL, &p);
+        }
+    }
+
+    // NOTE(review): argc + 1 / argv - 1 presumably restores a word the caller skipped - confirm
+    param.arg_list = stringify_argv(argc + 1, argv - 1);
+
+    ret = bam_clip(in, out, reject, bedfile, clipping, &param);
+
+cleanup:
+    // reached on every exit path; a handle is still NULL if it was never opened
+    if (in) sam_close(in);
+
+    if (out && sam_close(out) < 0) {
+        fprintf(samtools_stderr, "[ampliconclip] error: error while closing output file %s.\n", fnout);
+        ret = 1;
+    }
+
+    if (reject && sam_close(reject) < 0) {
+        fprintf(samtools_stderr, "[ampliconclip] error: error while closing reject file %s.\n", param.rejects_file);
+        ret = 1;
+    }
+
+    if (p.pool) hts_tpool_destroy(p.pool);
+
+    sam_global_args_free(&ga);
+    free(param.arg_list);
+
+    return ret;
+}
+
diff --git a/samtools/bam_ampliconclip.h b/samtools/bam_ampliconclip.h
new file mode 100644
index 0000000..ef35357
--- /dev/null
+++ b/samtools/bam_ampliconclip.h
@@ -0,0 +1,54 @@
+/*  bam_ampliconclip.h -- shared functions between amplicon clip/stats
+
+    Copyright (C) 2020-2021 Genome Research Ltd.
+
+    Author: James Bonfield <jkb@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#ifndef BAM_AMPLICONCLIP_H
+#define BAM_AMPLICONCLIP_H
+
+#include "htslib/khash.h"
+
+typedef struct {
+    int64_t left;
+    int64_t right;
+    int rev;
+} bed_entry_t;
+
+typedef struct {
+    bed_entry_t *bp;
+    int64_t longest;
+    int length;
+    int size;
+} bed_entry_list_t;
+
+KHASH_MAP_INIT_STR(bed_list_hash, bed_entry_list_t);
+
+#define BED_LIST_INIT {NULL, 0, 0, 0} /* bed_entry_list_t: bp, longest, length, size */
+
+
+int load_bed_file_multi_ref(char *infile, int get_strand,
+                        int sort_by_pos, khash_t(bed_list_hash) *bed_lists);
+
+void destroy_bed_hash(khash_t(bed_list_hash) *hash);
+
+
+#endif /* BAM_AMPLICONCLIP_H */
diff --git a/samtools/bam_aux.c b/samtools/bam_aux.c
index 4e222a0..77d94f8 100644
--- a/samtools/bam_aux.c
+++ b/samtools/bam_aux.c
@@ -50,13 +50,13 @@ int bam_aux_drop_other(bam1_t *b, uint8_t *s)
 {
     if (s) {
         uint8_t *p, *aux;
-        aux = bam1_aux(b);
+        aux = bam_get_aux(b);
         p = s - 2;
         __skip_tag(s);
         memmove(aux, p, s - p);
-        b->data_len -= bam_get_l_aux(b) - (s - p);
+        b->l_data -= bam_get_l_aux(b) - (s - p);
     } else {
-        b->data_len -= bam_get_l_aux(b);
+        b->l_data -= bam_get_l_aux(b);
     }
     return 0;
 }
diff --git a/samtools/bam_aux.c.pysam.c b/samtools/bam_aux.c.pysam.c
index 0763976..39fe5ce 100644
--- a/samtools/bam_aux.c.pysam.c
+++ b/samtools/bam_aux.c.pysam.c
@@ -52,13 +52,13 @@ int bam_aux_drop_other(bam1_t *b, uint8_t *s)
 {
     if (s) {
         uint8_t *p, *aux;
-        aux = bam1_aux(b);
+        aux = bam_get_aux(b);
         p = s - 2;
         __skip_tag(s);
         memmove(aux, p, s - p);
-        b->data_len -= bam_get_l_aux(b) - (s - p);
+        b->l_data -= bam_get_l_aux(b) - (s - p);
     } else {
-        b->data_len -= bam_get_l_aux(b);
+        b->l_data -= bam_get_l_aux(b);
     }
     return 0;
 }
diff --git a/samtools/bam_cat.c b/samtools/bam_cat.c
index f3c812a..ed8cf58 100644
--- a/samtools/bam_cat.c
+++ b/samtools/bam_cat.c
@@ -1,6 +1,6 @@
 /*  bam_cat.c -- efficiently concatenates bam files.
 
-    Copyright (C) 2008-2009, 2011-2013, 2015-2017, 2019 Genome Research Ltd.
+    Copyright (C) 2008-2009, 2011-2013, 2015-2017, 2019, 2021 Genome Research Ltd.
     Modified SAMtools work copyright (C) 2010 Illumina, Inc.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -270,22 +270,13 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
 
         // Copy contains and blocks within them
         while ((c = cram_read_container(in_c))) {
-            cram_block *blk;
-
-           if (cram_container_is_empty(in_c)) {
-                if (cram_write_container(out_c, c) != 0)
-                    return -1;
-
+            if (cram_container_is_empty(in_c)) {
+                cram_block *blk;
                 // Container compression header
                 if (!(blk = cram_read_block(in_c)))
                     return -1;
-                if (cram_write_block(out_c, blk) != 0) {
-                    cram_free_block(blk);
-                    return -1;
-                }
                 cram_free_block(blk);
                 cram_free_container(c);
-
                 continue;
             }
 
@@ -297,6 +288,7 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
                 cram_transcode_rg(in_c, out_c, c, 1, &zero, &new_rg);
             } else {
                 int32_t num_slices;
+                cram_block *blk;
 
                 // Not switching rg so do the usual read/write loop
                 if (cram_write_container(out_c, c) != 0)
@@ -467,7 +459,7 @@ int main_cat(int argc, char *argv[])
     char *outfn = 0;
     char **infns = NULL; // files to concatenate
     int infns_size = 0;
-    int c, ret = 0, no_pg = 0;
+    int c, ret = 0, no_pg = 0, usage = 0;
     samFile *in;
     sam_global_args ga;
 
@@ -481,7 +473,7 @@ int main_cat(int argc, char *argv[])
 
     sam_global_args_init(&ga);
 
-    while ((c = getopt_long(argc, argv, "h:o:b:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "h:o:b:@:", lopts, NULL)) >= 0) {
         switch (c) {
             case 'h': {
                 samFile *fph = sam_open(optarg, "r");
@@ -522,6 +514,8 @@ int main_cat(int argc, char *argv[])
                 break;
             default:
                 if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
+                /* else fall-through */
+            case '?': usage=1; break;
         }
     }
 
@@ -539,7 +533,7 @@ int main_cat(int argc, char *argv[])
     }
 
     // Require at least one input file
-    if (infns_size + nargv_fns == 0) {
+    if (infns_size + nargv_fns == 0 || usage) {
         fprintf(stderr, "Usage: samtools cat [options] <in1.bam>  [... <inN.bam>]\n");
         fprintf(stderr, "       samtools cat [options] <in1.cram> [... <inN.cram>]\n\n");
         fprintf(stderr, "Concatenate BAM or CRAM files, first those in <bamlist.fofn>, then those\non the command line.\n\n");
diff --git a/samtools/bam_cat.c.pysam.c b/samtools/bam_cat.c.pysam.c
index 58a41b7..ef2199c 100644
--- a/samtools/bam_cat.c.pysam.c
+++ b/samtools/bam_cat.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam_cat.c -- efficiently concatenates bam files.
 
-    Copyright (C) 2008-2009, 2011-2013, 2015-2017, 2019 Genome Research Ltd.
+    Copyright (C) 2008-2009, 2011-2013, 2015-2017, 2019, 2021 Genome Research Ltd.
     Modified SAMtools work copyright (C) 2010 Illumina, Inc.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -272,22 +272,13 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
 
         // Copy contains and blocks within them
         while ((c = cram_read_container(in_c))) {
-            cram_block *blk;
-
-           if (cram_container_is_empty(in_c)) {
-                if (cram_write_container(out_c, c) != 0)
-                    return -1;
-
+            if (cram_container_is_empty(in_c)) {
+                cram_block *blk;
                 // Container compression header
                 if (!(blk = cram_read_block(in_c)))
                     return -1;
-                if (cram_write_block(out_c, blk) != 0) {
-                    cram_free_block(blk);
-                    return -1;
-                }
                 cram_free_block(blk);
                 cram_free_container(c);
-
                 continue;
             }
 
@@ -299,6 +290,7 @@ int cram_cat(int nfn, char * const *fn, const sam_hdr_t *h, const char* outcram,
                 cram_transcode_rg(in_c, out_c, c, 1, &zero, &new_rg);
             } else {
                 int32_t num_slices;
+                cram_block *blk;
 
                 // Not switching rg so do the usual read/write loop
                 if (cram_write_container(out_c, c) != 0)
@@ -469,7 +461,7 @@ int main_cat(int argc, char *argv[])
     char *outfn = 0;
     char **infns = NULL; // files to concatenate
     int infns_size = 0;
-    int c, ret = 0, no_pg = 0;
+    int c, ret = 0, no_pg = 0, usage = 0;
     samFile *in;
     sam_global_args ga;
 
@@ -483,7 +475,7 @@ int main_cat(int argc, char *argv[])
 
     sam_global_args_init(&ga);
 
-    while ((c = getopt_long(argc, argv, "h:o:b:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "h:o:b:@:", lopts, NULL)) >= 0) {
         switch (c) {
             case 'h': {
                 samFile *fph = sam_open(optarg, "r");
@@ -524,6 +516,8 @@ int main_cat(int argc, char *argv[])
                 break;
             default:
                 if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
+                /* else fall-through */
+            case '?': usage=1; break;
         }
     }
 
@@ -541,7 +535,7 @@ int main_cat(int argc, char *argv[])
     }
 
     // Require at least one input file
-    if (infns_size + nargv_fns == 0) {
+    if (infns_size + nargv_fns == 0 || usage) {
         fprintf(samtools_stderr, "Usage: samtools cat [options] <in1.bam>  [... <inN.bam>]\n");
         fprintf(samtools_stderr, "       samtools cat [options] <in1.cram> [... <inN.cram>]\n\n");
         fprintf(samtools_stderr, "Concatenate BAM or CRAM files, first those in <bamlist.fofn>, then those\non the command line.\n\n");
diff --git a/samtools/bam_color.c b/samtools/bam_color.c
index bee19b9..6decbc1 100644
--- a/samtools/bam_color.c
+++ b/samtools/bam_color.c
@@ -25,7 +25,9 @@ DEALINGS IN THE SOFTWARE.  */
 #include <config.h>
 
 #include <ctype.h>
-#include "bam.h"
+#include <string.h>
+
+#include "htslib/sam.h"
 
 /*!
  @abstract     Get the color encoding the previous and current base
@@ -45,10 +47,10 @@ char bam_aux_getCSi(bam1_t *b, int i)
 
     cs = bam_aux2Z(c);
     // adjust for strandedness and leading adaptor
-    if(bam1_strand(b)) {
+    if(bam_is_rev(b)) {
         i = strlen(cs) - 1 - i;
         // adjust for leading hard clip
-        uint32_t cigar = bam1_cigar(b)[0];
+        uint32_t cigar = bam_get_cigar(b)[0];
         if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) {
         i -= cigar >> BAM_CIGAR_SHIFT;
         }
@@ -74,10 +76,10 @@ char bam_aux_getCQi(bam1_t *b, int i)
 
     cq = bam_aux2Z(c);
     // adjust for strandedness
-    if(bam1_strand(b)) {
+    if(bam_is_rev(b)) {
         i = strlen(cq) - 1 - i;
         // adjust for leading hard clip
-        uint32_t cigar = bam1_cigar(b)[0];
+        uint32_t cigar = bam_get_cigar(b)[0];
         if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) {
         i -= (cigar >> BAM_CIGAR_SHIFT);
         }
@@ -135,28 +137,28 @@ char bam_aux_getCEi(bam1_t *b, int i)
     cs = bam_aux2Z(c);
 
     // adjust for strandedness and leading adaptor
-    if(bam1_strand(b)) { //reverse strand
+    if(bam_is_rev(b)) { //reverse strand
         cs_i = strlen(cs) - 1 - i;
         // adjust for leading hard clip
-        uint32_t cigar = bam1_cigar(b)[0];
+        uint32_t cigar = bam_get_cigar(b)[0];
         if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) {
             cs_i -= cigar >> BAM_CIGAR_SHIFT;
         }
         // get current color
         cur_color = cs[cs_i];
         // get previous base.  Note: must rc adaptor
-        prev_b = (cs_i == 1) ? "TGCAN"[(int)bam_aux_nt2int(cs[0])] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)];
+        prev_b = (cs_i == 1) ? "TGCAN"[(int)bam_aux_nt2int(cs[0])] : seq_nt16_str[bam_seqi(bam_get_seq(b), i+1)];
         // get current base
-        cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)];
+        cur_b = seq_nt16_str[bam_seqi(bam_get_seq(b), i)];
     }
     else {
         cs_i=i+1;
         // get current color
         cur_color = cs[cs_i];
         // get previous base
-        prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)];
+        prev_b = (0 == i) ? cs[0] : seq_nt16_str[bam_seqi(bam_get_seq(b), i-1)];
         // get current base
-        cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)];
+        cur_b = seq_nt16_str[bam_seqi(bam_get_seq(b), i)];
     }
 
     // corrected color
diff --git a/samtools/bam_color.c.pysam.c b/samtools/bam_color.c.pysam.c
index 762e83b..105cc33 100644
--- a/samtools/bam_color.c.pysam.c
+++ b/samtools/bam_color.c.pysam.c
@@ -27,7 +27,9 @@ DEALINGS IN THE SOFTWARE.  */
 #include <config.h>
 
 #include <ctype.h>
-#include "bam.h"
+#include <string.h>
+
+#include "htslib/sam.h"
 
 /*!
  @abstract     Get the color encoding the previous and current base
@@ -47,10 +49,10 @@ char bam_aux_getCSi(bam1_t *b, int i)
 
     cs = bam_aux2Z(c);
     // adjust for strandedness and leading adaptor
-    if(bam1_strand(b)) {
+    if(bam_is_rev(b)) {
         i = strlen(cs) - 1 - i;
         // adjust for leading hard clip
-        uint32_t cigar = bam1_cigar(b)[0];
+        uint32_t cigar = bam_get_cigar(b)[0];
         if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) {
         i -= cigar >> BAM_CIGAR_SHIFT;
         }
@@ -76,10 +78,10 @@ char bam_aux_getCQi(bam1_t *b, int i)
 
     cq = bam_aux2Z(c);
     // adjust for strandedness
-    if(bam1_strand(b)) {
+    if(bam_is_rev(b)) {
         i = strlen(cq) - 1 - i;
         // adjust for leading hard clip
-        uint32_t cigar = bam1_cigar(b)[0];
+        uint32_t cigar = bam_get_cigar(b)[0];
         if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) {
         i -= (cigar >> BAM_CIGAR_SHIFT);
         }
@@ -137,28 +139,28 @@ char bam_aux_getCEi(bam1_t *b, int i)
     cs = bam_aux2Z(c);
 
     // adjust for strandedness and leading adaptor
-    if(bam1_strand(b)) { //reverse strand
+    if(bam_is_rev(b)) { //reverse strand
         cs_i = strlen(cs) - 1 - i;
         // adjust for leading hard clip
-        uint32_t cigar = bam1_cigar(b)[0];
+        uint32_t cigar = bam_get_cigar(b)[0];
         if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) {
             cs_i -= cigar >> BAM_CIGAR_SHIFT;
         }
         // get current color
         cur_color = cs[cs_i];
         // get previous base.  Note: must rc adaptor
-        prev_b = (cs_i == 1) ? "TGCAN"[(int)bam_aux_nt2int(cs[0])] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)];
+        prev_b = (cs_i == 1) ? "TGCAN"[(int)bam_aux_nt2int(cs[0])] : seq_nt16_str[bam_seqi(bam_get_seq(b), i+1)];
         // get current base
-        cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)];
+        cur_b = seq_nt16_str[bam_seqi(bam_get_seq(b), i)];
     }
     else {
         cs_i=i+1;
         // get current color
         cur_color = cs[cs_i];
         // get previous base
-        prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)];
+        prev_b = (0 == i) ? cs[0] : seq_nt16_str[bam_seqi(bam_get_seq(b), i-1)];
         // get current base
-        cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)];
+        cur_b = seq_nt16_str[bam_seqi(bam_get_seq(b), i)];
     }
 
     // corrected color
diff --git a/samtools/bam_fastq.c b/samtools/bam_fastq.c
index 44879c2..a4d757c 100644
--- a/samtools/bam_fastq.c
+++ b/samtools/bam_fastq.c
@@ -1,6 +1,6 @@
 /*  bam_fastq.c -- FASTA and FASTQ file generation
 
-    Copyright (C) 2009-2017, 2019 Genome Research Ltd.
+    Copyright (C) 2009-2017, 2019-2020 Genome Research Ltd.
     Portions copyright (C) 2009, 2011, 2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -42,16 +42,11 @@ DEALINGS IN THE SOFTWARE.  */
 #include "samtools.h"
 #include "sam_opts.h"
 
-#define taglist_free(p)
-KLIST_INIT(ktaglist, char*, taglist_free)
-
 #define DEFAULT_BARCODE_TAG "BC"
 #define DEFAULT_QUALITY_TAG "QT"
 #define INDEX_SEPARATOR "+"
 
 int8_t seq_comp_table[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
-static const char *copied_tags[] = { "RG", "BC", "QT", NULL };
-
 static void bam2fq_usage(FILE *to, const char *command)
 {
     int fq = strcasecmp("fastq", command) == 0 || strcasecmp("bam2fq", command) == 0;
@@ -60,64 +55,71 @@ static void bam2fq_usage(FILE *to, const char *command)
     fprintf(to,
 "\n"
 "Description:\n"
-"Converts a SAM, BAM or CRAM into either FASTQ or FASTA format depending on the command invoked.\n"
+"Converts a SAM, BAM or CRAM to %s format.\n"
 "\n"
 "Options:\n"
-"  -0 FILE              write reads designated READ_OTHER to FILE\n"
-"  -1 FILE              write reads designated READ1 to FILE\n"
-"  -2 FILE              write reads designated READ2 to FILE\n"
-"  -o FILE              write reads designated READ1 or READ2 to FILE\n"
-"                       note: if a singleton file is specified with -s, only\n"
-"                       paired reads will be written to the -1 and -2 files.\n"
-"  -f INT               only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
-"  -F INT               only include reads with none of the FLAGS in INT present [0x900]\n"       //   F&x == 0
-"  -G INT               only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
-"  -n                   don't append /1 and /2 to the read name\n"
-"  -N                   always append /1 and /2 to the read name\n");
+"  -0 FILE      write reads designated READ_OTHER to FILE\n"
+"  -1 FILE      write reads designated READ1 to FILE\n"
+"  -2 FILE      write reads designated READ2 to FILE\n"
+"  -o FILE      write reads designated READ1 or READ2 to FILE\n"
+"               note: if a singleton file is specified with -s, only\n"
+"               paired reads will be written to the -1 and -2 files.\n"
+"  -f INT       only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
+"  -F INT       only include reads with none of the FLAGS in INT present [0x900]\n"       //   F&x == 0
+"  -G INT       only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
+"  -n           don't append /1 and /2 to the read name\n"
+"  -N           always append /1 and /2 to the read name\n",
+    fq ? "FASTQ" : "FASTA");
     if (fq) fprintf(to,
-"  -O                   output quality in the OQ tag if present\n");
+"  -O           output quality in the OQ tag if present\n");
     fprintf(to,
-"  -s FILE              write singleton reads designated READ1 or READ2 to FILE\n"
-"  -t                   copy RG, BC and QT tags to the %s header line\n",
+"  -s FILE      write singleton reads designated READ1 or READ2 to FILE\n"
+"  -t           copy RG, BC and QT tags to the %s header line\n",
     fq ? "FASTQ" : "FASTA");
     fprintf(to,
-"  -T TAGLIST           copy arbitrary tags to the %s header line\n",
+"  -T TAGLIST   copy arbitrary tags to the %s header line\n",
     fq ? "FASTQ" : "FASTA");
     if (fq) fprintf(to,
-"  -v INT               default quality score if not given in file [1]\n"
-"  -i                   add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)\n"
-"  -c                   compression level [0..9] to use when creating gz or bgzf fastq files [1]\n"
-"  --i1 FILE            write first index reads to FILE\n"
-"  --i2 FILE            write second index reads to FILE\n"
-"  --barcode-tag TAG    Barcode tag [default: " DEFAULT_BARCODE_TAG "]\n"
-"  --quality-tag TAG    Quality tag [default: " DEFAULT_QUALITY_TAG "]\n"
-"  --index-format STR   How to parse barcode and quality tags\n\n");
+"  -v INT       default quality score if not given in file [1]\n"
+"  -i           add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)\n"
+"  -c INT       compression level [0..9] to use when writing bgzf files [1]\n"
+"  --i1 FILE    write first index reads to FILE\n"
+"  --i2 FILE    write second index reads to FILE\n"
+"  --barcode-tag TAG\n"
+"               Barcode tag [" DEFAULT_BARCODE_TAG "]\n"
+"  --quality-tag TAG\n"
+"               Quality tag [" DEFAULT_QUALITY_TAG "]\n"
+"  --index-format STR\n"
+"               How to parse barcode and quality tags\n\n");
     sam_global_opt_help(to, "-.--.@-.");
     fprintf(to,
 "\n"
-"The files will be automatically compressed if the file names have a .gz or .bgzf extension.\n"
-"The input to this program must be collated by name. Run 'samtools collate' or 'samtools sort -n'.\n"
+"The files will be automatically compressed if the file names have a .gz\n"
+"or .bgzf extension.  The input to this program must be collated by name.\n"
+"Run 'samtools collate' or 'samtools sort -n' to achieve this.\n"
 "\n"
 "Reads are designated READ1 if FLAG READ1 is set and READ2 is not set.\n"
 "Reads are designated READ2 if FLAG READ1 is not set and READ2 is set.\n"
-"Reads are designated READ_OTHER if FLAGs READ1 and READ2 are either both set\n"
-"or both unset.\n"
+"Otherwise reads are designated READ_OTHER (both flags set or both flags unset).\n"
 "Run 'samtools flags' for more information on flag codes and meanings.\n");
     fprintf(to,
 "\n"
-"The index-format string describes how to parse the barcode and quality tags, for example:\n"
-"   i14i8       the first 14 characters are index 1, the next 8 characters are index 2\n"
-"   n8i14       ignore the first 8 characters, and use the next 14 characters for index 1\n"
-"If the tag contains a separator, then the numeric part can be replaced with '*' to mean\n"
-"'read until the separator or end of tag', for example:\n"
-"   n*i*        ignore the left part of the tag until the separator, then use the second part\n"
-"               of the tag as index 1\n");
+"The index-format string describes how to parse the barcode and quality tags.\n"
+"It is made up of 'i' or 'n' followed by a length or '*'.  For example:\n"
+"   i14i8       The first 14 characters are index 1, the next 8 are index 2\n"
+"   n8i14       Ignore the first 8 characters, and use the next 14 for index 1\n\n"
+"If the tag contains a separator, then the numeric part can be replaced with\n"
+"'*' to mean 'read until the separator or end of tag', for example:\n"
+"   i*i*        Break the tag at the separator into index 1 and index 2\n"
+"   n*i*        Ignore the left part of the tag until the separator,\n"
+"               then use the second part of the tag as index 1\n");
     fprintf(to,
 "\n"
 "Examples:\n"
-" To get just the paired reads in separate files, use:\n"
-"   samtools %s -1 paired1.%s -2 paired2.%s -0 /dev/null -s /dev/null -n in.bam\n"
-"\n To get all non-supplementary/secondary reads in a single file, redirect the output:\n"
+"To get just the paired reads in separate files, use:\n"
+"   samtools %s -1 pair1.%s -2 pair2.%s -0 /dev/null -s /dev/null -n in.bam\n"
+"\nTo get all non-supplementary/secondary reads in a single file, redirect\n"
+"the output:\n"
 "   samtools %s in.bam > all_reads.%s\n",
             command, fq ? "fq" : "fa", fq ? "fq" : "fa",
             command, fq ? "fq" : "fa");
@@ -144,96 +146,20 @@ typedef struct bam2fq_opts {
 
 typedef struct bam2fq_state {
     samFile *fp;
-    BGZF *fpse;
-    BGZF *fpr[3];
-    BGZF *fpi[2];
-    BGZF *hstdout;
+    samFile *fpse;
+    samFile *fpr[3];
+    samFile *fpi[3];
+    samFile *hstdout;
     sam_hdr_t *h;
     bool has12, use_oq, copy_tags, illumina_tag;
     int flag_on, flag_off, flag_alloff;
     fastfile filetype;
     int def_qual;
-    klist_t(ktaglist) *taglist;
     char *index_sequence;
     char compression_level;
     htsThreadPool p;
 } bam2fq_state_t;
 
-/*
- * Get and decode the read from a BAM record.
- *
- * TODO: htslib really needs an interface for this.  Consider this or perhaps
- * bam_get_seq_str (current vs original orientation) and bam_get_qual_str
- * functions as string formatted equivalents to bam_get_{seq,qual}?
- */
-
-/*
- * Reverse a string in place.
- * From http://stackoverflow.com/questions/8534274/is-the-strrev-function-not-available-in-linux.
- * Author Sumit-naik: http://stackoverflow.com/users/4590926/sumit-naik
- */
-static char *reverse(char *str)
-{
-    int i = strlen(str)-1,j=0;
-    char ch;
-    while (i>j) {
-        ch = str[i];
-        str[i]= str[j];
-        str[j] = ch;
-        i--;
-        j++;
-    }
-    return str;
-}
-
-/* return the read, reverse complemented if necessary */
-static char *get_read(const bam1_t *rec)
-{
-    int len = rec->core.l_qseq + 1;
-    char *read = calloc(1, len);
-    char *seq = (char *)bam_get_seq(rec);
-    int n;
-
-    if (!read) return NULL;
-
-    for (n=0; n < rec->core.l_qseq; n++) {
-        if (rec->core.flag & BAM_FREVERSE) read[n] = seq_nt16_str[seq_comp_table[bam_seqi(seq,n)]];
-        else                               read[n] = seq_nt16_str[bam_seqi(seq,n)];
-    }
-    if (rec->core.flag & BAM_FREVERSE) reverse(read);
-    return read;
-}
-
-/*
- * get and decode the quality from a BAM record
- */
-static int get_quality(const bam1_t *rec, char **qual_out)
-{
-    char *quality = calloc(1, rec->core.l_qseq + 1);
-    char *q = (char *)bam_get_qual(rec);
-    int n;
-
-    if (!quality) return -1;
-
-    if (*q == '\xff') {
-        free(quality);
-        *qual_out = NULL;
-        return 0;
-    }
-
-    for (n=0; n < rec->core.l_qseq; n++) {
-        quality[n] = q[n]+33;
-    }
-    if (rec->core.flag & BAM_FREVERSE) reverse(quality);
-    *qual_out = quality;
-    return 0;
-}
-
-//
-// End of htslib complaints
-//
-
-
 static readpart which_readpart(const bam1_t *b)
 {
     if ((b->core.flag & BAM_FREAD1) && !(b->core.flag & BAM_FREAD2)) {
@@ -245,290 +171,8 @@ static readpart which_readpart(const bam1_t *b)
     }
 }
 
-/*
- * parse the length part from the index-format string
- */
-static int getLength(char **s)
-{
-    int n = 0;
-    while (**s) {
-        if (**s == '*') { n=-1; (*s)++; break; }
-        if ( !isdigit(**s)) break;
-        n = n*10 + ((**s)-'0');
-        (*s)++;
-    }
-    return n;
-}
-
-static bool copy_tag(const char *tag, const bam1_t *rec, kstring_t *linebuf)
-{
-    uint8_t *s = bam_aux_get(rec, tag);
-    if (s) {
-        char aux_type = *s;
-        switch (aux_type) {
-            case 'C':
-            case 'S': aux_type = 'I'; break;
-            case 'c':
-            case 's': aux_type = 'i'; break;
-            case 'd': aux_type = 'f'; break;
-        }
-
-        // Ensure space.  Need 6 chars + length of tag.  Max length of
-        // i is 16, A is 21, B currently 26, Z is unknown, so
-        // have to check that one later.
-        if (ks_resize(linebuf, ks_len(linebuf) + 64) < 0) return false;
-
-        kputc('\t', linebuf);
-        kputsn(tag, 2, linebuf);
-        kputc(':', linebuf);
-        kputc(aux_type=='I'? 'i': aux_type, linebuf);
-        kputc(':', linebuf);
-        switch (aux_type) {
-            case 'H':
-            case 'Z':
-                if (kputs(bam_aux2Z(s), linebuf) < 0) return false;
-                break;
-            case 'i': kputw(bam_aux2i(s), linebuf); break;
-            case 'I': kputuw(bam_aux2i(s), linebuf); break;
-            case 'A': kputc(bam_aux2A(s), linebuf); break;
-            case 'f': kputd(bam_aux2f(s), linebuf); break;
-            case 'B': kputs("*** Unhandled aux type ***", linebuf); return false;
-            default:  kputs("*** Unknown aux type ***", linebuf); return false;
-       }
-    }
-    return true;
-}
-
-static int insert_index_sequence_into_linebuf(char *index_sequence, kstring_t *linebuf, bam1_t *rec)
-{
-    if (!index_sequence) return 0;
-
-    kstring_t new = {0,0,NULL};
-    if (linebuf->s) {
-        char *s = strchr(linebuf->s, '\n');
-        if (s) {
-            if (ks_resize(&new, linebuf->l + strlen(index_sequence) + 16) < 0)
-                return -1;
-            *s = 0;
-            kputs(linebuf->s, &new);
-            kputc(' ', &new);
-            readpart readpart = which_readpart(rec);
-            if (readpart == READ_1) kputc('1', &new);
-            else if (readpart == READ_2) kputc('2', &new);
-            else kputc('0', &new);
-
-            kputc(':', &new);
-            if (rec->core.flag & BAM_FQCFAIL) kputc('Y', &new);
-            else                              kputc('N', &new);
-
-            kputs(":0:", &new);
-            kputs(index_sequence, &new);
-            kputc('\n', &new);
-            kputs(s+1, &new);
-            free(ks_release(linebuf));
-            linebuf->s = new.s; linebuf->l = new.l; linebuf->m = new.m;
-        }
-    }
-    return 0;
-}
-
-static bool make_fq_line(const bam1_t *rec, char *seq, char *qual, kstring_t *linebuf, const bam2fq_state_t *state)
-{
-    int i;
-
-    linebuf->l = 0;
-    // Write read name
-    if (kputc(state->filetype == FASTA? '>' : '@', linebuf) < 0) return false;
-    if (kputs(bam_get_qname(rec), linebuf) < 0) return false;
-    // Add the /1 /2 if requested
-    if (state->has12) {
-        readpart readpart = which_readpart(rec);
-        if (readpart == READ_1) {
-            if (kputs("/1", linebuf) < 0) return false;
-        } else if (readpart == READ_2) {
-            if (kputs("/2", linebuf) < 0) return false;
-        }
-    }
-    if (state->copy_tags) {
-        for (i = 0; copied_tags[i]; ++i) {
-            if (!copy_tag(copied_tags[i], rec, linebuf)) {
-                fprintf(stderr, "Problem copying aux tags: [%s]\n", linebuf->s);
-                return false;
-            }
-        }
-    }
-
-    if (state->taglist->size) {
-        kliter_t(ktaglist) *p;
-        for (p = kl_begin(state->taglist); p != kl_end(state->taglist); p = kl_next(p)) {
-            if (!copy_tag(kl_val(p), rec, linebuf)) {
-                fprintf(stderr, "Problem copying aux tags: [%s]\n", linebuf->s);
-                return false;
-            }
-        }
-    }
-
-    if (kputc('\n', linebuf) < 0) return false;
-    if (kputs(seq, linebuf) < 0) return false;
-    if (kputc('\n', linebuf) < 0) return false;
-
-    if (state->filetype == FASTQ) {
-        // Write quality
-        if (kputs("+\n", linebuf) < 0) return false;
-        if (qual && *qual) {
-            if (kputs(qual, linebuf) < 0) return false;
-        } else {
-            int len = strlen(seq);
-            if (ks_resize(linebuf, ks_len(linebuf) + len + 1) < 0) return false;
-            for (i = 0; i < len; ++i) {
-                kputc(33 + state->def_qual, linebuf);
-            }
-        }
-        if (kputc('\n', linebuf) < 0) return false;
-    }
-    return true;
-}
-
-/*
- * Create FASTQ lines from the barcode tag using the index-format
- */
-static bool tags2fq(bam1_t *rec, bam2fq_state_t *state, const bam2fq_opts_t* opts)
-{
-    uint8_t *p;
-    char *ifmt = opts->index_format;
-    char *tag = NULL;
-    char *qual = NULL;
-    char *sub_tag = NULL;
-    char *sub_qual = NULL;
-    size_t tag_len;
-    int file_number = 0;
-    kstring_t linebuf = { 0, 0, NULL }; // Buffer
-
-    if (!ifmt) return true;
-
-    // read barcode tag
-    p = bam_aux_get(rec,opts->barcode_tag);
-    if (p) tag = bam_aux2Z(p);
-
-    if (!tag) return true; // there is no tag
-
-    tag_len = strlen(tag);
-    sub_tag = calloc(1, tag_len + 1);
-    if (!sub_tag) goto fail;
-    sub_qual = calloc(1, tag_len + 1);
-    if (!sub_qual) goto fail;
-
-    // read quality tag
-    p = bam_aux_get(rec, opts->quality_tag);
-    if (p) qual = bam_aux2Z(p);
-
-    // Parse the index-format string
-    while (*ifmt) {
-        if (file_number > 1) break;     // shouldn't happen if we've validated paramaters correctly
-        char action = *ifmt;        // should be 'i' or 'n'
-        ifmt++; // skip over action
-        int index_len = getLength(&ifmt);
-        int n = 0;
-
-        if (index_len < 0) {
-            // read until separator
-            while (isalpha(*tag)) {
-                sub_tag[n] = *tag++;
-                if (qual) sub_qual[n] = *qual++;
-                n++;
-            }
-            if (*tag) { // skip separator
-                tag++;
-                if (qual) qual++;
-            }
-        } else {
-            // read index_len characters
-            while (index_len-- && *tag) {
-                sub_tag[n] = *tag++;
-                if (qual) sub_qual[n] = *qual++;
-                n++;
-            }
-        }
-        sub_tag[n] = '\0';
-        sub_qual[n] = '\0';
-
-        if (action=='i' && *sub_tag) {
-            if (state->index_sequence) {
-                char *new_index_sequence = realloc(state->index_sequence, strlen(state->index_sequence) + strlen(sub_tag) + 2);
-                if (!new_index_sequence) goto fail;
-                state->index_sequence = new_index_sequence;
-                strcat(state->index_sequence, INDEX_SEPARATOR);
-                strcat(state->index_sequence, sub_tag);
-            } else {
-                state->index_sequence = strdup(sub_tag);    // we're going to need this later...
-            }
-            if (!state->index_sequence) goto fail;
-            if (!make_fq_line(rec, sub_tag, sub_qual, &linebuf, state)) goto fail;
-            if (state->illumina_tag) {
-                if (insert_index_sequence_into_linebuf(sub_tag, &linebuf, rec) < 0) {
-                    goto fail;
-                }
-            }
-            if (state->fpi[file_number]) {
-                if (bgzf_write(state->fpi[file_number++], linebuf.s, linebuf.l) < 0)
-                    goto fail;
-            }
-        }
-
-    }
-
-    free(sub_qual); free(sub_tag);
-    free(linebuf.s);
-    return true;
-
- fail:
-    perror(__func__);
-    free(sub_qual); free(sub_tag);
-    free(linebuf.s);
-    return false;
-}
-
-// Transform a bam1_t record into a string with the FASTQ representation of it
-// @returns false for error, true for success
-static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t *state)
-{
-    int32_t qlen = b->core.l_qseq;
-    assert(qlen >= 0);
-    const uint8_t *oq = NULL;
-    char *qual = NULL;
-
-    char *seq = get_read(b);
-    if (!seq) return false;
-
-    if (state->use_oq) oq = bam_aux_get(b, "OQ");
-    if (oq && *oq=='Z') {
-        qual = strdup(bam_aux2Z(oq));
-        if (!qual) goto fail;
-        if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
-            reverse(qual);
-        }
-    } else {
-        if (get_quality(b, &qual) < 0) goto fail;
-    }
-
-    if (!make_fq_line(b, seq, qual, linebuf, state)) goto fail;
-
-    free(qual);
-    free(seq);
-    return true;
-
- fail:
-    free(seq);
-    free(qual);
-    return false;
-}
-
 static void free_opts(bam2fq_opts_t *opts)
 {
-    free(opts->barcode_tag);
-    free(opts->quality_tag);
-    free(opts->index_format);
-    free(opts->extra_tags);
     free(opts);
 }
 
@@ -566,13 +210,14 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
         {"quality-tag", required_argument, NULL, 'q'},
         { NULL, 0, NULL, 0 }
     };
-    while ((c = getopt_long(argc, argv, "0:1:2:o:f:F:G:niNOs:c:tT:v:@:", lopts, NULL)) > 0) {
+    while ((c = getopt_long(argc, argv, "0:1:2:o:f:F:G:niNOs:c:tT:v:@:",
+                            lopts, NULL)) > 0) {
         switch (c) {
-            case 'b': opts->barcode_tag = strdup(optarg); break;
-            case 'q': opts->quality_tag = strdup(optarg); break;
+            case 'b': opts->barcode_tag = optarg; break;
+            case 'q': opts->quality_tag = optarg; break;
             case  1 : opts->index_file[0] = optarg; break;
             case  2 : opts->index_file[1] = optarg; break;
-            case  3 : opts->index_format = strdup(optarg); break;
+            case  3 : opts->index_format = optarg; break;
             case '0': opts->fnr[0] = optarg; break;
             case '1': opts->fnr[1] = optarg; break;
             case '2': opts->fnr[2] = optarg; break;
@@ -583,7 +228,8 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
                     flag_off_set = 1;
                     opts->flag_off = 0;
                 }
-                opts->flag_off |= strtol(optarg, 0, 0); break;
+                opts->flag_off |= strtol(optarg, 0, 0);
+                break;
             case 'G': opts->flag_alloff |= strtol(optarg, 0, 0); break;
             case 'n': opts->has12 = false; break;
             case 'N': opts->has12always = true; break;
@@ -591,13 +237,25 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
             case 's': opts->fnse = optarg; break;
             case 't': opts->copy_tags = true; break;
             case 'i': opts->illumina_tag = true; break;
-            case 'c': opts->compression_level = atoi(optarg); break;
-            case 'T': opts->extra_tags = strdup(optarg); break;
+            case 'c':
+                opts->compression_level = atoi(optarg);
+                if (opts->compression_level < 0)
+                    opts->compression_level = 0;
+                if (opts->compression_level > 9)
+                    opts->compression_level = 9;
+                break;
+            case 'T': opts->extra_tags = optarg; break;
             case 'v': opts->def_qual = atoi(optarg); break;
-            case '?': bam2fq_usage(stderr, argv[0]); free_opts(opts); return false;
+
+            case '?':
+                bam2fq_usage(stderr, argv[0]);
+                free_opts(opts);
+                return false;
             default:
                 if (parse_sam_global_opt(c, optarg, lopts, &opts->ga) != 0) {
-                    bam2fq_usage(stderr, argv[0]); free_opts(opts); return false;
+                    bam2fq_usage(stderr, argv[0]);
+                    free_opts(opts);
+                    return false;
                 }
                 break;
         }
@@ -606,8 +264,8 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
     if (opts->fnr[1] || opts->fnr[2]) opts->has12 = false;
     if (opts->has12always) opts->has12 = true;
 
-    if (!opts->barcode_tag) opts->barcode_tag = strdup(DEFAULT_BARCODE_TAG);
-    if (!opts->quality_tag) opts->quality_tag = strdup(DEFAULT_QUALITY_TAG);
+    if (!opts->barcode_tag) opts->barcode_tag = DEFAULT_BARCODE_TAG;
+    if (!opts->quality_tag) opts->quality_tag = DEFAULT_QUALITY_TAG;
 
     int nIndex = 0;
     if (opts->index_format) {
@@ -652,7 +310,8 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
     }
 
     const char* type_str = argv[0];
-    if (strcasecmp("fastq", type_str) == 0 || strcasecmp("bam2fq", type_str) == 0) {
+    if (strcasecmp("fastq", type_str) == 0 ||
+        strcasecmp("bam2fq", type_str) == 0) {
         opts->filetype = FASTQ;
     } else if (strcasecmp("fasta", type_str) == 0) {
         opts->filetype = FASTA;
@@ -680,34 +339,61 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
     return true;
 }
 
-static BGZF *open_fqfile(char *filename, int c, htsThreadPool *tp)
-{
-    char mode[4] = "w";
-    size_t len = strlen(filename);
-
-    mode[2] = 0; mode[3] = 0;
-    if (len > 3 && strstr(filename + (len - 3),".gz")) {
-        mode[1] = 'g'; mode[2] = c+'0';
-    } else if ((len > 4 && strstr(filename + (len - 4),".bgz"))
-               || (len > 5 && strstr(filename + (len - 5),".bgzf"))) {
-        mode[1] = c+'0';
-    } else {
-        mode[1] = 'u';
+void set_sam_opts(samFile *fp, bam2fq_state_t *state,
+                  const bam2fq_opts_t *opts) {
+    if (state->has12)
+        hts_set_opt(fp, FASTQ_OPT_RNUM, 1);
+
+    if (state->illumina_tag)
+        hts_set_opt(fp, FASTQ_OPT_CASAVA, 1);
+
+    hts_set_opt(fp, FASTQ_OPT_BARCODE, opts->barcode_tag);
+
+    kstring_t tag_list = {0,0};
+    if (state->copy_tags)
+        kputs("RG,BC,QT", &tag_list);
+    if (opts->extra_tags) {
+        if (tag_list.l)
+            kputc(',', &tag_list);
+        kputs(opts->extra_tags, &tag_list);
     }
+    if (tag_list.l)
+        hts_set_opt(fp, FASTQ_OPT_AUX, tag_list.s);
+    ks_free(&tag_list);
+}
 
-    BGZF *fp = bgzf_open(filename,mode);
+// Open a file as normal or gzipped based on filename.
+// Note we always use bgzf and don't bother to attempt non-blocked
+// gzip streams.  This is a departure from the old fastq code.
+static samFile *sam_open_z(char *fn, char *mode, bam2fq_state_t *state) {
+    char modez[6];
+    strcpy(modez, mode);
+
+    size_t l = strlen(fn);
+    if ((l > 3 && strcmp(fn+l-3, ".gz") == 0) ||
+        (l > 4 && strcmp(fn+l-4, ".bgz") == 0) ||
+        (l > 5 && strcmp(fn+l-5, ".bgzf") == 0)) {
+        char m[3] = {'z', state->compression_level+'0', '\0'};
+        strcat(modez, m);
+    }
+
+    samFile *fp = sam_open(fn, modez);
     if (!fp)
-        return fp;
-    if (tp->pool && bgzf_thread_pool(fp, tp->pool, tp->qsize) < 0) {
-        bgzf_close(fp);
         return NULL;
-    }
+
+    if (state->p.pool)
+        hts_set_thread_pool(fp, &state->p);
+
     return fp;
 }
 
 static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
 {
+    char *mode = opts->filetype == FASTA ? "wF" : "wf";
+
     bam2fq_state_t* state = calloc(1, sizeof(bam2fq_state_t));
+    if (!state)
+        return false;
     state->flag_on = opts->flag_on;
     state->flag_off = opts->flag_off;
     state->flag_alloff = opts->flag_alloff;
@@ -721,22 +407,6 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
     state->hstdout = NULL;
     state->compression_level = opts->compression_level;
 
-    state->taglist = kl_init(ktaglist);
-    if (opts->extra_tags) {
-        char *save_p;
-        char *s = strtok_r(opts->extra_tags, ",", &save_p);
-        while (s) {
-            if (strlen(s) != 2) {
-                fprintf(stderr, "Parsing extra tags - '%s' is not two characters\n", s);
-                free(state);
-                return false;
-            }
-            char **et = kl_pushp(ktaglist, state->taglist);
-            *et = s;
-            s = strtok_r(NULL, ",", &save_p);
-        }
-    }
-
     state->fp = sam_open(opts->fn_input, "r");
     if (state->fp == NULL) {
         print_error_errno("bam2fq","Cannot read file \"%s\"", opts->fn_input);
@@ -768,12 +438,12 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
         return false;
     }
     if (opts->fnse) {
-        state->fpse = open_fqfile(opts->fnse, state->compression_level, &state->p);
-        if (state->fpse == NULL) {
-            print_error_errno("bam2fq", "Cannot write to singleton file \"%s\"", opts->fnse);
+        if (!(state->fpse = sam_open_z(opts->fnse, mode, state))) {
+            print_error_errno("bam2fq", "Cannot open singleton file \"%s\"", opts->fnse);
             free(state);
             return false;
         }
+        set_sam_opts(state->fpse, state, opts);
     }
 
     if (opts->ga.reference) {
@@ -784,6 +454,7 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
         }
     }
 
+    // single, read1, read2
     int i, j;
     for (i = 0; i < 3; ++i) {
         if (opts->fnr[i]) {
@@ -791,28 +462,30 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
                 if (opts->fnr[j] && strcmp(opts->fnr[j], opts->fnr[i]) == 0)
                     break;
             if (j == i) {
-                state->fpr[i] = open_fqfile(opts->fnr[i], state->compression_level, &state->p);
-                if (state->fpr[i] == NULL) {
-                    print_error_errno("bam2fq", "Cannot write to r%d file \"%s\"",
+                if (!(state->fpr[i] = sam_open_z(opts->fnr[i], mode, state))) {
+                    print_error_errno("bam2fq", "Cannot open r%d file \"%s\"",
                                       i, opts->fnr[i]);
                     free(state);
                     return false;
                 }
+                set_sam_opts(state->fpr[i], state, opts);
             } else {
                 state->fpr[i] = state->fpr[j];
             }
         } else {
             if (!state->hstdout) {
-                state->hstdout = bgzf_dopen(fileno(stdout), "wu");
-                if (!state->hstdout) {
+                if (!(state->hstdout = sam_open_z("-", mode, state))) {
                     print_error_errno("bam2fq", "Cannot open STDOUT");
                     free(state);
                     return false;
                 }
+                set_sam_opts(state->hstdout, state, opts);
             }
             state->fpr[i] = state->hstdout;
         }
     }
+
+    // index 1, index 2
     for (i = 0; i < 2; i++) {
         state->fpi[i] = NULL;
         if (opts->index_file[i]) {
@@ -823,13 +496,14 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
                 if (opts->index_file[j] && strcmp(opts->index_file[j], opts->index_file[i]) == 0)
                     break;
             if (i == j) {
-                state->fpi[i] = open_fqfile(opts->index_file[i], state->compression_level, &state->p);
-                if (state->fpi[i] == NULL) {
-                    print_error_errno("bam2fq", "Cannot write to i%d file \"%s\"",
+                if (!(state->fpi[i] = sam_open_z(opts->index_file[i], mode,
+                                                 state))) {
+                    print_error_errno("bam2fq", "Cannot open i%d file \"%s\"",
                                       i+1, opts->index_file[i]);
                     free(state);
                     return false;
                 }
+                set_sam_opts(state->fpi[i], state, opts);
             } else if (j < 0) {
                 state->fpi[i] = state->fpr[j+3];
             } else {
@@ -854,21 +528,25 @@ static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int*
     bool valid = true;
     sam_hdr_destroy(state->h);
     check_sam_close("bam2fq", state->fp, opts->fn_input, "file", status);
-    if (state->fpse && bgzf_close(state->fpse)) { print_error_errno("bam2fq", "Error closing singleton file \"%s\"", opts->fnse); valid = false; }
+    if (state->fpse && sam_close(state->fpse) < 0) {
+        print_error_errno("bam2fq", "Error closing singleton file \"%s\"", opts->fnse);
+        valid = false;
+    }
+
     int i, j;
     for (i = 0; i < 3; ++i) {
         if (state->fpr[i] != state->hstdout) {
             for (j = 0; j < i; j++)
                 if (state->fpr[i] == state->fpr[j])
                     break;
-            if (j == i && bgzf_close(state->fpr[i])) {
+            if (j == i && sam_close(state->fpr[i])) {
                 print_error_errno("bam2fq", "Error closing r%d file \"%s\"", i, opts->fnr[i]);
                 valid = false;
             }
         }
     }
     if (state->hstdout) {
-        if (bgzf_close(state->hstdout)) {
+        if (sam_close(state->hstdout) < 0) {
             print_error_errno("bam2fq", "Error closing STDOUT");
             valid = false;
         }
@@ -880,12 +558,11 @@ static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int*
         for (j -= 3; j >= 0 && j < i; j++)
             if (state->fpi[i] == state->fpi[j])
                 break;
-        if (j == i && state->fpi[i] && bgzf_close(state->fpi[i])) {
+        if (j == i && state->fpi[i] && sam_close(state->fpi[i]) < 0) {
             print_error_errno("bam2fq", "Error closing i%d file \"%s\"", i+1, opts->index_file[i]);
             valid = false;
         }
     }
-    kl_destroy(ktaglist,state->taglist);
     free(state->index_sequence);
     if (state->p.pool)
         hts_tpool_destroy(state->p.pool);
@@ -901,135 +578,300 @@ static inline bool filter_it_out(const bam1_t *b, const bam2fq_state_t *state)
 
 }
 
+int write_index_rec(samFile *fp, bam1_t *b, bam2fq_state_t *state,
+                    bam2fq_opts_t* opts, char *seq, int seq_len,
+                    char *qual, int qual_len) {
+    if (!fp || !b || !seq_len)
+        return 0;
+
+    int ret = -1;
+    bam1_t *b2 = bam_init1(); // FIXME: reuse
+    if (!b2)
+        return -1;
+
+    size_t aux_len = b->data + b->l_data - bam_get_aux(b);
+    if (bam_set1(b2, b->core.l_qname, bam_get_qname(b),
+                 (b->core.flag | BAM_FUNMAP) & ~BAM_FREVERSE,
+                 -1, -1, 0,    // refid, pos, mapq
+                 0, NULL,      // cigar
+                 -1, -1, 0,    // rnext, pnext, tlen
+                 seq_len, seq, qual,
+                 aux_len) < 0)
+        goto err;
+
+    uint8_t *q = bam_get_qual(b2);
+    if (qual) {
+        int i;
+        for (i = 0; i < seq_len; i++)
+            q[i] -= '!';
+    } else {
+        memset(q, opts->def_qual, seq_len);
+    }
+
+    memcpy(bam_get_aux(b2), bam_get_aux(b), aux_len);
+    b2->l_data += aux_len;
+    if (sam_write1(fp, state->h, b2) < 0)
+        goto err;
+
+    ret = 0;
+ err:
+    if (b2)
+        bam_destroy1(b2);
+    return ret;
+}
+
+int output_index(bam1_t *b1, bam1_t *b2, bam2fq_state_t *state,
+                 bam2fq_opts_t* opts) {
+    bam1_t *b[2] = {b1, b2};
+
+    char *ifmt = opts->index_format;
+    if (!ifmt)
+        ifmt = "i*i*";
+
+    // Get seq / qual elements
+    char *bc = NULL, *qt = NULL;
+    if (b1)
+        bc = (char *)bam_aux_get(b1, opts->barcode_tag);
+    if (b2 && !bc)
+        bc = (char *)bam_aux_get(b2, opts->barcode_tag);
+    if (!bc)
+        return 0;
+    else
+        bc++; // skip Z
+
+    if (b1)
+        qt = (char *)bam_aux_get(b1, opts->quality_tag);
+    if (b2 && !qt)
+        qt = (char *)bam_aux_get(b2, opts->quality_tag);
+    if (qt && strlen(bc) != strlen(qt)-1)
+        qt = NULL;
+    else if (qt)
+        qt++;
+
+    int inum = 0;
+    while (inum < 2) {
+        char fc = *ifmt++;
+        if (!fc)
+            break; // ran out of index-format
+
+        long len, rem = 0;
+        if (isdigit(*ifmt)) {
+            rem = len = strtol(ifmt, &ifmt, 10);
+        } else {
+            ifmt++;
+            len = 0;
+        }
+
+        char *bc_end = bc, *qt_end = qt;
+        while (len ? *bc_end && rem-- : isalpha(*bc_end))
+            bc_end++, qt_end += qt != NULL;
+
+        switch (fc) {
+        case 'n':
+            // skip
+            bc = bc_end + (len==0);
+            if (qt)
+                qt = qt_end + (len==0);
+            break;
+
+        case 'i':
+            if (write_index_rec(state->fpi[inum], b[inum], state, opts,
+                                bc, bc_end-bc, qt, qt_end-qt) < 0)
+                return -1;
+            bc = bc_end + (len==0);
+            if (qt)
+                qt = qt_end + (len==0);
+            inum++;
+            break;
+
+        default:
+            fprintf(stderr, "Unknown index-format code\n");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int flush_rec(bam2fq_state_t *state, bam2fq_opts_t* opts,
+                     bam1_t *b[4], int score[3], int best[3],
+                     int64_t *n_singletons) {
+    // Paired data, with 1 or 2 ends present.
+    if (score[1] > 0 && score[2] > 0) {
+        // If CASAVA tag is required and barcode is only on R1,
+        // copy it to R2
+        if (state->illumina_tag) {
+            char *tag;
+            if ((tag = (char *)bam_aux_get(b[best[1]],
+                                           opts->barcode_tag)))
+                if (bam_aux_update_str(b[best[2]],
+                                       opts->barcode_tag,
+                                       strlen(tag), tag+1) < 0)
+                    goto err;
+            if ((tag = (char *)bam_aux_get(b[best[1]],
+                                           opts->quality_tag)))
+                if (bam_aux_update_str(b[best[2]],
+                                       opts->quality_tag,
+                                       strlen(tag), tag+1) < 0)
+                    goto err;
+
+        }
+        if (sam_write1(state->fpr[1], state->h, b[best[1]]) < 0)
+            goto err;
+        if (sam_write1(state->fpr[2], state->h, b[best[2]]) < 0)
+            goto err;
+
+        if (output_index(b[best[1]], b[best[2]], state, opts) < 0)
+            goto err;
+    } else if (score[1] > 0 || score[2] > 0) {
+        if (state->fpse) {
+            // print whichever one exists to fpse
+            if (score[1] > 0) {
+                if (sam_write1(state->fpse, state->h, b[best[1]]) < 0)
+                    goto err;
+            } else {
+                if (sam_write1(state->fpse, state->h, b[best[2]]) < 0)
+                    goto err;
+            }
+            ++(*n_singletons);
+        } else {
+            if (score[1] > 0) {
+                if (sam_write1(state->fpr[1], state->h, b[best[1]]) < 0)
+                    goto err;
+            } else {
+                if (sam_write1(state->fpr[2], state->h, b[best[2]]) < 0)
+                    goto err;
+            }
+        }
+
+        if (output_index(score[1] > 0 ? b[best[1]] : NULL,
+                         score[2] > 0 ? b[best[2]] : NULL,
+                         state, opts) < 0)
+            goto err;
+    }
+
+    if (score[0]) { // single ended data (neither READ1 nor READ2)
+        if (sam_write1(state->fpr[0], state->h, b[best[0]]) < 0)
+            goto err;
+
+        if (output_index(b[best[0]], NULL, state, opts) < 0)
+            goto err;
+    }
+
+    return 0;
+
+ err:
+    return -1;
+}
+
 static bool bam2fq_mainloop(bam2fq_state_t *state, bam2fq_opts_t* opts)
 {
     int n;
-    bam1_t *records[3] = {NULL, NULL, NULL};
     char *current_qname = NULL;
     int64_t n_reads = 0, n_singletons = 0; // Statistics
-    kstring_t linebuf[3] = {{0,0,NULL},{0,0,NULL},{0,0,NULL}};
     int score[3];
     int at_eof;
-    bool valid = true;
-    bam1_t* b = NULL;
+    bool valid = false;
+    int best[3] = {-1, -1, -1}; // map R0, R1, single to b[] indices;
+                                // indexed by [readpart]
+    bam1_t *b[4];               // 3 readparts, plus current record
 
-    while (true) {
-        if (!b)
-            b = bam_init1();
-        if (b == NULL) {
+    for (n = 0; n < 4; n++) {
+        if (!(b[n] = bam_init1())) {
             perror("[bam2fq_mainloop] Malloc error for bam record buffer.");
-            valid = false;
-            break;
+            return false;
         }
-        int res = sam_read1(state->fp, state->h, b);
+    }
+
+    n = 0;
+    while (true) {
+        int res = sam_read1(state->fp, state->h, b[n]);
         if (res < -1) {
             fprintf(stderr, "[bam2fq_mainloop] Failed to read bam record.\n");
-            valid = false;
-            break;
+            goto err;
         }
         at_eof = res < 0;
 
-        if (!at_eof && filter_it_out(b, state))
+        if (!at_eof && filter_it_out(b[n], state))
             continue;
-        if (!at_eof) ++n_reads;
-
-        if (at_eof || !current_qname || (strcmp(current_qname, bam_get_qname(b)) != 0)) {
-            if (current_qname) {
-                if (state->illumina_tag) {
-                    for (n=0; valid && n<3; n++) {
-                        if (!records[n]) continue;
-                        if (insert_index_sequence_into_linebuf(state->index_sequence, &linebuf[n], records[n]) < 0) valid = false;
-                    }
-                    if (!valid) break;
-                }
-                free(state->index_sequence); state->index_sequence = NULL;
-                if (score[1] > 0 && score[2] > 0) {
-                    // print linebuf[1] to fpr[1], linebuf[2] to fpr[2]
-                    if (bgzf_write(state->fpr[1], linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
-                    if (bgzf_write(state->fpr[2], linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
-                } else if (score[1] > 0 || score[2] > 0) {
-                    if (state->fpse) {
-                        // print whichever one exists to fpse
-                        if (score[1] > 0) {
-                            if (bgzf_write(state->fpse, linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
-                        } else {
-                            if (bgzf_write(state->fpse, linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
-                        }
-                        ++n_singletons;
-                    } else {
-                        if (score[1] > 0) {
-                            if (bgzf_write(state->fpr[1], linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
-                        } else {
-                            if (bgzf_write(state->fpr[2], linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
-                        }
-                    }
-                }
-                if (score[0]) { // TODO: check this
-                    // print linebuf[0] to fpr[0]
-                    if (bgzf_write(state->fpr[0], linebuf[0].s, linebuf[0].l) < 0) { valid = false; break; }
-                }
+        if (!at_eof) {
+            ++n_reads;
+
+            // Handle -O option: use OQ for qual
+            uint8_t *oq;
+            if (state->use_oq && (oq = bam_aux_get(b[n],"OQ")) && *oq == 'Z') {
+                int i, l = strlen((char *)++oq);
+                uint8_t *qual = bam_get_qual(b[n]);
+                for (i = 0; i < l && i < b[n]->core.l_qseq; i++)
+                    qual[i] = oq[i] - '!';
             }
+        }
 
+        if (at_eof
+            || !current_qname
+            || (strcmp(current_qname, bam_get_qname(b[n])) != 0)) {
+            // New name, so flush best examples of previous name.
+            if (current_qname)
+                if (flush_rec(state, opts, b, score, best, &n_singletons) < 0)
+                    goto err;
 
-            free(current_qname); current_qname = NULL;
+            current_qname = bam_get_qname(b[n]);
             score[0] = score[1] = score[2] = 0;
-            for (n=0; n < 3; n++) {
-                bam_destroy1(records[n]); records[n]=NULL;
-            }
 
             if (at_eof) { break; }
-
-            current_qname = strdup(bam_get_qname(b));
-            if (!current_qname) { valid = false; break; }
         }
 
         // Prefer a copy of the read that has base qualities
-        int b_score = bam_get_qual(b)[0] != 0xff? 2 : 1;
-        readpart rp = which_readpart(b);
-        if (b_score > score[rp]) {
-            if (!tags2fq(b, state, opts)) { valid = false; break; }
-            if (records[rp]) bam_destroy1(records[rp]);
-            records[rp] = b;
+        int b_score = bam_get_qual(b[n])[0] != 0xff? 2 : 1;
+        readpart rp = which_readpart(b[n]);
+        if (score[rp] < b_score) {
             score[rp] = b_score;
-            b = NULL;
-            if(!bam1_to_fq(records[rp], &linebuf[rp], state)) {
-                fprintf(stderr, "[%s] Error converting read to FASTA/Q\n", __func__);
-                valid = false; break;
-            }
+            // Record b[n] slot for best copy of readpair and find a new
+            // slot for next bam read
+            best[rp] = n;
+            int used_slot[4] = {0}, i;
+            for (i = 0; i < 3; i++)
+                if (best[i] >= 0)
+                    used_slot[best[i]] = 1;
+            for (i = 0; i < 4 && used_slot[i]; i++)
+                ;
+            n = i;
         }
     }
+
+    valid = true;
+ err:
     if (!valid)
-    {
-        perror("[bam2fq_mainloop] Error writing to FASTx files.");
-    }
-    bam_destroy1(b);
-    for (n=0; n < 3; n++) {
-        bam_destroy1(records[n]);
-    }
-    free(current_qname);
-    free(linebuf[0].s);
-    free(linebuf[1].s);
-    free(linebuf[2].s);
-    fprintf(stderr, "[M::%s] discarded %" PRId64 " singletons\n", __func__, n_singletons);
-    fprintf(stderr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
+        print_error_errno("bam2fq", "Error writing to FASTx files.");
+
+    for (n = 0; n < 4; n++)
+        bam_destroy1(b[n]);
+
+    fprintf(stderr, "[M::%s] discarded %" PRId64 " singletons\n",
+            __func__, n_singletons);
+    fprintf(stderr, "[M::%s] processed %" PRId64 " reads\n",
+            __func__, n_reads);
 
     return valid;
 }
 
 int main_bam2fq(int argc, char *argv[])
 {
-    int status = EXIT_SUCCESS;
+    int status = EXIT_FAILURE;
     bam2fq_opts_t* opts = NULL;
     bam2fq_state_t* state = NULL;
 
     bool valid = parse_opts(argc, argv, &opts);
     if (!valid || opts == NULL) return valid ? EXIT_SUCCESS : EXIT_FAILURE;
 
-    if (!init_state(opts, &state)) return EXIT_FAILURE;
+    if (!init_state(opts, &state)) goto err;
+
+    if (!bam2fq_mainloop(state,opts)) goto err;
 
-    if (!bam2fq_mainloop(state,opts)) status = EXIT_FAILURE;
+    if (!destroy_state(opts, state, &status)) goto err;
 
-    if (!destroy_state(opts, state, &status)) return EXIT_FAILURE;
+    status = EXIT_SUCCESS;
+ err:
     sam_global_args_free(&opts->ga);
     free_opts(opts);
 
diff --git a/samtools/bam_fastq.c.pysam.c b/samtools/bam_fastq.c.pysam.c
index 2fe4c87..f7249d1 100644
--- a/samtools/bam_fastq.c.pysam.c
+++ b/samtools/bam_fastq.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam_fastq.c -- FASTA and FASTQ file generation
 
-    Copyright (C) 2009-2017, 2019 Genome Research Ltd.
+    Copyright (C) 2009-2017, 2019-2020 Genome Research Ltd.
     Portions copyright (C) 2009, 2011, 2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -44,16 +44,11 @@ DEALINGS IN THE SOFTWARE.  */
 #include "samtools.h"
 #include "sam_opts.h"
 
-#define taglist_free(p)
-KLIST_INIT(ktaglist, char*, taglist_free)
-
 #define DEFAULT_BARCODE_TAG "BC"
 #define DEFAULT_QUALITY_TAG "QT"
 #define INDEX_SEPARATOR "+"
 
 int8_t seq_comp_table[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
-static const char *copied_tags[] = { "RG", "BC", "QT", NULL };
-
 static void bam2fq_usage(FILE *to, const char *command)
 {
     int fq = strcasecmp("fastq", command) == 0 || strcasecmp("bam2fq", command) == 0;
@@ -62,64 +57,71 @@ static void bam2fq_usage(FILE *to, const char *command)
     fprintf(to,
 "\n"
 "Description:\n"
-"Converts a SAM, BAM or CRAM into either FASTQ or FASTA format depending on the command invoked.\n"
+"Converts a SAM, BAM or CRAM to %s format.\n"
 "\n"
 "Options:\n"
-"  -0 FILE              write reads designated READ_OTHER to FILE\n"
-"  -1 FILE              write reads designated READ1 to FILE\n"
-"  -2 FILE              write reads designated READ2 to FILE\n"
-"  -o FILE              write reads designated READ1 or READ2 to FILE\n"
-"                       note: if a singleton file is specified with -s, only\n"
-"                       paired reads will be written to the -1 and -2 files.\n"
-"  -f INT               only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
-"  -F INT               only include reads with none of the FLAGS in INT present [0x900]\n"       //   F&x == 0
-"  -G INT               only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
-"  -n                   don't append /1 and /2 to the read name\n"
-"  -N                   always append /1 and /2 to the read name\n");
+"  -0 FILE      write reads designated READ_OTHER to FILE\n"
+"  -1 FILE      write reads designated READ1 to FILE\n"
+"  -2 FILE      write reads designated READ2 to FILE\n"
+"  -o FILE      write reads designated READ1 or READ2 to FILE\n"
+"               note: if a singleton file is specified with -s, only\n"
+"               paired reads will be written to the -1 and -2 files.\n"
+"  -f INT       only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
+"  -F INT       only include reads with none of the FLAGS in INT present [0x900]\n"       //   F&x == 0
+"  -G INT       only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
+"  -n           don't append /1 and /2 to the read name\n"
+"  -N           always append /1 and /2 to the read name\n",
+    fq ? "FASTQ" : "FASTA");
     if (fq) fprintf(to,
-"  -O                   output quality in the OQ tag if present\n");
+"  -O           output quality in the OQ tag if present\n");
     fprintf(to,
-"  -s FILE              write singleton reads designated READ1 or READ2 to FILE\n"
-"  -t                   copy RG, BC and QT tags to the %s header line\n",
+"  -s FILE      write singleton reads designated READ1 or READ2 to FILE\n"
+"  -t           copy RG, BC and QT tags to the %s header line\n",
     fq ? "FASTQ" : "FASTA");
     fprintf(to,
-"  -T TAGLIST           copy arbitrary tags to the %s header line\n",
+"  -T TAGLIST   copy arbitrary tags to the %s header line\n",
     fq ? "FASTQ" : "FASTA");
     if (fq) fprintf(to,
-"  -v INT               default quality score if not given in file [1]\n"
-"  -i                   add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)\n"
-"  -c                   compression level [0..9] to use when creating gz or bgzf fastq files [1]\n"
-"  --i1 FILE            write first index reads to FILE\n"
-"  --i2 FILE            write second index reads to FILE\n"
-"  --barcode-tag TAG    Barcode tag [default: " DEFAULT_BARCODE_TAG "]\n"
-"  --quality-tag TAG    Quality tag [default: " DEFAULT_QUALITY_TAG "]\n"
-"  --index-format STR   How to parse barcode and quality tags\n\n");
+"  -v INT       default quality score if not given in file [1]\n"
+"  -i           add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)\n"
+"  -c INT       compression level [0..9] to use when writing bgzf files [1]\n"
+"  --i1 FILE    write first index reads to FILE\n"
+"  --i2 FILE    write second index reads to FILE\n"
+"  --barcode-tag TAG\n"
+"               Barcode tag [" DEFAULT_BARCODE_TAG "]\n"
+"  --quality-tag TAG\n"
+"               Quality tag [" DEFAULT_QUALITY_TAG "]\n"
+"  --index-format STR\n"
+"               How to parse barcode and quality tags\n\n");
     sam_global_opt_help(to, "-.--.@-.");
     fprintf(to,
 "\n"
-"The files will be automatically compressed if the file names have a .gz or .bgzf extension.\n"
-"The input to this program must be collated by name. Run 'samtools collate' or 'samtools sort -n'.\n"
+"The files will be automatically compressed if the file names have a .gz\n"
+"or .bgzf extension.  The input to this program must be collated by name.\n"
+"Run 'samtools collate' or 'samtools sort -n' to achieve this.\n"
 "\n"
 "Reads are designated READ1 if FLAG READ1 is set and READ2 is not set.\n"
 "Reads are designated READ2 if FLAG READ1 is not set and READ2 is set.\n"
-"Reads are designated READ_OTHER if FLAGs READ1 and READ2 are either both set\n"
-"or both unset.\n"
+"Otherwise reads are designated READ_OTHER (both flags set or both flags unset).\n"
 "Run 'samtools flags' for more information on flag codes and meanings.\n");
     fprintf(to,
 "\n"
-"The index-format string describes how to parse the barcode and quality tags, for example:\n"
-"   i14i8       the first 14 characters are index 1, the next 8 characters are index 2\n"
-"   n8i14       ignore the first 8 characters, and use the next 14 characters for index 1\n"
-"If the tag contains a separator, then the numeric part can be replaced with '*' to mean\n"
-"'read until the separator or end of tag', for example:\n"
-"   n*i*        ignore the left part of the tag until the separator, then use the second part\n"
-"               of the tag as index 1\n");
+"The index-format string describes how to parse the barcode and quality tags.\n"
+"It is made up of 'i' or 'n' followed by a length or '*'.  For example:\n"
+"   i14i8       The first 14 characters are index 1, the next 8 are index 2\n"
+"   n8i14       Ignore the first 8 characters, and use the next 14 for index 1\n\n"
+"If the tag contains a separator, then the numeric part can be replaced with\n"
+"'*' to mean 'read until the separator or end of tag', for example:\n"
+"   i*i*        Break the tag at the separator into index 1 and index 2\n"
+"   n*i*        Ignore the left part of the tag until the separator,\n"
+"               then use the second part of the tag as index 1\n");
     fprintf(to,
 "\n"
 "Examples:\n"
-" To get just the paired reads in separate files, use:\n"
-"   samtools %s -1 paired1.%s -2 paired2.%s -0 /dev/null -s /dev/null -n in.bam\n"
-"\n To get all non-supplementary/secondary reads in a single file, redirect the output:\n"
+"To get just the paired reads in separate files, use:\n"
+"   samtools %s -1 pair1.%s -2 pair2.%s -0 /dev/null -s /dev/null -n in.bam\n"
+"\nTo get all non-supplementary/secondary reads in a single file, redirect\n"
+"the output:\n"
 "   samtools %s in.bam > all_reads.%s\n",
             command, fq ? "fq" : "fa", fq ? "fq" : "fa",
             command, fq ? "fq" : "fa");
@@ -146,96 +148,20 @@ typedef struct bam2fq_opts {
 
 typedef struct bam2fq_state {
     samFile *fp;
-    BGZF *fpse;
-    BGZF *fpr[3];
-    BGZF *fpi[2];
-    BGZF *hsamtools_stdout;
+    samFile *fpse;
+    samFile *fpr[3];
+    samFile *fpi[3];
+    samFile *hsamtools_stdout;
     sam_hdr_t *h;
     bool has12, use_oq, copy_tags, illumina_tag;
     int flag_on, flag_off, flag_alloff;
     fastfile filetype;
     int def_qual;
-    klist_t(ktaglist) *taglist;
     char *index_sequence;
     char compression_level;
     htsThreadPool p;
 } bam2fq_state_t;
 
-/*
- * Get and decode the read from a BAM record.
- *
- * TODO: htslib really needs an interface for this.  Consider this or perhaps
- * bam_get_seq_str (current vs original orientation) and bam_get_qual_str
- * functions as string formatted equivalents to bam_get_{seq,qual}?
- */
-
-/*
- * Reverse a string in place.
- * From http://stackoverflow.com/questions/8534274/is-the-strrev-function-not-available-in-linux.
- * Author Sumit-naik: http://stackoverflow.com/users/4590926/sumit-naik
- */
-static char *reverse(char *str)
-{
-    int i = strlen(str)-1,j=0;
-    char ch;
-    while (i>j) {
-        ch = str[i];
-        str[i]= str[j];
-        str[j] = ch;
-        i--;
-        j++;
-    }
-    return str;
-}
-
-/* return the read, reverse complemented if necessary */
-static char *get_read(const bam1_t *rec)
-{
-    int len = rec->core.l_qseq + 1;
-    char *read = calloc(1, len);
-    char *seq = (char *)bam_get_seq(rec);
-    int n;
-
-    if (!read) return NULL;
-
-    for (n=0; n < rec->core.l_qseq; n++) {
-        if (rec->core.flag & BAM_FREVERSE) read[n] = seq_nt16_str[seq_comp_table[bam_seqi(seq,n)]];
-        else                               read[n] = seq_nt16_str[bam_seqi(seq,n)];
-    }
-    if (rec->core.flag & BAM_FREVERSE) reverse(read);
-    return read;
-}
-
-/*
- * get and decode the quality from a BAM record
- */
-static int get_quality(const bam1_t *rec, char **qual_out)
-{
-    char *quality = calloc(1, rec->core.l_qseq + 1);
-    char *q = (char *)bam_get_qual(rec);
-    int n;
-
-    if (!quality) return -1;
-
-    if (*q == '\xff') {
-        free(quality);
-        *qual_out = NULL;
-        return 0;
-    }
-
-    for (n=0; n < rec->core.l_qseq; n++) {
-        quality[n] = q[n]+33;
-    }
-    if (rec->core.flag & BAM_FREVERSE) reverse(quality);
-    *qual_out = quality;
-    return 0;
-}
-
-//
-// End of htslib complaints
-//
-
-
 static readpart which_readpart(const bam1_t *b)
 {
     if ((b->core.flag & BAM_FREAD1) && !(b->core.flag & BAM_FREAD2)) {
@@ -247,290 +173,8 @@ static readpart which_readpart(const bam1_t *b)
     }
 }
 
-/*
- * parse the length part from the index-format string
- */
-static int getLength(char **s)
-{
-    int n = 0;
-    while (**s) {
-        if (**s == '*') { n=-1; (*s)++; break; }
-        if ( !isdigit(**s)) break;
-        n = n*10 + ((**s)-'0');
-        (*s)++;
-    }
-    return n;
-}
-
-static bool copy_tag(const char *tag, const bam1_t *rec, kstring_t *linebuf)
-{
-    uint8_t *s = bam_aux_get(rec, tag);
-    if (s) {
-        char aux_type = *s;
-        switch (aux_type) {
-            case 'C':
-            case 'S': aux_type = 'I'; break;
-            case 'c':
-            case 's': aux_type = 'i'; break;
-            case 'd': aux_type = 'f'; break;
-        }
-
-        // Ensure space.  Need 6 chars + length of tag.  Max length of
-        // i is 16, A is 21, B currently 26, Z is unknown, so
-        // have to check that one later.
-        if (ks_resize(linebuf, ks_len(linebuf) + 64) < 0) return false;
-
-        kputc('\t', linebuf);
-        kputsn(tag, 2, linebuf);
-        kputc(':', linebuf);
-        kputc(aux_type=='I'? 'i': aux_type, linebuf);
-        kputc(':', linebuf);
-        switch (aux_type) {
-            case 'H':
-            case 'Z':
-                if (kputs(bam_aux2Z(s), linebuf) < 0) return false;
-                break;
-            case 'i': kputw(bam_aux2i(s), linebuf); break;
-            case 'I': kputuw(bam_aux2i(s), linebuf); break;
-            case 'A': kputc(bam_aux2A(s), linebuf); break;
-            case 'f': kputd(bam_aux2f(s), linebuf); break;
-            case 'B': kputs("*** Unhandled aux type ***", linebuf); return false;
-            default:  kputs("*** Unknown aux type ***", linebuf); return false;
-       }
-    }
-    return true;
-}
-
-static int insert_index_sequence_into_linebuf(char *index_sequence, kstring_t *linebuf, bam1_t *rec)
-{
-    if (!index_sequence) return 0;
-
-    kstring_t new = {0,0,NULL};
-    if (linebuf->s) {
-        char *s = strchr(linebuf->s, '\n');
-        if (s) {
-            if (ks_resize(&new, linebuf->l + strlen(index_sequence) + 16) < 0)
-                return -1;
-            *s = 0;
-            kputs(linebuf->s, &new);
-            kputc(' ', &new);
-            readpart readpart = which_readpart(rec);
-            if (readpart == READ_1) kputc('1', &new);
-            else if (readpart == READ_2) kputc('2', &new);
-            else kputc('0', &new);
-
-            kputc(':', &new);
-            if (rec->core.flag & BAM_FQCFAIL) kputc('Y', &new);
-            else                              kputc('N', &new);
-
-            kputs(":0:", &new);
-            kputs(index_sequence, &new);
-            kputc('\n', &new);
-            kputs(s+1, &new);
-            free(ks_release(linebuf));
-            linebuf->s = new.s; linebuf->l = new.l; linebuf->m = new.m;
-        }
-    }
-    return 0;
-}
-
-static bool make_fq_line(const bam1_t *rec, char *seq, char *qual, kstring_t *linebuf, const bam2fq_state_t *state)
-{
-    int i;
-
-    linebuf->l = 0;
-    // Write read name
-    if (kputc(state->filetype == FASTA? '>' : '@', linebuf) < 0) return false;
-    if (kputs(bam_get_qname(rec), linebuf) < 0) return false;
-    // Add the /1 /2 if requested
-    if (state->has12) {
-        readpart readpart = which_readpart(rec);
-        if (readpart == READ_1) {
-            if (kputs("/1", linebuf) < 0) return false;
-        } else if (readpart == READ_2) {
-            if (kputs("/2", linebuf) < 0) return false;
-        }
-    }
-    if (state->copy_tags) {
-        for (i = 0; copied_tags[i]; ++i) {
-            if (!copy_tag(copied_tags[i], rec, linebuf)) {
-                fprintf(samtools_stderr, "Problem copying aux tags: [%s]\n", linebuf->s);
-                return false;
-            }
-        }
-    }
-
-    if (state->taglist->size) {
-        kliter_t(ktaglist) *p;
-        for (p = kl_begin(state->taglist); p != kl_end(state->taglist); p = kl_next(p)) {
-            if (!copy_tag(kl_val(p), rec, linebuf)) {
-                fprintf(samtools_stderr, "Problem copying aux tags: [%s]\n", linebuf->s);
-                return false;
-            }
-        }
-    }
-
-    if (kputc('\n', linebuf) < 0) return false;
-    if (kputs(seq, linebuf) < 0) return false;
-    if (kputc('\n', linebuf) < 0) return false;
-
-    if (state->filetype == FASTQ) {
-        // Write quality
-        if (kputs("+\n", linebuf) < 0) return false;
-        if (qual && *qual) {
-            if (kputs(qual, linebuf) < 0) return false;
-        } else {
-            int len = strlen(seq);
-            if (ks_resize(linebuf, ks_len(linebuf) + len + 1) < 0) return false;
-            for (i = 0; i < len; ++i) {
-                kputc(33 + state->def_qual, linebuf);
-            }
-        }
-        if (kputc('\n', linebuf) < 0) return false;
-    }
-    return true;
-}
-
-/*
- * Create FASTQ lines from the barcode tag using the index-format
- */
-static bool tags2fq(bam1_t *rec, bam2fq_state_t *state, const bam2fq_opts_t* opts)
-{
-    uint8_t *p;
-    char *ifmt = opts->index_format;
-    char *tag = NULL;
-    char *qual = NULL;
-    char *sub_tag = NULL;
-    char *sub_qual = NULL;
-    size_t tag_len;
-    int file_number = 0;
-    kstring_t linebuf = { 0, 0, NULL }; // Buffer
-
-    if (!ifmt) return true;
-
-    // read barcode tag
-    p = bam_aux_get(rec,opts->barcode_tag);
-    if (p) tag = bam_aux2Z(p);
-
-    if (!tag) return true; // there is no tag
-
-    tag_len = strlen(tag);
-    sub_tag = calloc(1, tag_len + 1);
-    if (!sub_tag) goto fail;
-    sub_qual = calloc(1, tag_len + 1);
-    if (!sub_qual) goto fail;
-
-    // read quality tag
-    p = bam_aux_get(rec, opts->quality_tag);
-    if (p) qual = bam_aux2Z(p);
-
-    // Parse the index-format string
-    while (*ifmt) {
-        if (file_number > 1) break;     // shouldn't happen if we've validated paramaters correctly
-        char action = *ifmt;        // should be 'i' or 'n'
-        ifmt++; // skip over action
-        int index_len = getLength(&ifmt);
-        int n = 0;
-
-        if (index_len < 0) {
-            // read until separator
-            while (isalpha(*tag)) {
-                sub_tag[n] = *tag++;
-                if (qual) sub_qual[n] = *qual++;
-                n++;
-            }
-            if (*tag) { // skip separator
-                tag++;
-                if (qual) qual++;
-            }
-        } else {
-            // read index_len characters
-            while (index_len-- && *tag) {
-                sub_tag[n] = *tag++;
-                if (qual) sub_qual[n] = *qual++;
-                n++;
-            }
-        }
-        sub_tag[n] = '\0';
-        sub_qual[n] = '\0';
-
-        if (action=='i' && *sub_tag) {
-            if (state->index_sequence) {
-                char *new_index_sequence = realloc(state->index_sequence, strlen(state->index_sequence) + strlen(sub_tag) + 2);
-                if (!new_index_sequence) goto fail;
-                state->index_sequence = new_index_sequence;
-                strcat(state->index_sequence, INDEX_SEPARATOR);
-                strcat(state->index_sequence, sub_tag);
-            } else {
-                state->index_sequence = strdup(sub_tag);    // we're going to need this later...
-            }
-            if (!state->index_sequence) goto fail;
-            if (!make_fq_line(rec, sub_tag, sub_qual, &linebuf, state)) goto fail;
-            if (state->illumina_tag) {
-                if (insert_index_sequence_into_linebuf(sub_tag, &linebuf, rec) < 0) {
-                    goto fail;
-                }
-            }
-            if (state->fpi[file_number]) {
-                if (bgzf_write(state->fpi[file_number++], linebuf.s, linebuf.l) < 0)
-                    goto fail;
-            }
-        }
-
-    }
-
-    free(sub_qual); free(sub_tag);
-    free(linebuf.s);
-    return true;
-
- fail:
-    perror(__func__);
-    free(sub_qual); free(sub_tag);
-    free(linebuf.s);
-    return false;
-}
-
-// Transform a bam1_t record into a string with the FASTQ representation of it
-// @returns false for error, true for success
-static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t *state)
-{
-    int32_t qlen = b->core.l_qseq;
-    assert(qlen >= 0);
-    const uint8_t *oq = NULL;
-    char *qual = NULL;
-
-    char *seq = get_read(b);
-    if (!seq) return false;
-
-    if (state->use_oq) oq = bam_aux_get(b, "OQ");
-    if (oq && *oq=='Z') {
-        qual = strdup(bam_aux2Z(oq));
-        if (!qual) goto fail;
-        if (b->core.flag & BAM_FREVERSE) { // read is reverse complemented
-            reverse(qual);
-        }
-    } else {
-        if (get_quality(b, &qual) < 0) goto fail;
-    }
-
-    if (!make_fq_line(b, seq, qual, linebuf, state)) goto fail;
-
-    free(qual);
-    free(seq);
-    return true;
-
- fail:
-    free(seq);
-    free(qual);
-    return false;
-}
-
 static void free_opts(bam2fq_opts_t *opts)
 {
-    free(opts->barcode_tag);
-    free(opts->quality_tag);
-    free(opts->index_format);
-    free(opts->extra_tags);
     free(opts);
 }
 
@@ -568,13 +212,14 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
         {"quality-tag", required_argument, NULL, 'q'},
         { NULL, 0, NULL, 0 }
     };
-    while ((c = getopt_long(argc, argv, "0:1:2:o:f:F:G:niNOs:c:tT:v:@:", lopts, NULL)) > 0) {
+    while ((c = getopt_long(argc, argv, "0:1:2:o:f:F:G:niNOs:c:tT:v:@:",
+                            lopts, NULL)) > 0) {
         switch (c) {
-            case 'b': opts->barcode_tag = strdup(optarg); break;
-            case 'q': opts->quality_tag = strdup(optarg); break;
+            case 'b': opts->barcode_tag = optarg; break;
+            case 'q': opts->quality_tag = optarg; break;
             case  1 : opts->index_file[0] = optarg; break;
             case  2 : opts->index_file[1] = optarg; break;
-            case  3 : opts->index_format = strdup(optarg); break;
+            case  3 : opts->index_format = optarg; break;
             case '0': opts->fnr[0] = optarg; break;
             case '1': opts->fnr[1] = optarg; break;
             case '2': opts->fnr[2] = optarg; break;
@@ -585,7 +230,8 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
                     flag_off_set = 1;
                     opts->flag_off = 0;
                 }
-                opts->flag_off |= strtol(optarg, 0, 0); break;
+                opts->flag_off |= strtol(optarg, 0, 0);
+                break;
             case 'G': opts->flag_alloff |= strtol(optarg, 0, 0); break;
             case 'n': opts->has12 = false; break;
             case 'N': opts->has12always = true; break;
@@ -593,13 +239,25 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
             case 's': opts->fnse = optarg; break;
             case 't': opts->copy_tags = true; break;
             case 'i': opts->illumina_tag = true; break;
-            case 'c': opts->compression_level = atoi(optarg); break;
-            case 'T': opts->extra_tags = strdup(optarg); break;
+            case 'c':
+                opts->compression_level = atoi(optarg);
+                if (opts->compression_level < 0)
+                    opts->compression_level = 0;
+                if (opts->compression_level > 9)
+                    opts->compression_level = 9;
+                break;
+            case 'T': opts->extra_tags = optarg; break;
             case 'v': opts->def_qual = atoi(optarg); break;
-            case '?': bam2fq_usage(samtools_stderr, argv[0]); free_opts(opts); return false;
+
+            case '?':
+                bam2fq_usage(samtools_stderr, argv[0]);
+                free_opts(opts);
+                return false;
             default:
                 if (parse_sam_global_opt(c, optarg, lopts, &opts->ga) != 0) {
-                    bam2fq_usage(samtools_stderr, argv[0]); free_opts(opts); return false;
+                    bam2fq_usage(samtools_stderr, argv[0]);
+                    free_opts(opts);
+                    return false;
                 }
                 break;
         }
@@ -608,8 +266,8 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
     if (opts->fnr[1] || opts->fnr[2]) opts->has12 = false;
     if (opts->has12always) opts->has12 = true;
 
-    if (!opts->barcode_tag) opts->barcode_tag = strdup(DEFAULT_BARCODE_TAG);
-    if (!opts->quality_tag) opts->quality_tag = strdup(DEFAULT_QUALITY_TAG);
+    if (!opts->barcode_tag) opts->barcode_tag = DEFAULT_BARCODE_TAG;
+    if (!opts->quality_tag) opts->quality_tag = DEFAULT_QUALITY_TAG;
 
     int nIndex = 0;
     if (opts->index_format) {
@@ -654,7 +312,8 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
     }
 
     const char* type_str = argv[0];
-    if (strcasecmp("fastq", type_str) == 0 || strcasecmp("bam2fq", type_str) == 0) {
+    if (strcasecmp("fastq", type_str) == 0 ||
+        strcasecmp("bam2fq", type_str) == 0) {
         opts->filetype = FASTQ;
     } else if (strcasecmp("fasta", type_str) == 0) {
         opts->filetype = FASTA;
@@ -682,34 +341,61 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
     return true;
 }
 
-static BGZF *open_fqfile(char *filename, int c, htsThreadPool *tp)
-{
-    char mode[4] = "w";
-    size_t len = strlen(filename);
-
-    mode[2] = 0; mode[3] = 0;
-    if (len > 3 && strstr(filename + (len - 3),".gz")) {
-        mode[1] = 'g'; mode[2] = c+'0';
-    } else if ((len > 4 && strstr(filename + (len - 4),".bgz"))
-               || (len > 5 && strstr(filename + (len - 5),".bgzf"))) {
-        mode[1] = c+'0';
-    } else {
-        mode[1] = 'u';
+void set_sam_opts(samFile *fp, bam2fq_state_t *state,
+                  const bam2fq_opts_t *opts) { // apply the FASTQ_OPT_* settings derived from state/opts to fp
+    if (state->has12)
+        hts_set_opt(fp, FASTQ_OPT_RNUM, 1); // only set when has12 is on
+
+    if (state->illumina_tag)
+        hts_set_opt(fp, FASTQ_OPT_CASAVA, 1); // only set when illumina_tag is on
+
+    hts_set_opt(fp, FASTQ_OPT_BARCODE, opts->barcode_tag); // always set; return value not checked
+
+    kstring_t tag_list = {0,0}; // comma-separated tag names, built below
+    if (state->copy_tags)
+        kputs("RG,BC,QT", &tag_list); // fixed tag set used when copy_tags is on
+    if (opts->extra_tags) {
+        if (tag_list.l)
+            kputc(',', &tag_list); // separator only needed if something precedes
+        kputs(opts->extra_tags, &tag_list); // appended verbatim, no validation here
     }
+    if (tag_list.l)
+        hts_set_opt(fp, FASTQ_OPT_AUX, tag_list.s); // skipped entirely when the list is empty
+    ks_free(&tag_list); // the list is released here, right after the call above
+}
 
-    BGZF *fp = bgzf_open(filename,mode);
+// Open a file as normal or gzipped based on filename.
+// Note we always use bgzf and don't bother to attempt non-blocked
+// gzip streams.  This is a departure from the old fastq code.
+static samFile *sam_open_z(char *fn, char *mode, bam2fq_state_t *state) {
+    char modez[6]; // holds mode + 'z' + one level character + NUL, so assumes strlen(mode) <= 3
+    strcpy(modez, mode);
+
+    size_t l = strlen(fn);
+    if ((l > 3 && strcmp(fn+l-3, ".gz") == 0) || // suffix match; a name that is only the suffix does not count
+        (l > 4 && strcmp(fn+l-4, ".bgz") == 0) ||
+        (l > 5 && strcmp(fn+l-5, ".bgzf") == 0)) {
+        char m[3] = {'z', state->compression_level+'0', '\0'}; // NOTE(review): a single digit only if the level is 0..9 - confirm the default
+        strcat(modez, m);
+    }
+
+    samFile *fp = sam_open(fn, modez);
     if (!fp)
-        return fp;
-    if (tp->pool && bgzf_thread_pool(fp, tp->pool, tp->qsize) < 0) {
-        bgzf_close(fp);
         return NULL;
-    }
+
+    if (state->p.pool)
+        hts_set_thread_pool(fp, &state->p); // return value is not checked
+
     return fp;
 }
 
 static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
 {
+    char *mode = opts->filetype == FASTA ? "wF" : "wf";
+
     bam2fq_state_t* state = calloc(1, sizeof(bam2fq_state_t));
+    if (!state)
+        return false;
     state->flag_on = opts->flag_on;
     state->flag_off = opts->flag_off;
     state->flag_alloff = opts->flag_alloff;
@@ -723,22 +409,6 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
     state->hsamtools_stdout = NULL;
     state->compression_level = opts->compression_level;
 
-    state->taglist = kl_init(ktaglist);
-    if (opts->extra_tags) {
-        char *save_p;
-        char *s = strtok_r(opts->extra_tags, ",", &save_p);
-        while (s) {
-            if (strlen(s) != 2) {
-                fprintf(samtools_stderr, "Parsing extra tags - '%s' is not two characters\n", s);
-                free(state);
-                return false;
-            }
-            char **et = kl_pushp(ktaglist, state->taglist);
-            *et = s;
-            s = strtok_r(NULL, ",", &save_p);
-        }
-    }
-
     state->fp = sam_open(opts->fn_input, "r");
     if (state->fp == NULL) {
         print_error_errno("bam2fq","Cannot read file \"%s\"", opts->fn_input);
@@ -770,12 +440,12 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
         return false;
     }
     if (opts->fnse) {
-        state->fpse = open_fqfile(opts->fnse, state->compression_level, &state->p);
-        if (state->fpse == NULL) {
-            print_error_errno("bam2fq", "Cannot write to singleton file \"%s\"", opts->fnse);
+        if (!(state->fpse = sam_open_z(opts->fnse, mode, state))) {
+            print_error_errno("bam2fq", "Cannot open singleton file \"%s\"", opts->fnse);
             free(state);
             return false;
         }
+        set_sam_opts(state->fpse, state, opts);
     }
 
     if (opts->ga.reference) {
@@ -786,6 +456,7 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
         }
     }
 
+    // single, read1, read2
     int i, j;
     for (i = 0; i < 3; ++i) {
         if (opts->fnr[i]) {
@@ -793,28 +464,30 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
                 if (opts->fnr[j] && strcmp(opts->fnr[j], opts->fnr[i]) == 0)
                     break;
             if (j == i) {
-                state->fpr[i] = open_fqfile(opts->fnr[i], state->compression_level, &state->p);
-                if (state->fpr[i] == NULL) {
-                    print_error_errno("bam2fq", "Cannot write to r%d file \"%s\"",
+                if (!(state->fpr[i] = sam_open_z(opts->fnr[i], mode, state))) {
+                    print_error_errno("bam2fq", "Cannot open r%d file \"%s\"",
                                       i, opts->fnr[i]);
                     free(state);
                     return false;
                 }
+                set_sam_opts(state->fpr[i], state, opts);
             } else {
                 state->fpr[i] = state->fpr[j];
             }
         } else {
             if (!state->hsamtools_stdout) {
-                state->hsamtools_stdout = bgzf_dopen(fileno(samtools_stdout), "wu");
-                if (!state->hsamtools_stdout) {
+                if (!(state->hsamtools_stdout = sam_open_z("-", mode, state))) {
                     print_error_errno("bam2fq", "Cannot open STDOUT");
                     free(state);
                     return false;
                 }
+                set_sam_opts(state->hsamtools_stdout, state, opts);
             }
             state->fpr[i] = state->hsamtools_stdout;
         }
     }
+
+    // index 1, index 2
     for (i = 0; i < 2; i++) {
         state->fpi[i] = NULL;
         if (opts->index_file[i]) {
@@ -825,13 +498,14 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
                 if (opts->index_file[j] && strcmp(opts->index_file[j], opts->index_file[i]) == 0)
                     break;
             if (i == j) {
-                state->fpi[i] = open_fqfile(opts->index_file[i], state->compression_level, &state->p);
-                if (state->fpi[i] == NULL) {
-                    print_error_errno("bam2fq", "Cannot write to i%d file \"%s\"",
+                if (!(state->fpi[i] = sam_open_z(opts->index_file[i], mode,
+                                                 state))) {
+                    print_error_errno("bam2fq", "Cannot open i%d file \"%s\"",
                                       i+1, opts->index_file[i]);
                     free(state);
                     return false;
                 }
+                set_sam_opts(state->fpi[i], state, opts);
             } else if (j < 0) {
                 state->fpi[i] = state->fpr[j+3];
             } else {
@@ -856,21 +530,25 @@ static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int*
     bool valid = true;
     sam_hdr_destroy(state->h);
     check_sam_close("bam2fq", state->fp, opts->fn_input, "file", status);
-    if (state->fpse && bgzf_close(state->fpse)) { print_error_errno("bam2fq", "Error closing singleton file \"%s\"", opts->fnse); valid = false; }
+    if (state->fpse && sam_close(state->fpse) < 0) {
+        print_error_errno("bam2fq", "Error closing singleton file \"%s\"", opts->fnse);
+        valid = false;
+    }
+
     int i, j;
     for (i = 0; i < 3; ++i) {
         if (state->fpr[i] != state->hsamtools_stdout) {
             for (j = 0; j < i; j++)
                 if (state->fpr[i] == state->fpr[j])
                     break;
-            if (j == i && bgzf_close(state->fpr[i])) {
+            if (j == i && sam_close(state->fpr[i])) {
                 print_error_errno("bam2fq", "Error closing r%d file \"%s\"", i, opts->fnr[i]);
                 valid = false;
             }
         }
     }
     if (state->hsamtools_stdout) {
-        if (bgzf_close(state->hsamtools_stdout)) {
+        if (sam_close(state->hsamtools_stdout) < 0) {
             print_error_errno("bam2fq", "Error closing STDOUT");
             valid = false;
         }
@@ -882,12 +560,11 @@ static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int*
         for (j -= 3; j >= 0 && j < i; j++)
             if (state->fpi[i] == state->fpi[j])
                 break;
-        if (j == i && state->fpi[i] && bgzf_close(state->fpi[i])) {
+        if (j == i && state->fpi[i] && sam_close(state->fpi[i]) < 0) {
             print_error_errno("bam2fq", "Error closing i%d file \"%s\"", i+1, opts->index_file[i]);
             valid = false;
         }
     }
-    kl_destroy(ktaglist,state->taglist);
     free(state->index_sequence);
     if (state->p.pool)
         hts_tpool_destroy(state->p.pool);
@@ -903,135 +580,300 @@ static inline bool filter_it_out(const bam1_t *b, const bam2fq_state_t *state)
 
 }
 
+int write_index_rec(samFile *fp, bam1_t *b, bam2fq_state_t *state,
+                    bam2fq_opts_t* opts, char *seq, int seq_len,
+                    char *qual, int qual_len) { // qual_len is not read in this function
+    if (!fp || !b || !seq_len)
+        return 0; // nothing to write: no output file, no record, or empty sequence
+
+    int ret = -1; // stays -1 unless the write below succeeds
+    bam1_t *b2 = bam_init1(); // temporary record built from b; FIXME: reuse
+    if (!b2)
+        return -1;
+
+    size_t aux_len = b->data + b->l_data - bam_get_aux(b); // bytes from b's aux start to the end of its data
+    if (bam_set1(b2, b->core.l_qname, bam_get_qname(b),
+                 (b->core.flag | BAM_FUNMAP) & ~BAM_FREVERSE, // b's flags with UNMAP set and REVERSE cleared
+                 -1, -1, 0,    // refid, pos, mapq
+                 0, NULL,      // cigar
+                 -1, -1, 0,    // rnext, pnext, tlen
+                 seq_len, seq, qual, // qual may be NULL (handled below)
+                 aux_len) < 0) // presumably reserves room for the aux bytes copied below - confirm against htslib
+        goto err;
+
+    uint8_t *q = bam_get_qual(b2);
+    if (qual) {
+        int i;
+        for (i = 0; i < seq_len; i++)
+            q[i] -= '!'; // subtract '!' (33) from every quality byte
+    } else {
+        memset(q, opts->def_qual, seq_len); // no qualities supplied: fill with opts->def_qual
+    }
+
+    memcpy(bam_get_aux(b2), bam_get_aux(b), aux_len); // carry b's aux data over unchanged
+    b2->l_data += aux_len; // extend the record to cover the copied aux bytes
+    if (sam_write1(fp, state->h, b2) < 0)
+        goto err;
+
+    ret = 0; // success
+ err:
+    if (b2) // b2 is always non-NULL here (checked right after bam_init1)
+        bam_destroy1(b2);
+    return ret; // 0 on success or nothing to do, -1 on failure
+}
+
+int output_index(bam1_t *b1, bam1_t *b2, bam2fq_state_t *state,
+                 bam2fq_opts_t* opts) { // split the barcode tag into index records; 0 = ok, -1 = error
+    bam1_t *b[2] = {b1, b2}; // index N is written using b[N] as the template record
+
+    char *ifmt = opts->index_format;
+    if (!ifmt)
+        ifmt = "i*i*"; // default: two separator-delimited indexes
+
+    // Get seq / qual elements
+    char *bc = NULL, *qt = NULL;
+    if (b1)
+        bc = (char *)bam_aux_get(b1, opts->barcode_tag);
+    if (b2 && !bc)
+        bc = (char *)bam_aux_get(b2, opts->barcode_tag); // fall back to the second record
+    if (!bc)
+        return 0; // no barcode tag on either record: nothing to output
+    else
+        bc++; // skip Z
+
+    if (b1)
+        qt = (char *)bam_aux_get(b1, opts->quality_tag);
+    if (b2 && !qt)
+        qt = (char *)bam_aux_get(b2, opts->quality_tag);
+    if (qt && strlen(bc) != strlen(qt)-1) // qt still points at its type byte here, hence the -1
+        qt = NULL; // length mismatch: ignore the quality tag entirely
+    else if (qt)
+        qt++; // skip the type byte, as done for bc above
+
+    int inum = 0; // number of 'i' fields emitted so far (at most 2)
+    while (inum < 2) {
+        char fc = *ifmt++; // format code: 'i' (emit) or 'n' (skip)
+        if (!fc)
+            break; // ran out of index-format
+
+        long len, rem = 0; // len == 0 means "read until separator"
+        if (isdigit((unsigned char)*ifmt)) { // cast: ctype functions need an unsigned char value
+            rem = len = strtol(ifmt, &ifmt, 10);
+        } else {
+            ifmt += *ifmt != '\0'; // skip the '*', but never step past the terminating NUL
+            len = 0;
+        }
+
+        char *bc_end = bc, *qt_end = qt; // qt_end advances in lock-step with bc_end when qt is set
+        while (len ? *bc_end && rem-- : isalpha((unsigned char)*bc_end))
+            bc_end++, qt_end += qt != NULL;
+
+        switch (fc) {
+        case 'n':
+            // skip; step over the separator only if one is present (not the NUL)
+            bc = bc_end + (len==0 && *bc_end);
+            if (qt)
+                qt = qt_end + (len==0 && *bc_end);
+            break;
+
+        case 'i':
+            if (write_index_rec(state->fpi[inum], b[inum], state, opts,
+                                bc, bc_end-bc, qt, qt_end-qt) < 0)
+                return -1;
+            bc = bc_end + (len==0 && *bc_end); // do not advance beyond the end of the tag
+            if (qt)
+                qt = qt_end + (len==0 && *bc_end);
+            inum++;
+            break;
+
+        default:
+            fprintf(samtools_stderr, "Unknown index-format code\n");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int flush_rec(bam2fq_state_t *state, bam2fq_opts_t* opts,
+                     bam1_t *b[4], int score[3], int best[3],
+                     int64_t *n_singletons) { // write the chosen records for one read name; 0 = ok, -1 = error
+    // Paired data, with 1 or 2 ends present.
+    if (score[1] > 0 && score[2] > 0) { // both ends present
+        // If CASAVA tag is required and barcode is only on R1,
+        // copy it to R2
+        if (state->illumina_tag) {
+            char *tag; // aux pointer; the string data is at tag+1 (see the calls below)
+            if ((tag = (char *)bam_aux_get(b[best[1]],
+                                           opts->barcode_tag)))
+                if (bam_aux_update_str(b[best[2]], // done whenever R1 has the tag; R2 is not inspected first
+                                       opts->barcode_tag,
+                                       strlen(tag), tag+1) < 0) // strlen(tag) includes the leading byte, so it is strlen(tag+1)+1
+                    goto err;
+            if ((tag = (char *)bam_aux_get(b[best[1]],
+                                           opts->quality_tag)))
+                if (bam_aux_update_str(b[best[2]], // same copy for the quality tag
+                                       opts->quality_tag,
+                                       strlen(tag), tag+1) < 0)
+                    goto err;
+
+        }
+        if (sam_write1(state->fpr[1], state->h, b[best[1]]) < 0)
+            goto err;
+        if (sam_write1(state->fpr[2], state->h, b[best[2]]) < 0)
+            goto err;
+
+        if (output_index(b[best[1]], b[best[2]], state, opts) < 0)
+            goto err;
+    } else if (score[1] > 0 || score[2] > 0) { // exactly one end present
+        if (state->fpse) {
+            // print whichever one exists to fpse
+            if (score[1] > 0) {
+                if (sam_write1(state->fpse, state->h, b[best[1]]) < 0)
+                    goto err;
+            } else {
+                if (sam_write1(state->fpse, state->h, b[best[2]]) < 0)
+                    goto err;
+            }
+            ++(*n_singletons); // counted only when a singleton file is in use
+        } else {
+            if (score[1] > 0) { // no singleton file: the lone end goes to its usual read file
+                if (sam_write1(state->fpr[1], state->h, b[best[1]]) < 0)
+                    goto err;
+            } else {
+                if (sam_write1(state->fpr[2], state->h, b[best[2]]) < 0)
+                    goto err;
+            }
+        }
+
+        if (output_index(score[1] > 0 ? b[best[1]] : NULL, // pass NULL for the missing end
+                         score[2] > 0 ? b[best[2]] : NULL,
+                         state, opts) < 0)
+            goto err;
+    }
+
+    if (score[0]) { // single ended data (neither READ1 nor READ2)
+        if (sam_write1(state->fpr[0], state->h, b[best[0]]) < 0) // NOTE(review): tests score[0] != 0, while the branches above test > 0
+            goto err;
+
+        if (output_index(b[best[0]], NULL, state, opts) < 0)
+            goto err;
+    }
+
+    return 0;
+
+ err:
+    return -1; // no cleanup is done here
+}
+
 static bool bam2fq_mainloop(bam2fq_state_t *state, bam2fq_opts_t* opts)
 {
     int n;
-    bam1_t *records[3] = {NULL, NULL, NULL};
     char *current_qname = NULL;
     int64_t n_reads = 0, n_singletons = 0; // Statistics
-    kstring_t linebuf[3] = {{0,0,NULL},{0,0,NULL},{0,0,NULL}};
     int score[3];
     int at_eof;
-    bool valid = true;
-    bam1_t* b = NULL;
+    bool valid = false;
+    int best[3] = {-1, -1, -1}; // map single (R0), R1, R2 to b[] indices;
+                                // indexed by [readpart]
+    bam1_t *b[4];               // 3 readparts, plus current record
 
-    while (true) {
-        if (!b)
-            b = bam_init1();
-        if (b == NULL) {
+    for (n = 0; n < 4; n++) {
+        if (!(b[n] = bam_init1())) {
             perror("[bam2fq_mainloop] Malloc error for bam record buffer.");
-            valid = false;
-            break;
+            return false;
         }
-        int res = sam_read1(state->fp, state->h, b);
+    }
+
+    n = 0;
+    while (true) {
+        int res = sam_read1(state->fp, state->h, b[n]);
         if (res < -1) {
             fprintf(samtools_stderr, "[bam2fq_mainloop] Failed to read bam record.\n");
-            valid = false;
-            break;
+            goto err;
         }
         at_eof = res < 0;
 
-        if (!at_eof && filter_it_out(b, state))
+        if (!at_eof && filter_it_out(b[n], state))
             continue;
-        if (!at_eof) ++n_reads;
-
-        if (at_eof || !current_qname || (strcmp(current_qname, bam_get_qname(b)) != 0)) {
-            if (current_qname) {
-                if (state->illumina_tag) {
-                    for (n=0; valid && n<3; n++) {
-                        if (!records[n]) continue;
-                        if (insert_index_sequence_into_linebuf(state->index_sequence, &linebuf[n], records[n]) < 0) valid = false;
-                    }
-                    if (!valid) break;
-                }
-                free(state->index_sequence); state->index_sequence = NULL;
-                if (score[1] > 0 && score[2] > 0) {
-                    // print linebuf[1] to fpr[1], linebuf[2] to fpr[2]
-                    if (bgzf_write(state->fpr[1], linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
-                    if (bgzf_write(state->fpr[2], linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
-                } else if (score[1] > 0 || score[2] > 0) {
-                    if (state->fpse) {
-                        // print whichever one exists to fpse
-                        if (score[1] > 0) {
-                            if (bgzf_write(state->fpse, linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
-                        } else {
-                            if (bgzf_write(state->fpse, linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
-                        }
-                        ++n_singletons;
-                    } else {
-                        if (score[1] > 0) {
-                            if (bgzf_write(state->fpr[1], linebuf[1].s, linebuf[1].l) < 0) { valid = false; break; }
-                        } else {
-                            if (bgzf_write(state->fpr[2], linebuf[2].s, linebuf[2].l) < 0) { valid = false; break; }
-                        }
-                    }
-                }
-                if (score[0]) { // TODO: check this
-                    // print linebuf[0] to fpr[0]
-                    if (bgzf_write(state->fpr[0], linebuf[0].s, linebuf[0].l) < 0) { valid = false; break; }
-                }
+        if (!at_eof) {
+            ++n_reads;
+
+            // Handle -O option: use OQ for qual
+            uint8_t *oq;
+            if (state->use_oq && (oq = bam_aux_get(b[n],"OQ")) && *oq == 'Z') {
+                int i, l = strlen((char *)++oq);
+                uint8_t *qual = bam_get_qual(b[n]);
+                for (i = 0; i < l && i < b[n]->core.l_qseq; i++)
+                    qual[i] = oq[i] - '!';
             }
+        }
 
+        if (at_eof
+            || !current_qname
+            || (strcmp(current_qname, bam_get_qname(b[n])) != 0)) {
+            // New name, so flush best examples of previous name.
+            if (current_qname)
+                if (flush_rec(state, opts, b, score, best, &n_singletons) < 0)
+                    goto err;
 
-            free(current_qname); current_qname = NULL;
+            current_qname = bam_get_qname(b[n]);
             score[0] = score[1] = score[2] = 0;
-            for (n=0; n < 3; n++) {
-                bam_destroy1(records[n]); records[n]=NULL;
-            }
 
             if (at_eof) { break; }
-
-            current_qname = strdup(bam_get_qname(b));
-            if (!current_qname) { valid = false; break; }
         }
 
         // Prefer a copy of the read that has base qualities
-        int b_score = bam_get_qual(b)[0] != 0xff? 2 : 1;
-        readpart rp = which_readpart(b);
-        if (b_score > score[rp]) {
-            if (!tags2fq(b, state, opts)) { valid = false; break; }
-            if (records[rp]) bam_destroy1(records[rp]);
-            records[rp] = b;
+        int b_score = bam_get_qual(b[n])[0] != 0xff? 2 : 1;
+        readpart rp = which_readpart(b[n]);
+        if (score[rp] < b_score) {
             score[rp] = b_score;
-            b = NULL;
-            if(!bam1_to_fq(records[rp], &linebuf[rp], state)) {
-                fprintf(samtools_stderr, "[%s] Error converting read to FASTA/Q\n", __func__);
-                valid = false; break;
-            }
+            // Record b[n] slot for best copy of readpair and find a new
+            // slot for next bam read
+            best[rp] = n;
+            int used_slot[4] = {0}, i;
+            for (i = 0; i < 3; i++)
+                if (best[i] >= 0)
+                    used_slot[best[i]] = 1;
+            for (i = 0; i < 4 && used_slot[i]; i++)
+                ;
+            n = i;
         }
     }
+
+    valid = true;
+ err:
     if (!valid)
-    {
-        perror("[bam2fq_mainloop] Error writing to FASTx files.");
-    }
-    bam_destroy1(b);
-    for (n=0; n < 3; n++) {
-        bam_destroy1(records[n]);
-    }
-    free(current_qname);
-    free(linebuf[0].s);
-    free(linebuf[1].s);
-    free(linebuf[2].s);
-    fprintf(samtools_stderr, "[M::%s] discarded %" PRId64 " singletons\n", __func__, n_singletons);
-    fprintf(samtools_stderr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
+        print_error_errno("bam2fq", "Error writing to FASTx files.");
+
+    for (n = 0; n < 4; n++)
+        bam_destroy1(b[n]);
+
+    fprintf(samtools_stderr, "[M::%s] discarded %" PRId64 " singletons\n",
+            __func__, n_singletons);
+    fprintf(samtools_stderr, "[M::%s] processed %" PRId64 " reads\n",
+            __func__, n_reads);
 
     return valid;
 }
 
 int main_bam2fq(int argc, char *argv[])
 {
-    int status = EXIT_SUCCESS;
+    int status = EXIT_FAILURE;
     bam2fq_opts_t* opts = NULL;
     bam2fq_state_t* state = NULL;
 
     bool valid = parse_opts(argc, argv, &opts);
     if (!valid || opts == NULL) return valid ? EXIT_SUCCESS : EXIT_FAILURE;
 
-    if (!init_state(opts, &state)) return EXIT_FAILURE;
+    if (!init_state(opts, &state)) goto err;
+
+    if (!bam2fq_mainloop(state,opts)) goto err;
 
-    if (!bam2fq_mainloop(state,opts)) status = EXIT_FAILURE;
+    if (!destroy_state(opts, state, &status)) goto err;
 
-    if (!destroy_state(opts, state, &status)) return EXIT_FAILURE;
+    status = EXIT_SUCCESS;
+ err:
     sam_global_args_free(&opts->ga);
     free_opts(opts);
 
diff --git a/samtools/bam_flags.c b/samtools/bam_flags.c
index 11a82b6..78312ee 100644
--- a/samtools/bam_flags.c
+++ b/samtools/bam_flags.c
@@ -1,6 +1,6 @@
 /*  bam_flags.c -- flags subcommand.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2014, 2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -32,38 +32,54 @@ DEALINGS IN THE SOFTWARE.  */
 #include <unistd.h>
 #include <stdarg.h>
 #include <htslib/sam.h>
+#include "samtools.h"
 
-static void usage(void)
+static void usage(FILE *fp) // writes the help text and the flag table to fp
 {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "About: Convert between textual and numeric flag representation\n");
-    fprintf(stderr, "Usage: samtools flags INT|STR[,...]\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, "Flags:\n");
-    fprintf(stderr, "\t0x%x\tPAIRED        .. paired-end (or multiple-segment) sequencing technology\n", BAM_FPAIRED);
-    fprintf(stderr, "\t0x%x\tPROPER_PAIR   .. each segment properly aligned according to the aligner\n", BAM_FPROPER_PAIR);
-    fprintf(stderr, "\t0x%x\tUNMAP         .. segment unmapped\n", BAM_FUNMAP);
-    fprintf(stderr, "\t0x%x\tMUNMAP        .. next segment in the template unmapped\n", BAM_FMUNMAP);
-    fprintf(stderr, "\t0x%x\tREVERSE       .. SEQ is reverse complemented\n", BAM_FREVERSE);
-    fprintf(stderr, "\t0x%x\tMREVERSE      .. SEQ of the next segment in the template is reversed\n", BAM_FMREVERSE);
-    fprintf(stderr, "\t0x%x\tREAD1         .. the first segment in the template\n", BAM_FREAD1);
-    fprintf(stderr, "\t0x%x\tREAD2         .. the last segment in the template\n", BAM_FREAD2);
-    fprintf(stderr, "\t0x%x\tSECONDARY     .. secondary alignment\n", BAM_FSECONDARY);
-    fprintf(stderr, "\t0x%x\tQCFAIL        .. not passing quality controls\n", BAM_FQCFAIL);
-    fprintf(stderr, "\t0x%x\tDUP           .. PCR or optical duplicate\n", BAM_FDUP);
-    fprintf(stderr, "\t0x%x\tSUPPLEMENTARY .. supplementary alignment\n", BAM_FSUPPLEMENTARY);
-    fprintf(stderr, "\n");
+    static const struct { int bit; const char *desc; } *fl, flags[] = {
+        { BAM_FPAIRED, "paired-end / multiple-segment sequencing technology" },
+        { BAM_FPROPER_PAIR, "each segment properly aligned according to aligner" },
+        { BAM_FUNMAP, "segment unmapped" },
+        { BAM_FMUNMAP, "next segment in the template unmapped" },
+        { BAM_FREVERSE, "SEQ is reverse complemented" },
+        { BAM_FMREVERSE, "SEQ of next segment in template is rev.complemented" },
+        { BAM_FREAD1, "the first segment in the template" },
+        { BAM_FREAD2, "the last segment in the template" },
+        { BAM_FSECONDARY, "secondary alignment" },
+        { BAM_FQCFAIL, "not passing quality controls or other filters" },
+        { BAM_FDUP, "PCR or optical duplicate" },
+        { BAM_FSUPPLEMENTARY, "supplementary alignment" },
+        { 0, NULL } // sentinel: a NULL desc ends the loop below
+    };
+
+    fprintf(fp,
+"About: Convert between textual and numeric flag representation\n"
+"Usage: samtools flags FLAGS...\n"
+"\n"
+"Each FLAGS argument is either an INT (in decimal/hexadecimal/octal) representing\n"
+"a combination of the following numeric flag values, or a comma-separated string\n"
+"NAME,...,NAME representing a combination of the following flag names:\n"
+"\n");
+    for (fl = flags; fl->desc; fl++) {
+        char *name = bam_flag2str(fl->bit); // string owned by us; freed below
+        fprintf(fp, "%#6x %5d  %-15s%s\n", fl->bit, fl->bit, name, fl->desc); // hex, decimal, name, description
+        free(name);
+    }
 }
 
 
 int main_flags(int argc, char *argv[])
 {
-    if ( argc!=2 ) usage();
-    else
+    if ( argc < 2 ) { usage(stdout); return 0; } // no FLAGS given: help on stdout, status 0
+
+    int i;
+    for (i = 1; i < argc; i++) // every argument is converted and printed on its own line
     {
-        int mask = bam_str2flag(argv[1]);
-        if ( mask<0 ) { fprintf(stderr,"Error: Could not parse \"%s\"\n", argv[1]); usage(); return 1; }
-        printf("0x%x\t%d\t%s\n", mask, mask, bam_flag2str(mask));
+        int mask = bam_str2flag(argv[i]);
+        if ( mask<0 ) { print_error("flags", "Could not parse \"%s\"", argv[i]); usage(stderr); return 1; } // stop at the first bad argument
+        char *str = bam_flag2str(mask); // string owned by us; freed after printing
+        printf("0x%x\t%d\t%s\n", mask, mask, str); // hex, decimal, comma-separated names
+        free(str);
     }
     return 0;
 }
diff --git a/samtools/bam_flags.c.pysam.c b/samtools/bam_flags.c.pysam.c
index 9c6424f..b3a9d29 100644
--- a/samtools/bam_flags.c.pysam.c
+++ b/samtools/bam_flags.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam_flags.c -- flags subcommand.
 
-    Copyright (C) 2013-2014 Genome Research Ltd.
+    Copyright (C) 2013-2014, 2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
 
@@ -34,38 +34,54 @@ DEALINGS IN THE SOFTWARE.  */
 #include <unistd.h>
 #include <stdarg.h>
 #include <htslib/sam.h>
+#include "samtools.h"
 
-static void usage(void)
+static void usage(FILE *fp) // writes the help text and the flag table to fp
 {
-    fprintf(samtools_stderr, "\n");
-    fprintf(samtools_stderr, "About: Convert between textual and numeric flag representation\n");
-    fprintf(samtools_stderr, "Usage: samtools flags INT|STR[,...]\n");
-    fprintf(samtools_stderr, "\n");
-    fprintf(samtools_stderr, "Flags:\n");
-    fprintf(samtools_stderr, "\t0x%x\tPAIRED        .. paired-end (or multiple-segment) sequencing technology\n", BAM_FPAIRED);
-    fprintf(samtools_stderr, "\t0x%x\tPROPER_PAIR   .. each segment properly aligned according to the aligner\n", BAM_FPROPER_PAIR);
-    fprintf(samtools_stderr, "\t0x%x\tUNMAP         .. segment unmapped\n", BAM_FUNMAP);
-    fprintf(samtools_stderr, "\t0x%x\tMUNMAP        .. next segment in the template unmapped\n", BAM_FMUNMAP);
-    fprintf(samtools_stderr, "\t0x%x\tREVERSE       .. SEQ is reverse complemented\n", BAM_FREVERSE);
-    fprintf(samtools_stderr, "\t0x%x\tMREVERSE      .. SEQ of the next segment in the template is reversed\n", BAM_FMREVERSE);
-    fprintf(samtools_stderr, "\t0x%x\tREAD1         .. the first segment in the template\n", BAM_FREAD1);
-    fprintf(samtools_stderr, "\t0x%x\tREAD2         .. the last segment in the template\n", BAM_FREAD2);
-    fprintf(samtools_stderr, "\t0x%x\tSECONDARY     .. secondary alignment\n", BAM_FSECONDARY);
-    fprintf(samtools_stderr, "\t0x%x\tQCFAIL        .. not passing quality controls\n", BAM_FQCFAIL);
-    fprintf(samtools_stderr, "\t0x%x\tDUP           .. PCR or optical duplicate\n", BAM_FDUP);
-    fprintf(samtools_stderr, "\t0x%x\tSUPPLEMENTARY .. supplementary alignment\n", BAM_FSUPPLEMENTARY);
-    fprintf(samtools_stderr, "\n");
+    static const struct { int bit; const char *desc; } *fl, flags[] = {
+        { BAM_FPAIRED, "paired-end / multiple-segment sequencing technology" },
+        { BAM_FPROPER_PAIR, "each segment properly aligned according to aligner" },
+        { BAM_FUNMAP, "segment unmapped" },
+        { BAM_FMUNMAP, "next segment in the template unmapped" },
+        { BAM_FREVERSE, "SEQ is reverse complemented" },
+        { BAM_FMREVERSE, "SEQ of next segment in template is rev.complemented" },
+        { BAM_FREAD1, "the first segment in the template" },
+        { BAM_FREAD2, "the last segment in the template" },
+        { BAM_FSECONDARY, "secondary alignment" },
+        { BAM_FQCFAIL, "not passing quality controls or other filters" },
+        { BAM_FDUP, "PCR or optical duplicate" },
+        { BAM_FSUPPLEMENTARY, "supplementary alignment" },
+        { 0, NULL } // sentinel: a NULL desc ends the loop below
+    };
+
+    fprintf(fp,
+"About: Convert between textual and numeric flag representation\n"
+"Usage: samtools flags FLAGS...\n"
+"\n"
+"Each FLAGS argument is either an INT (in decimal/hexadecimal/octal) representing\n"
+"a combination of the following numeric flag values, or a comma-separated string\n"
+"NAME,...,NAME representing a combination of the following flag names:\n"
+"\n");
+    for (fl = flags; fl->desc; fl++) {
+        char *name = bam_flag2str(fl->bit); // string owned by us; freed below
+        fprintf(fp, "%#6x %5d  %-15s%s\n", fl->bit, fl->bit, name, fl->desc); // hex, decimal, name, description
+        free(name);
+    }
 }
 
 
 int main_flags(int argc, char *argv[])
 {
-    if ( argc!=2 ) usage();
-    else
+    if ( argc < 2 ) { usage(samtools_stdout); return 0; } // no FLAGS given: help on samtools_stdout, status 0
+
+    int i;
+    for (i = 1; i < argc; i++) // every argument is converted and printed on its own line
     {
-        int mask = bam_str2flag(argv[1]);
-        if ( mask<0 ) { fprintf(samtools_stderr,"Error: Could not parse \"%s\"\n", argv[1]); usage(); return 1; }
-        fprintf(samtools_stdout, "0x%x\t%d\t%s\n", mask, mask, bam_flag2str(mask));
+        int mask = bam_str2flag(argv[i]);
+        if ( mask<0 ) { print_error("flags", "Could not parse \"%s\"", argv[i]); usage(samtools_stderr); return 1; } // stop at the first bad argument
+        char *str = bam_flag2str(mask); // string owned by us; freed after printing
+        fprintf(samtools_stdout, "0x%x\t%d\t%s\n", mask, mask, str); // hex, decimal, comma-separated names
+        free(str);
     }
     return 0;
 }
diff --git a/samtools/bam_import.c b/samtools/bam_import.c
new file mode 100644
index 0000000..daf6b17
--- /dev/null
+++ b/samtools/bam_import.c
@@ -0,0 +1,487 @@
+/* bam_import -- Import of FASTQ files.
+ *
+ *   samtools import -1 a_1.fq -2 a_2.fq --i1 a_i1.fq --i2 a_i2.fq
+ *   samtools import a_1.fq a_2.fq
+ *   samtools import a_interleaved.fq
+ *
+ * Copyright (C) 2020 Genome Research Ltd.
+ *
+ * Author: James Bonfield <jkb@sanger.ac.uk>
+ */
+
+/*
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notices and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+// TODO: Store other non-aux comments; in new sam tag?
+
+#include <config.h>
+#include <ctype.h>
+
+#include "htslib/sam.h"
+#include "htslib/thread_pool.h"
+
+#include "samtools.h"
+#include "sam_opts.h"
+
+static int usage(FILE *fp, int exit_status) { // emit the help text on fp, hand back exit_status
+    fputs("Usage: samtools import [options] [file.fastq ...]\n", fp);
+    fputs("\n", fp);
+    fputs("Options:\n", fp);
+    fputs("  -s FILE      Read paired-ended data from single FILE\n", fp);
+    fputs("  -0 FILE      Read single-ended data from FILE\n", fp);
+    fputs("  -1 FILE      Read-1 from FILE\n", fp);
+    fputs("  -2 FILE      Read-2 from FILE\n", fp);
+    fputs("  --i1 FILE    Index-1 from FILE\n", fp);
+    fputs("  --i2 FILE    Index-2 from FILE\n", fp);
+    fputs("  -i           Parse CASAVA identifier\n", fp);
+    fputs("  --barcode-tag TAG\n", fp);
+    fputs("               Tag to use with barcode sequences [BC]\n", fp);
+    fputs("  --quality-tag TAG\n", fp);
+    fputs("               Tag to use with barcode qualities [QT]\n", fp);
+    fputs("  -r STRING    Build up a complete @RG line\n", fp);
+    fputs("  -R STRING    Add a simple RG line of \"@RG\\tID:STRING\"\n", fp);
+    fputs("  -T TAGLIST   Parse tags in SAM format; list of '*' for all\n", fp);
+    fputs("  -o FILE      Output to FILE instead of stdout\n", fp);
+    fputs("  -u           Uncompressed output\n", fp);
+    fputs("  --order TAG  Store Nth record count in TAG\n", fp);
+    fputs("\n", fp);
+    sam_global_opt_help(fp, "-.O.-@--"); // shared samtools options come from the common helper
+
+    fputs("\nA single fastq file will be interpreted as -s, -0 or -1 depending on\n", fp);
+    fputs("file contents, and a pair of fastq files as \"-1 FILE1 -2 FILE2\".\n", fp);
+
+    return exit_status;
+}
+
+// Order matters here as we want to read index elements before main
+// sequences so on reading the seqs we can emit a fully annotated record.
+enum fileno {
+    FQ_I1, FQ_I2, // index seqs for R1 and R2
+    FQ_R0,        // single file and unpaired data (single-ended tech).
+    FQ_R1, FQ_R2, // separate read1 and read2 files
+    FQ_SINGLE,    // single file, but with read1 and/or read2 present.
+    FQ_END        // count of the kinds above; used to size per-input arrays
+};
+
+typedef struct {
+    sam_global_args ga; // shared samtools options (formats, thread count)
+    int no_pg;         // set by --no-PG
+    char *fn[FQ_END], *fn_out; // input names indexed by enum fileno; output name (-o, default "-")
+    int idx_both;      // add index to READ2 too, not just READ1
+    int casava;        // -i: parse CASAVA identifier
+    char *barcode_seq; // --barcode-tag, default "BC"
+    char *barcode_qual; // --quality-tag, default "QT"
+    char *aux;         // -T TAGLIST
+    char *rg;          // -R STRING: read-group ID
+    char *rg_line;     // -r STRING: @RG line text built up from the option(s)
+    char *order;       // --order TAG: tag receiving the record counter
+    int compress_level; // -1 = not set; -u sets it to 0
+    htsThreadPool p;   // thread pool, created when threads are requested
+} opts_t;
+
+// Append a sequence and quality string from a BAM record to a BC:Z and
+// QT:Z style aux tag string.  Returns 0, or -1 if a buffer cannot be resized.
+static int append_index(kstring_t *s, kstring_t *q, bam1_t *b) {
+    char *sp, *qp;
+    if (ks_resize(s, s->l + b->core.l_qseq+1 +1) < 0) // room for a separator and the NUL
+        return -1;
+    if (ks_resize(q, q->l + b->core.l_qseq+1 +1) < 0)
+        return -1;
+
+    sp = s->s + s->l - (s->l > 0); // when non-empty, step back onto the previous NUL
+    qp = q->s + q->l - (q->l > 0);
+
+    if (s->l)
+        *sp++ = '-'; // separator between successive index sequences
+
+    if (q->l)
+        *qp++ = ' '; // separator between successive quality strings
+
+    int i;
+    uint8_t *seq = bam_get_seq(b);
+    uint8_t *qual = bam_get_qual(b);
+    for (i = 0; i < b->core.l_qseq; i++) {
+        *sp++ = seq_nt16_str[bam_seqi(seq, i)]; // packed base code -> character
+        *qp++ = qual[i] + '!';                  // stored quality value -> character offset by '!'
+    }
+    *sp++ = 0;
+    *qp++ = 0;
+
+    s->l = sp - s->s; // note: the stored length counts the terminating NUL
+    q->l = qp - q->s;
+
+    return 0;
+}
+
+static int import_fastq(int argc, char **argv, opts_t *opts) { // argc/argv: positional FASTQ names only
+    int i, n, ret = -1; // fail by default: several "goto err" paths below never assign ret
+    samFile *fp_in[FQ_END] = {NULL};
+    bam1_t *b = bam_init1();
+    int ids[FQ_END];           // enum fileno of each opened input, in reading order
+    samFile *fp_out = NULL;
+    sam_hdr_t *hdr_out = NULL;
+    kstring_t index_str = {0,0}; // --index-format text for the "Reverse with" @CO line
+    kstring_t read_str = {0,0};  // samtools fastq arguments for the "Reverse with" @CO line
+    char *rg = opts->rg;
+    kstring_t rg_line = {0,0};
+    uint64_t read_num = 0;       // running record counter written to the --order tag
+    kstring_t idx_seq  = {0};    // barcode sequence(s) gathered for the current template
+    kstring_t idx_qual = {0};    // matching barcode qualities
+
+    // Any additional arguments are assumed to be r1 r2, as a
+    // short cut. We support reading index tags out of those too (eg
+    // Illumina CASAVA format), but if we do that we lack the barcode
+    // quality string.
+    //
+    // We also consider a read name ending in /1 or /2 to be a single
+    // file containing interleaved fastq records for both ends.
+    // A lone file is stored as fn[FQ_SINGLE]; otherwise names fill R1, R2, SINGLE.
+    if (argc == 1)
+        opts->fn[FQ_SINGLE] = argv[0];
+    else
+        for (i = 0; i < FQ_END - FQ_R1; i++) // stop at FQ_SINGLE: never write past fn[]
+            if (argc > i)
+                opts->fn[FQ_R1+i] = argv[i];
+
+    // Open all files
+    for (i = n = 0; i < FQ_END; i++) {
+        if (!opts->fn[i])
+            continue;
+        fp_in[i] = sam_open_format(opts->fn[i], "r", &opts->ga.in);
+        if (!fp_in[i]) {
+            perror(opts->fn[i]);
+            ret = -1;
+            goto err;
+        }
+        if (opts->p.pool)
+            hts_set_thread_pool(fp_in[i], &opts->p);
+        ids[n++] = i;
+
+        if (opts->casava)
+            hts_set_opt(fp_in[i], FASTQ_OPT_CASAVA, 1);
+        if (opts->barcode_seq) // for auto-CASAVA parsing
+            hts_set_opt(fp_in[i], FASTQ_OPT_BARCODE, opts->barcode_seq);
+        if (opts->aux)
+            hts_set_opt(fp_in[i], FASTQ_OPT_AUX,
+                        *opts->aux == '*' || *opts->aux == '\0'
+                        ? NULL : opts->aux);
+
+        switch (i) { // record how "samtools fastq" could regenerate this input
+        case FQ_I1:
+            kputs("--i1 I1.fastq ", &read_str);
+            kputs("i*", &index_str);
+            break;
+        case FQ_I2:
+            kputs("--i2 I2.fastq ", &read_str);
+            kputs("i*", &index_str);
+            break;
+
+        case FQ_R0:
+            kputs("-0 unpaired.fastq ", &read_str);
+            break;
+
+        case FQ_R1:
+            kputs("-1 R1.fastq ", &read_str);
+            break;
+
+        case FQ_R2:
+            kputs("-2 R2.fastq ", &read_str);
+            break;
+
+        case FQ_SINGLE:
+            kputs("-N -o paired.fastq ", &read_str);
+            break;
+
+        default:
+            ks_clear(&read_str); // not reversible
+            kputs("", &read_str);
+        }
+    }
+    if (n == 0) { // nothing to read: show the help text instead
+        bam_destroy1(b);
+        return usage(stdout, EXIT_SUCCESS);
+    }
+
+    char out_mode[10] = {'w', 0, 0};
+    if (opts->compress_level != -1)
+        out_mode[1] = '0' + opts->compress_level;
+    sam_open_mode(out_mode+strlen(out_mode), opts->fn_out, NULL);
+    fp_out = sam_open_format(opts->fn_out, out_mode, &opts->ga.out);
+    if (!fp_out) {
+        perror(opts->fn_out);
+        goto err; // ret is still -1 here
+    }
+    if (opts->p.pool)
+        hts_set_thread_pool(fp_out, &opts->p);
+
+    // Create header
+    if (ks_len(&read_str)) {
+        char CO[2100];
+        if (ks_len(&index_str))
+            snprintf(CO, sizeof(CO), "@CO\tReverse with: samtools fastq %s "
+                    "--index-format=\"%s\"\n",
+                    ks_str(&read_str), ks_str(&index_str));
+        else
+            snprintf(CO, sizeof(CO), "@CO\tReverse with: samtools fastq %s\n",
+                    ks_str(&read_str));
+
+        hdr_out = sam_hdr_parse(strlen(CO), CO);
+    } else {
+        hdr_out = sam_hdr_init();
+    }
+
+    // Read group
+    if (opts->rg_line) {
+        if (*opts->rg_line != '@')
+            ksprintf(&rg_line, "@RG\t%s", opts->rg_line);
+        else
+            kputs(opts->rg_line, &rg_line);
+    } else if (opts->rg) {
+        ksprintf(&rg_line, "@RG\tID:%s", opts->rg);
+    }
+
+    if (ks_len(&rg_line)) {
+        if (sam_hdr_add_lines(hdr_out, ks_str(&rg_line), 0) < 0)
+            goto err; // ret is still -1 here
+        rg = strstr(ks_str(&rg_line), "\tID:");
+        if (!rg) {
+            fprintf(stderr, "\"-r RG-LINE\" option contained no ID field\n");
+            goto err; // ret is still -1 here
+        }
+        rg += 4;
+
+        i = 0;
+        while (rg[i] != '\t' && rg[i] != '\0')
+            i++;
+        rg[i] = 0; // cut rg_line after the ID value so rg is just the ID string
+    }
+
+    if ((ret = sam_hdr_write(fp_out, hdr_out)) < 0)
+        goto err;
+
+
+    // Interleave / combine from n files (ids[0..n-1]).
+    int res;
+    int eof = 0; // number of inputs that reported end-of-file
+    do {
+        idx_seq.l = idx_qual.l = 0;
+        for (i = 0; i < n; i++) {
+            if ((res = sam_read1(fp_in[ids[i]], NULL, b)) < 0) {
+                if (res == -1) {
+                    eof++;
+                    continue;
+                } else
+                    break;
+            }
+
+            // index
+            if (ids[i] == FQ_I1 || ids[i] == FQ_I2) {
+                if (append_index(&idx_seq, &idx_qual, b) < 0) {
+                    res = -1; // NOTE(review): same value as EOF; relies on the eof != n check below
+                    break;
+                }
+                continue;
+            }
+
+            // full read
+            if (idx_seq.l) {
+                if (opts->idx_both || ids[i] == FQ_SINGLE ||
+                    ids[i] == FQ_R0 || ids[i] == FQ_R1) {
+                    if (bam_aux_append(b, opts->barcode_seq, 'Z', idx_seq.l,
+                                       (uint8_t *)idx_seq.s) ||
+                        bam_aux_append(b, opts->barcode_qual, 'Z', idx_qual.l,
+                                       (uint8_t *)idx_qual.s)) {
+                        res = -1;
+                        break;
+                    }
+                }
+            }
+
+            switch(ids[i]) {
+            case FQ_R0:
+                // unpaired; no flags to declare
+                break;
+            case FQ_SINGLE:
+                // paired (but don't know if R1 or R2) or unpaired.
+                // We rely on the /1 and /2 read suffix parsing in htslib
+                // to distinguish the two cases, or CASAVA tags if
+                // explicitly enabled.
+                break;
+            case FQ_R1:
+                if ((b->core.flag & (BAM_FREAD1 | BAM_FREAD2)) == 0)
+                    b->core.flag |= BAM_FREAD1;
+                b->core.flag |= BAM_FPAIRED;
+                if (i+1 < n && ids[i+1] == FQ_R2)
+                    b->core.flag |= BAM_FMUNMAP;
+                break;
+            case FQ_R2:
+                b->core.flag |= BAM_FPAIRED | BAM_FREAD2;
+                if (i > 0 && ids[i-1] == FQ_R1)
+                    b->core.flag |= BAM_FMUNMAP;
+                break;
+            }
+
+            if (rg) {
+                if (bam_aux_append(b, "RG", 'Z', strlen(rg)+1,
+                                   (uint8_t *)rg) < 0) {
+                    ret = -1;
+                    goto err;
+                }
+            }
+
+            if (opts->order) {
+                if (bam_aux_update_int(b, opts->order, read_num++) < 0) {
+                    ret = -1;
+                    goto err;
+                }
+            }
+
+            res = sam_write1(fp_out, hdr_out, b);
+        }
+    } while (res >= 0);
+
+    if (res != -1) { // anything below -1 is a read or write failure, not EOF
+        print_error("import", "truncated file. Aborting");
+        ret = res;
+        goto err;
+    }
+
+    if (eof != n) { // some input ended before the others
+        print_error("import", "input files with differing number of records");
+        ret = -1;
+        goto err;
+    }
+
+    // Close and return
+    ret = 0;
+err:
+    bam_destroy1(b);
+    sam_hdr_destroy(hdr_out);
+    ks_free(&rg_line);
+    ks_free(&index_str);
+    ks_free(&read_str);
+    if (fp_out) {
+        if (sam_close(fp_out) < 0) {
+            perror(opts->fn_out);
+            ret |= -1;
+        }
+    }
+    for (i = 0; i < FQ_END; i++) {
+        if (fp_in[i] && sam_close(fp_in[i]) < 0) {
+            perror(opts->fn[i]);
+            ret |= -1;
+        }
+    }
+    ks_free(&idx_seq);
+    ks_free(&idx_qual);
+
+    return ret; // 0 on success, negative on any failure
+}
+
+int main_import(int argc, char *argv[]) { // entry point of "samtools import"; returns the exit status
+    int c;
+    opts_t opts = {
+        .no_pg = 0,
+        .ga = SAM_GLOBAL_ARGS_INIT,
+        .fn = {NULL},
+        .fn_out = "-",
+        .casava = 0,
+        .barcode_seq = "BC",
+        .barcode_qual = "QT",
+        .aux = NULL,
+        .rg = NULL,
+        .rg_line = NULL,
+        .order = NULL,
+        .compress_level = -1,
+    };
+    kstring_t rg = {0}; // backing store for opts.rg_line, built from -r options
+
+    static const struct option lopts[] = {
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, '-', '@'),
+        {"no-PG", no_argument, NULL, 9},
+        {"i1", required_argument, NULL, 1},
+        {"i2", required_argument, NULL, 2},
+        {"r1", required_argument, NULL, '1'},
+        {"r2", required_argument, NULL, '2'},
+        {"rg", required_argument, NULL, 'R'},
+        {"rg-line", required_argument, NULL, 'r'},
+        {"order", required_argument, NULL, 3},
+        {"barcode-tag", required_argument, NULL, 4},
+        {"quality-tag", required_argument, NULL, 5},
+        { NULL, 0, NULL, 0 }
+    };
+
+    while ((c = getopt_long(argc, argv, "1:2:s:0:bhiT:r:R:o:O:u@:", lopts, NULL)) >= 0) {
+        switch (c) {
+        case 'b': opts.idx_both = 1; break;
+        case '0': opts.fn[FQ_R0] = optarg; break;
+        case '1': opts.fn[FQ_R1] = optarg; break;
+        case '2': opts.fn[FQ_R2] = optarg; break;
+        case  1:  opts.fn[FQ_I1] = optarg; break;
+        case  2:  opts.fn[FQ_I2] = optarg; break;
+        case 's': opts.fn[FQ_SINGLE] = optarg; break;
+        case 'o': opts.fn_out = optarg; break;
+        case 'i': opts.casava = 1; break;
+        case  4:  opts.barcode_seq = optarg; break;
+        case  5:  opts.barcode_qual = optarg; break;
+        case 'T': opts.aux = optarg; break;
+        case 'u': opts.compress_level = 0; break;
+        case 'R': opts.rg = optarg; break;
+        case 'r': // repeated -r values are joined with tabs into one @RG line
+            if (*optarg != '@' && ks_len(&rg) == 0)
+                kputs("@RG", &rg);
+            if (ks_len(&rg))
+                kputc_('\t', &rg);
+            kputs(optarg, &rg);
+            opts.rg_line = rg.s; // refreshed each time: kputs may move the buffer
+            break;
+
+        case 9: opts.no_pg = 1; break;
+        case 3: opts.order = optarg; break;
+
+        case 'h': return usage(stdout, EXIT_SUCCESS);
+        case '?': return usage(stderr, EXIT_FAILURE);
+
+        default:
+            if (parse_sam_global_opt(c, optarg, lopts, &opts.ga) != 0)
+                return usage(stderr, EXIT_FAILURE);
+            break;
+        }
+    }
+
+    if (opts.ga.nthreads > 0) {
+        if (!(opts.p.pool = hts_tpool_init(opts.ga.nthreads))) {
+            fprintf(stderr, "Failed to create thread pool\n");
+            if (rg.s)
+                free(rg.s);
+            return EXIT_FAILURE; // same failure status as the other error exits
+        }
+    }
+
+    int ret = import_fastq(argc-optind, argv+optind, &opts) ? 1 : 0; // any non-zero result becomes exit status 1
+
+    if (rg.s)
+        free(rg.s);
+
+    if (opts.p.pool)
+        hts_tpool_destroy(opts.p.pool);
+
+    return ret;
+}
diff --git a/samtools/bam_import.c.pysam.c b/samtools/bam_import.c.pysam.c
new file mode 100644
index 0000000..1307ac6
--- /dev/null
+++ b/samtools/bam_import.c.pysam.c
@@ -0,0 +1,489 @@
+#include "samtools.pysam.h"
+
+/* bam_import -- Import of FASTQ files.
+ *
+ *   samtools import -1 a_1.fq -2 a_2.fq --i1 a_i1.fq --i2 a_i2.fq
+ *   samtools import a_1.fq a_2.fq
+ *   samtools import a_interleaved.fq
+ *
+ * Copyright (C) 2020 Genome Research Ltd.
+ *
+ * Author: James Bonfield <jkb@sanger.ac.uk>
+ */
+
+/*
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notices and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+// TODO: Store other non-aux comments; in new sam tag?
+
+#include <config.h>
+#include <ctype.h>
+
+#include "htslib/sam.h"
+#include "htslib/thread_pool.h"
+
+#include "samtools.h"
+#include "sam_opts.h"
+
+static int usage(FILE *fp, int exit_status) { // Print the "samtools import" help text to fp.
+    fprintf(fp, "Usage: samtools import [options] [file.fastq ...]\n");
+    fprintf(fp, "\n");
+    fprintf(fp, "Options:\n");
+    fprintf(fp, "  -s FILE      Read paired-ended data from single FILE\n");
+    fprintf(fp, "  -0 FILE      Read single-ended data from FILE\n");
+    fprintf(fp, "  -1 FILE      Read-1 from FILE\n");
+    fprintf(fp, "  -2 FILE      Read-2 from FILE\n");
+    fprintf(fp, "  --i1 FILE    Index-1 from FILE\n");
+    fprintf(fp, "  --i2 FILE    Index-2 from FILE\n");
+    fprintf(fp, "  -i           Parse CASAVA identifier\n");
+    fprintf(fp, "  --barcode-tag TAG\n");
+    fprintf(fp, "               Tag to use with barcode sequences [BC]\n");
+    fprintf(fp, "  --quality-tag TAG\n");
+    fprintf(fp, "               Tag to use with barcode qualities [QT]\n");
+    fprintf(fp, "  -r STRING    Build up a complete @RG line\n");
+    fprintf(fp, "  -R STRING    Add a simple RG line of \"@RG\\tID:STRING\"\n");
+    fprintf(fp, "  -T TAGLIST   Parse tags in SAM format; list of '*' for all\n");
+    fprintf(fp, "  -o FILE      Output to FILE instead of stdout\n");
+    fprintf(fp, "  -u           Uncompressed output\n");
+    fprintf(fp, "  --order TAG  Store Nth record count in TAG\n");
+    fprintf(fp, "\n");
+    sam_global_opt_help(fp, "-.O.-@--");
+
+    fprintf(fp, "\nA single fastq file will be interpreted as -s, -0 or -1 depending on\n");
+    fprintf(fp, "file contents, and a pair of fastq files as \"-1 FILE1 -2 FILE2\".\n");
+    // The status is handed straight back so callers can "return usage(fp, status);".
+    return exit_status;
+}
+
+// Input file slots. Order matters here as we want to read index elements before
+// main sequences so on reading the seqs we can emit a fully annotated record.
+enum fileno {
+    FQ_I1, FQ_I2, // index seqs for R1 and R2
+    FQ_R0,        // single file and unpaired data (single-ended tech).
+    FQ_R1, FQ_R2, // separate read1 and read2 files
+    FQ_SINGLE,    // single file, but with read1 and/or read2 present.
+    FQ_END        // number of slots above; used to size and loop over per-file arrays
+};
+
+typedef struct {       // Settings for one "samtools import" run, filled in by main_import()
+    sam_global_args ga; // shared global options; .in/.out formats and .nthreads are used here
+    int no_pg;         // set by --no-PG
+    char *fn[FQ_END], *fn_out; // input filename per enum fileno slot (NULL = unused); output filename (-o)
+    int idx_both;      // add index to READ2 too, not just READ1
+    int casava;        // -i: enable CASAVA identifier parsing on the inputs
+    char *barcode_seq; // --barcode-tag: aux tag for barcode sequences (default "BC")
+    char *barcode_qual; // --quality-tag: aux tag for barcode qualities (default "QT")
+    char *aux;         // -T: tag list passed to the fastq reader; "*" or "" becomes NULL
+    char *rg;          // -R: read-group ID for a simple "@RG\tID:..." line
+    char *rg_line;     // -r: accumulated @RG line (points into a buffer owned by main_import)
+    char *order;       // --order: aux tag receiving the running record count
+    int compress_level; // -1 = leave the output mode's default; -u sets 0
+    htsThreadPool p;   // thread pool attached to every file; p.pool stays NULL without threads
+} opts_t;
+
+// Append the sequence and quality of BAM record b to the BC:Z / QT:Z style
+// aux strings s and q. Returns 0 on success, -1 if either buffer cannot grow.
+static int append_index(kstring_t *s, kstring_t *q, bam1_t *b) {
+    char *sp, *qp;
+    if (ks_resize(s, s->l + b->core.l_qseq+1 +1) < 0) // room for a separator, the bases and a NUL
+        return -1;
+    if (ks_resize(q, q->l + b->core.l_qseq+1 +1) < 0)
+        return -1;
+    // A non-empty string's length counts its NUL (see below), so step back onto it.
+    sp = s->s + s->l - (s->l > 0);
+    qp = q->s + q->l - (q->l > 0);
+    // Later entries are joined to earlier ones with '-' (sequence) and ' ' (quality).
+    if (s->l)
+        *sp++ = '-';
+
+    if (q->l)
+        *qp++ = ' ';
+
+    int i;
+    uint8_t *seq = bam_get_seq(b);
+    uint8_t *qual = bam_get_qual(b);
+    for (i = 0; i < b->core.l_qseq; i++) {
+        *sp++ = seq_nt16_str[bam_seqi(seq, i)]; // 4-bit packed base -> letter
+        *qp++ = qual[i] + '!';                  // quality value -> printable char (offset '!')
+    }
+    *sp++ = 0;
+    *qp++ = 0;
+    // The recorded lengths include the terminating NUL just written.
+    s->l = sp - s->s;
+    q->l = qp - q->s;
+
+    return 0;
+}
+
+static int import_fastq(int argc, char **argv, opts_t *opts) {
+    int i, n, ret = 0;
+    samFile *fp_in[FQ_END] = {NULL};
+    bam1_t *b = bam_init1();
+    int ids[FQ_END];
+    samFile *fp_out = NULL;
+    sam_hdr_t *hdr_out = NULL;
+    kstring_t index_str = {0,0};
+    kstring_t read_str = {0,0};
+    char *rg = opts->rg;
+    kstring_t rg_line = {0,0};
+    uint64_t read_num = 0;
+    kstring_t idx_seq  = {0};
+    kstring_t idx_qual = {0};
+
+    // Any additional arguments are assumed to be r1 r2, as a
+    // short cut. We support reading index tags out of those too (eg
+    // Illumina CASAVA format), but if we do that we lack the barcode
+    // quality string.
+    //
+    // We also consider a read name ending in /1 or /2 to be a single
+    // file containing interleaved fastq records for both ends.
+    // These will be labeled as fn[FQ_R1] but adjusted during reading.
+    if (argc == 1)
+        opts->fn[FQ_SINGLE] = argv[0];
+    else
+        for (i = 0; i < 4; i++)
+            if (argc > i)
+                opts->fn[FQ_R1+i] = argv[i];
+
+    // Open all files
+    for (i = n = 0; i < FQ_END; i++) {
+        if (!opts->fn[i])
+            continue;
+        fp_in[i] = sam_open_format(opts->fn[i], "r", &opts->ga.in);
+        if (!fp_in[i]) {
+            perror(opts->fn[i]);
+            ret = -1;
+            goto err;
+        }
+        if (opts->p.pool)
+            hts_set_thread_pool(fp_in[i], &opts->p);
+        ids[n++] = i;
+
+        if (opts->casava)
+            hts_set_opt(fp_in[i], FASTQ_OPT_CASAVA, 1);
+        if (opts->barcode_seq) // for auto-CASAVA parsing
+            hts_set_opt(fp_in[i], FASTQ_OPT_BARCODE, opts->barcode_seq);
+        if (opts->aux)
+            hts_set_opt(fp_in[i], FASTQ_OPT_AUX,
+                        *opts->aux == '*' || *opts->aux == '\0'
+                        ? NULL : opts->aux);
+
+        switch (i) {
+        case FQ_I1:
+            kputs("--i1 I1.fastq ", &read_str);
+            kputs("i*", &index_str);
+            break;
+        case FQ_I2:
+            kputs("--i2 I2.fastq ", &read_str);
+            kputs("i*", &index_str);
+            break;
+
+        case FQ_R0:
+            kputs("-0 unpaired.fastq ", &read_str);
+            break;
+
+        case FQ_R1:
+            kputs("-1 R1.fastq ", &read_str);
+            break;
+
+        case FQ_R2:
+            kputs("-2 R2.fastq ", &read_str);
+            break;
+
+        case FQ_SINGLE:
+            kputs("-N -o paired.fastq ", &read_str);
+            break;
+
+        default:
+            ks_clear(&read_str); // not reversible
+            kputs("", &read_str);
+        }
+    }
+    if (n == 0) {
+        bam_destroy1(b);
+        return usage(samtools_stdout, EXIT_SUCCESS);
+    }
+
+    char out_mode[10] = {'w', 0, 0};
+    if (opts->compress_level != -1)
+        out_mode[1] = '0' + opts->compress_level;
+    sam_open_mode(out_mode+strlen(out_mode), opts->fn_out, NULL);
+    fp_out = sam_open_format(opts->fn_out, out_mode, &opts->ga.out);
+    if (!fp_out) {
+        perror(opts->fn_out);
+        goto err;
+    }
+    if (opts->p.pool)
+        hts_set_thread_pool(fp_out, &opts->p);
+
+    // Create header
+    if (ks_len(&read_str)) {
+        char CO[2100];
+        if (ks_len(&index_str))
+            snprintf(CO, sizeof(CO), "@CO\tReverse with: samtools fastq %s "
+                    "--index-format=\"%s\"\n",
+                    ks_str(&read_str), ks_str(&index_str));
+        else
+            snprintf(CO, sizeof(CO), "@CO\tReverse with: samtools fastq %s\n",
+                    ks_str(&read_str));
+
+        hdr_out = sam_hdr_parse(strlen(CO), CO);
+    } else {
+        hdr_out = sam_hdr_init();
+    }
+
+    // Read group
+    if (opts->rg_line) {
+        if (*opts->rg_line != '@')
+            ksprintf(&rg_line, "@RG\t%s", opts->rg_line);
+        else
+            kputs(opts->rg_line, &rg_line);
+    } else if (opts->rg) {
+        ksprintf(&rg_line, "@RG\tID:%s", opts->rg);
+    }
+
+    if (ks_len(&rg_line)) {
+        if (sam_hdr_add_lines(hdr_out, ks_str(&rg_line), 0) < 0)
+            goto err;
+        rg = strstr(ks_str(&rg_line), "\tID:");
+        if (!rg) {
+            fprintf(samtools_stderr, "\"-r RG-LINE\" option contained no ID field\n");
+            goto err;
+        }
+        rg += 4;
+
+        i = 0;
+        while (rg[i] != '\t' && rg[i] != '\0')
+            i++;
+        rg[i] = 0;
+    }
+
+    if ((ret = sam_hdr_write(fp_out, hdr_out)) < 0)
+        goto err;
+
+
+    // Interleave / combine from n files (ids[0..n-1]).
+    int res;
+    int eof = 0;
+    do {
+        idx_seq.l = idx_qual.l = 0;
+        for (i = 0; i < n; i++) {
+            if ((res = sam_read1(fp_in[ids[i]], NULL, b)) < 0) {
+                if (res == -1) {
+                    eof++;
+                    continue;
+                } else
+                    break;
+            }
+
+            // index
+            if (ids[i] == FQ_I1 || ids[i] == FQ_I2) {
+                if (append_index(&idx_seq, &idx_qual, b) < 0) {
+                    res = -1;
+                    break;
+                }
+                continue;
+            }
+
+            // full read
+            if (idx_seq.l) {
+                if (opts->idx_both || ids[i] == FQ_SINGLE ||
+                    ids[i] == FQ_R0 || ids[i] == FQ_R1) {
+                    if (bam_aux_append(b, opts->barcode_seq, 'Z', idx_seq.l,
+                                       (uint8_t *)idx_seq.s) ||
+                        bam_aux_append(b, opts->barcode_qual, 'Z', idx_qual.l,
+                                       (uint8_t *)idx_qual.s)) {
+                        res = -1;
+                        break;
+                    }
+                }
+            }
+
+            switch(ids[i]) {
+            case FQ_R0:
+                // unpaired; no flags to declare
+                break;
+            case FQ_SINGLE:
+                // paired (but don't know if R1 or R2) or unpaired.
+                // We rely on the /1 and /2 read suffix parsing in htslib
+                // to distinguish the two cases, or CASAVA tags if
+                // explicitly enabled.
+                break;
+            case FQ_R1:
+                if ((b->core.flag & (BAM_FREAD1 | BAM_FREAD2)) == 0)
+                    b->core.flag |= BAM_FREAD1;
+                b->core.flag |= BAM_FPAIRED;
+                if (i+1 < n && ids[i+1] == FQ_R2)
+                    b->core.flag |= BAM_FMUNMAP;
+                break;
+            case FQ_R2:
+                b->core.flag |= BAM_FPAIRED | BAM_FREAD2;
+                if (i > 0 && ids[i-1] == FQ_R1)
+                    b->core.flag |= BAM_FMUNMAP;
+                break;
+            }
+
+            if (rg) {
+                if (bam_aux_append(b, "RG", 'Z', strlen(rg)+1,
+                                   (uint8_t *)rg) < 0) {
+                    ret = -1;
+                    goto err;
+                }
+            }
+
+            if (opts->order) {
+                if (bam_aux_update_int(b, opts->order, read_num++) < 0) {
+                    ret = -1;
+                    goto err;
+                }
+            }
+
+            res = sam_write1(fp_out, hdr_out, b);
+        }
+    } while (res >= 0);
+
+    if (res != -1) {
+        print_error("import", "truncated file. Aborting");
+        ret = res;
+        goto err;
+    }
+
+    if (eof != n) {
+        print_error("import", "input files with differing number of records");
+        ret = -1;
+        goto err;
+    }
+
+    // Close and return
+    ret = 0;
+err:
+    bam_destroy1(b);
+    sam_hdr_destroy(hdr_out);
+    ks_free(&rg_line);
+    ks_free(&index_str);
+    ks_free(&read_str);
+    if (fp_out) {
+        if (sam_close(fp_out) < 0) {
+            perror(opts->fn_out);
+            ret |= -1;
+        }
+    }
+    for (i = 0; i < FQ_END; i++) {
+        if (fp_in[i] && sam_close(fp_in[i]) < 0) {
+            perror(opts->fn[i]);
+            ret |= -1;
+        }
+    }
+    ks_free(&idx_seq);
+    ks_free(&idx_qual);
+
+    return ret;
+}
+
+int main_import(int argc, char *argv[]) { // "samtools import" entry point: parse options, then run import_fastq()
+    int c;
+    opts_t opts = {
+        .no_pg = 0,
+        .ga = SAM_GLOBAL_ARGS_INIT,
+        .fn = {NULL},
+        .fn_out = "-",
+        .casava = 0,
+        .barcode_seq = "BC",
+        .barcode_qual = "QT",
+        .aux = NULL,
+        .rg = NULL,
+        .rg_line = NULL,
+        .order = NULL,
+        .compress_level = -1,
+    };
+    kstring_t rg = {0}; // accumulates all -r arguments into one tab-separated @RG line
+
+    static const struct option lopts[] = {
+        SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, '-', '@'),
+        {"no-PG", no_argument, NULL, 9},
+        {"i1", required_argument, NULL, 1},
+        {"i2", required_argument, NULL, 2},
+        {"r1", required_argument, NULL, '1'},
+        {"r2", required_argument, NULL, '2'},
+        {"rg", required_argument, NULL, 'R'},
+        {"rg-line", required_argument, NULL, 'r'},
+        {"order", required_argument, NULL, 3},
+        {"barcode-tag", required_argument, NULL, 4},
+        {"quality-tag", required_argument, NULL, 5},
+        { NULL, 0, NULL, 0 }
+    };
+
+    while ((c = getopt_long(argc, argv, "1:2:s:0:bhiT:r:R:o:O:u@:", lopts, NULL)) >= 0) {
+        switch (c) {
+        case 'b': opts.idx_both = 1; break;
+        case '0': opts.fn[FQ_R0] = optarg; break;
+        case '1': opts.fn[FQ_R1] = optarg; break;
+        case '2': opts.fn[FQ_R2] = optarg; break;
+        case  1:  opts.fn[FQ_I1] = optarg; break; // --i1
+        case  2:  opts.fn[FQ_I2] = optarg; break; // --i2
+        case 's': opts.fn[FQ_SINGLE] = optarg; break;
+        case 'o': opts.fn_out = optarg; break;
+        case 'i': opts.casava = 1; break;
+        case  4:  opts.barcode_seq = optarg; break; // --barcode-tag
+        case  5:  opts.barcode_qual = optarg; break; // --quality-tag
+        case 'T': opts.aux = optarg; break;
+        case 'u': opts.compress_level = 0; break;
+        case 'R': opts.rg = optarg; break;
+        case 'r':
+            if (*optarg != '@' && ks_len(&rg) == 0)
+                kputs("@RG", &rg);
+            if (ks_len(&rg))
+                kputc_('\t', &rg);
+            kputs(optarg, &rg);
+            opts.rg_line = rg.s;
+            break;
+
+        case 9: opts.no_pg = 1; break; // --no-PG
+        case 3: opts.order = optarg; break; // --order
+        // Release any @RG text gathered from earlier -r options before bailing out.
+        case 'h': free(rg.s); return usage(samtools_stdout, EXIT_SUCCESS);
+        case '?': free(rg.s); return usage(samtools_stderr, EXIT_FAILURE);
+
+        default:
+            if (parse_sam_global_opt(c, optarg, lopts, &opts.ga) == 0)
+                break;
+            free(rg.s); return usage(samtools_stderr, EXIT_FAILURE);
+        }
+    }
+
+    if (opts.ga.nthreads > 0) {
+        if (!(opts.p.pool = hts_tpool_init(opts.ga.nthreads))) {
+            fprintf(samtools_stderr, "Failed to create thread pool\n");
+            if (rg.s)
+                free(rg.s);
+            return -1;
+        }
+    }
+    // Remaining (non-option) arguments are treated as input files; any failure maps to 1.
+    int ret = import_fastq(argc-optind, argv+optind, &opts) ? 1 : 0;
+
+    if (rg.s)
+        free(rg.s);
+
+    if (opts.p.pool)
+        hts_tpool_destroy(opts.p.pool);
+
+    return ret;
+}
diff --git a/samtools/bam_index.c.pysam.c b/samtools/bam_index.c.pysam.c
index 5399da7..7b2ee3e 100644
--- a/samtools/bam_index.c.pysam.c
+++ b/samtools/bam_index.c.pysam.c
@@ -170,7 +170,7 @@ static void usage_exit(FILE *fp, int exit_status)
 {
     fprintf(fp, "Usage: samtools idxstats [options] <in.bam>\n");
     sam_global_opt_help(fp, "-.---@-.");
-    exit(exit_status);
+    samtools_exit(exit_status);
 }
 
 int bam_idxstats(int argc, char *argv[])
diff --git a/samtools/bam_markdup.c b/samtools/bam_markdup.c
index 1619b5b..2da184f 100644
--- a/samtools/bam_markdup.c
+++ b/samtools/bam_markdup.c
@@ -1,7 +1,7 @@
 /*  bam_markdup.c -- Mark duplicates from a coord sorted file that has gone
                      through fixmates with the mate scoring option on.
 
-    Copyright (C) 2017-2019 Genome Research Ltd.
+    Copyright (C) 2017-2020 Genome Research Ltd.
 
     Author: Andrew Whitwham <aw7@sanger.ac.uk>
 
@@ -62,6 +62,7 @@ typedef struct {
     int mode;
     int write_index;
     int include_fails;
+    int check_chain;
     char *stats_file;
     char *arg_list;
     char *out_fn;
@@ -83,6 +84,7 @@ typedef struct read_queue_s {
     bam1_t *b;
     struct read_queue_s *duplicate;
     hts_pos_t pos;
+    int dup_checked;
 } read_queue_t;
 
 typedef struct {
@@ -94,8 +96,23 @@ typedef struct {
     char type;
 } dup_map_t;
 
+typedef struct {        // values cached for one duplicate while its chain is checked
+    bam1_t *b;          // the duplicate read itself
+    int64_t score;      // calc_score() of b (0 until computed)
+    int64_t mate_score; // get_mate_score() of b, fetched for paired reads (0 otherwise)
+    long x;             // x coordinate parsed from the read name (-1 until parsed)
+    long y;             // y coordinate parsed from the read name (-1 until parsed)
+    int opt;            // non-zero once the read is known to be an optical duplicate
+    int xpos;           // offset of the x coordinate within the read name
+} check_t;
 
 
+typedef struct {     // growable array of check_t entries
+    check_t *c;      // the entries; reallocated when length reaches size
+    size_t size;     // allocated capacity, counted in entries
+    size_t length;   // number of entries currently in use
+} check_list_t;
+
 static khint32_t do_hash(unsigned char *key, khint32_t len);
 
 static khint_t hash_key(key_data_t key) {
@@ -665,6 +682,7 @@ static int add_duplicate(khash_t(duplicates) *d_hash, bam1_t *dupe, char *orig_n
 }
 
 
+/* Get the position of the coordinates from the read name. */
 static inline int get_coordinate_positions(const char *qname, int *xpos, int *ypos) {
     int sep = 0;
     int pos = 0;
@@ -693,6 +711,66 @@ static inline int get_coordinate_positions(const char *qname, int *xpos, int *yp
     return sep;
 }
 
+/* Parse the x and y coordinates from a read name.  Returns 0 and fills the outputs, or 1 (outputs untouched) on failure. */
+static int get_coordinates(const char *name, int *xpos_out, long *x_coord, long *y_coord, long *warnings) {
+    int ret = 1; // stays 1 (failure) unless every step below succeeds
+    int seps, xpos = 0, ypos = 0;
+    long x = 0, y = 0;
+    char *end;
+
+    seps = get_coordinate_positions(name, &xpos, &ypos);
+
+    /* The most current Illumina read format at time of writing is:
+       @machine:run:flowcell:lane:tile:x:y:UMI or
+       @machine:run:flowcell:lane:tile:x:y
+
+       Counting the separating colons gives us a quick format check.
+       Older name formats have fewer elements.
+    */
+
+    if (!(seps == 3 || seps == 4 || seps == 6 || seps == 7)) {
+        (*warnings)++;
+
+        if (*warnings <= BMD_WARNING_MAX) { // every failure is counted, but only the first few are printed
+            fprintf(stderr, "[markdup] warning: cannot decipher read name %s for optical duplicate marking.\n", name);
+        }
+
+        return ret;
+    }
+
+    x = strtol(name + xpos, &end, 10);
+
+    if ((name + xpos) == end) { // strtol consumed nothing: no number at the x position
+        (*warnings)++;
+
+        if (*warnings <= BMD_WARNING_MAX) {
+            fprintf(stderr, "[markdup] warning: can not decipher X coordinate in %s .\n", name);
+        }
+
+        return ret;
+    }
+
+    y = strtol(name + ypos, &end, 10);
+
+    if ((name + ypos) == end) { // strtol consumed nothing: no number at the y position
+        (*warnings)++;
+
+        if (*warnings <= BMD_WARNING_MAX) {
+            fprintf(stderr, "[markdup] warning: can not decipher y coordinate in %s .\n", name);
+        }
+
+        return ret;
+    }
+
+    *x_coord = x;
+    *y_coord = y;
+    *xpos_out = xpos; // offset of the x field, reused by callers for prefix comparison
+    ret = 0;
+
+    return ret;
+}
+
+
 /* Using the coordinates from the Illumina read name, see whether the duplicated read is
    close enough (set by max_dist) to the original to be counted as optical.*/
 
@@ -806,6 +884,59 @@ static int optical_duplicate(bam1_t *ori, bam1_t *dup, long max_dist, long *warn
 }
 
 
+/* Using the coordinates from the Illumina read name, see whether the duplicated read is
+   close enough (set by max_dist) to the original to be counted as optical.
+   Returns 1 if it is, else 0; c receives dup's x, y and xpos unless dup's name cannot be parsed.
+   This function needs the values from the first read to be already calculated. */
+
+static int optical_duplicate_partial(const char *name, const int oxpos, const long ox, const long oy, bam1_t *dup, check_t *c, long max_dist, long *warnings) {
+    int ret = 0;
+    char *duplicate;
+    int dxpos = 0;
+    long dx, dy;
+
+    duplicate = bam_get_qname(dup);
+
+    if (get_coordinates(duplicate, &dxpos, &dx, &dy, warnings)) {
+        return ret; // unparsable name: not optical, and c is left as the caller set it
+    }
+
+    if (strncmp(name, duplicate, oxpos - 1) == 0) { // presumably everything before the x field (minus its separator) - confirm
+        // the initial parts match, look at the numbers
+        long xdiff, ydiff;
+
+        if (ox > dx) {
+            xdiff = ox - dx;
+        } else {
+            xdiff = dx - ox;
+        }
+
+        if (xdiff <= max_dist) {
+            // still might be optical
+
+            if (oy > dy) {
+                ydiff = oy - dy;
+            } else {
+                ydiff = dy - oy;
+            }
+
+            if (ydiff <= max_dist) ret = 1;
+        }
+    }
+
+    c->x = dx; // cache the duplicate's own coordinates whether or not it was optical
+    c->y = dy;
+    c->xpos = dxpos;
+
+    if (ret) {
+        c->opt = ret; // only ever set here, never cleared, so an earlier optical flag survives
+    }
+
+    return ret;
+}
+
+
+/* Mark the read as a duplicate and update the duplicate hash (if needed) */
 static int mark_duplicates(md_param_t *param, khash_t(duplicates) *dup_hash, bam1_t *ori, bam1_t *dup,
                            long *optical, long *warn) {
     char dup_type = 0;
@@ -814,7 +945,7 @@ static int mark_duplicates(md_param_t *param, khash_t(duplicates) *dup_hash, bam
     dup->core.flag |= BAM_FDUP;
 
     if (param->tag) {
-        if (bam_aux_append(dup, "do", 'Z', strlen(bam_get_qname(ori)) + 1, (uint8_t*)bam_get_qname(ori))) {
+        if (bam_aux_update_str(dup, "do", strlen(bam_get_qname(ori)) + 1, bam_get_qname(ori))) {
             fprintf(stderr, "[markdup] error: unable to append 'do' tag.\n");
             return -1;
         }
@@ -822,12 +953,12 @@ static int mark_duplicates(md_param_t *param, khash_t(duplicates) *dup_hash, bam
 
     if (param->opt_dist) { // mark optical duplicates
         if (optical_duplicate(ori, dup, param->opt_dist, warn)) {
-            bam_aux_append(dup, "dt", 'Z', 3, (const uint8_t *)"SQ");
+            bam_aux_update_str(dup, "dt", 3, "SQ");
             dup_type = 'O';
             (*optical)++;
         } else {
             // not an optical duplicate
-            bam_aux_append(dup, "dt", 'Z', 3, (const uint8_t *)"LB");
+            bam_aux_update_str(dup, "dt", 3, "LB");
         }
     }
 
@@ -853,17 +984,12 @@ static int mark_duplicates(md_param_t *param, khash_t(duplicates) *dup_hash, bam
 }
 
 
+/* If the duplicate type has changed to optical then retag the read and update the duplicate hash. */
 static inline int optical_retag(md_param_t *param, khash_t(duplicates) *dup_hash, bam1_t *b, int paired, long *optical_single, long *optical_pair) {
     int ret = 0;
-    uint8_t *data;
 
-    // remove any existing dt tag
-    if ((data = bam_aux_get(b, "dt")) != NULL) {
-        bam_aux_del(b, data);
-    }
-
-    if (bam_aux_append(b, "dt", 'Z', 3, (const uint8_t *)"SQ")) {
-        fprintf(stderr, "[markdup] error: unable to append 'dt' tag.\n");
+    if (bam_aux_update_str(b, "dt", 3, "SQ")) {
+        fprintf(stderr, "[markdup] error: unable to update 'dt' tag.\n");
         ret = -1;
     }
 
@@ -897,23 +1023,54 @@ static inline int optical_retag(md_param_t *param, khash_t(duplicates) *dup_hash
 }
 
 
+/* Check all duplicates of the highest quality read (the "original") for consistency.  Also
+   pre-calculate any values for use in check_duplicate_chain later.
+   Returns 0 on success, >0 on coordinate reading error (program can continue) or
+   <0 on an error (program should not continue). */
+static int check_chain_against_original(md_param_t *param, khash_t(duplicates) *dup_hash, read_queue_t *ori,
+             check_list_t *list, long *warn, long *optical_single, long *optical_pair) {
 
-/*
-    Where there is more than one duplicate go down the list and check for optical duplicates and change
-    do tags (where used) to point to original (non-duplicate) read.
-*/
-static int duplicate_chain_check(md_param_t *param, khash_t(duplicates) *dup_hash, read_queue_t *ori,
-             long *warn, long *optical_single, long *optical_pair) {
     int ret = 0;
-    read_queue_t *current = ori->duplicate;
     char *ori_name = bam_get_qname(ori->b);
-    int have_original = !(ori->b->core.flag & BAM_FDUP);
-    int ori_paired = (ori->b->core.flag & BAM_FPAIRED) && !(ori->b->core.flag & BAM_FMUNMAP);
+    read_queue_t *current = ori->duplicate;
+    int xpos;
+    long x, y;
+
+    if (param->opt_dist) {
+        if ((ret = get_coordinates(ori_name, &xpos, &x, &y, warn))) {
+            return ret;
+        }
+    }
+
+    list->length = 0;
 
     while (current) {
-        int current_paired = (current->b->core.flag & BAM_FPAIRED) && !(current->b->core.flag & BAM_FMUNMAP);
+        check_t *c;
+
+        if (list->length >= list->size) {
+            check_t *tmp;
+
+            list->size *= 2;
+
+            if (!(tmp = realloc(list->c, list->size * sizeof(check_t)))) {
+                fprintf(stderr, "[markdup] error: Unable to expand opt check list.\n");
+                return -1;
+            }
+
+            list->c = tmp;
+        }
+
+        c = &list->c[list->length];
 
-        if (param->tag && have_original) {
+        c->b = current->b;
+        c->x = -1;
+        c->y = -1;
+        c->opt = 0;
+        c->score = 0;
+        c->mate_score = 0;
+        current->dup_checked = 1;
+
+        if (param->tag) {
             uint8_t *data;
 
             // at this stage all duplicates should have a do tag
@@ -923,10 +1080,8 @@ static int duplicate_chain_check(md_param_t *param, khash_t(duplicates) *dup_has
 
                 if (old_name) {
                     if (strcmp(old_name, ori_name) != 0) {
-                        bam_aux_del(current->b, data);
-
-                        if (bam_aux_append(current->b, "do", 'Z', strlen(ori_name) + 1, (uint8_t*)ori_name)) {
-                            fprintf(stderr, "[markdup] error: unable to append 'do' tag.\n");
+                        if (bam_aux_update_str(current->b, "do", strlen(ori_name) + 1, (const char *)ori_name)) {
+                            fprintf(stderr, "[markdup] error: unable to update 'do' tag.\n");
                             ret =  -1;
                             break;
                         }
@@ -940,118 +1095,226 @@ static int duplicate_chain_check(md_param_t *param, khash_t(duplicates) *dup_has
         }
 
         if (param->opt_dist) {
-            int is_cur_opt = 0, is_ori_opt = 0;
             uint8_t *data;
             char *dup_type;
+            int is_opt = 0;
+            int current_paired = (current->b->core.flag & BAM_FPAIRED) && !(current->b->core.flag & BAM_FMUNMAP);
 
-            if ((data = bam_aux_get(ori->b, "dt"))) {
+            if ((data = bam_aux_get(current->b, "dt"))) {
                 if ((dup_type = bam_aux2Z(data))) {
                     if (strcmp(dup_type, "SQ") == 0) {
-                        is_ori_opt = 1;
+                        c->opt = 1;
                     }
                 }
             }
 
-            if ((data = bam_aux_get(current->b, "dt"))) {
-                if ((dup_type = bam_aux2Z(data))) {
-                    if (strcmp(dup_type, "SQ") == 0) {
-                        is_cur_opt = 1;
-                    }
+            // need to run this to get the duplicates x and y scores
+            is_opt = optical_duplicate_partial(ori_name, xpos, x, y, current->b, c, param->opt_dist, warn);
+
+            if (!c->opt && is_opt) {
+                if (optical_retag(param, dup_hash, current->b, current_paired, optical_single, optical_pair)) {
+                    ret = -1;
+                    break;
                 }
+
+                c->opt = 1;
             }
 
-            if (!(is_ori_opt && is_cur_opt)) {
-                // if both are already optical duplicates there is no need to check again, otherwise...
+            c->score = calc_score(current->b);
 
-                if (optical_duplicate(ori->b, current->b, param->opt_dist, warn)) {
-                    // find out which one is the duplicate
-                    int is_cur_dup = 0;
+            if (current_paired) {
+                if ((c->mate_score = get_mate_score(current->b)) == -1) {
+                     fprintf(stderr, "[markdup] error: no ms score tag. Please run samtools fixmate on file first.\n");
+                     ret = -1;
+                     break;
+                }
+            }
+        }
 
-                    if (have_original) {
-                        // compared against an original, this is a dup.
-                        is_cur_dup = 1;
-                    } else if (ori_paired != current_paired) {
-                        if (!current_paired) {
-                            // current is single vs pair, this is a dup.
-                            is_cur_dup = 1;
-                        }
-                    } else {
-                        // do it by scores
-                        int64_t ori_score, curr_score;
+        current = current->duplicate;
+        list->length++;
+    }
 
-                        if ((ori->b->core.flag & BAM_FQCFAIL) != (current->b->core.flag & BAM_FQCFAIL)) {
-                            if (ori->b->core.flag & BAM_FQCFAIL) {
-                                ori_score  = 0;
-                                curr_score = 1;
-                            } else {
-                                ori_score  = 1;
-                                curr_score = 0;
-                            }
-                        } else {
-                            ori_score  = calc_score(ori->b);
-                            curr_score = calc_score(current->b);
-
-                            if (current_paired) {
-                                // they are pairs so add mate scores.
-                                int64_t mate_tmp;
-
-                                if ((mate_tmp = get_mate_score(ori->b)) == -1) {
-                                    fprintf(stderr, "[markdup] error: no ms score tag. Please run samtools fixmate on file first.\n");
-                                    ret = -1;
-                                    break;
-                                } else {
-                                    ori_score += mate_tmp;
-                                }
+    return ret;
+}
 
-                                if ((mate_tmp = get_mate_score(current->b)) == -1) {
-                                    fprintf(stderr, "[markdup] error: no ms score tag. Please run samtools fixmate on file first.\n");
-                                    ret = -1;
-                                    break;
-                                } else {
-                                    curr_score += mate_tmp;
-                                }
-                            }
-                        }
 
-                        if (ori_score == curr_score) {
-                            if (strcmp(bam_get_qname(current->b), ori_name) < 0) {
-                                curr_score++;
-                            } else {
-                                curr_score--;
-                            }
-                        }
+static int xcoord_sort(const void *a, const void *b) { /* qsort() comparator: ascending order of check_t.x */
+    const check_t *ac = (const check_t *) a;
+    const check_t *bc = (const check_t *) b;
 
-                        if (ori_score > curr_score) {
-                            is_cur_dup = 1;
-                        }
+    return (ac->x > bc->x) - (ac->x < bc->x); /* sign only: a long difference may overflow or truncate as int */
+}
+
+
+/* Check all the duplicates against each other to see if they are optical duplicates.  Returns 0, or -1 on failure. */
+static int check_duplicate_chain(md_param_t *param, khash_t(duplicates) *dup_hash, check_list_t *list,
+             long *warn, long *optical_single, long *optical_pair) {
+    int ret = 0; // stays 0 unless optical_retag() fails
+    size_t curr = 0;
+
+    qsort(list->c, list->length, sizeof(list->c[0]), xcoord_sort); // sort by x so candidates within opt_dist are adjacent
+
+    while (curr + 1 < list->length) { // avoids "length - 1", which would wrap the size_t for an empty list
+        check_t *current = &list->c[curr];
+        size_t count = curr;
+        char *cur_name = bam_get_qname(current->b);
+        int current_paired = (current->b->core.flag & BAM_FPAIRED) && !(current->b->core.flag & BAM_FMUNMAP);
+
+        while (++count < list->length && (list->c[count].x - current->x <= param->opt_dist)) {
+            // while close enough along the x coordinate
+            check_t *chk = &list->c[count];
+
+            if (current->opt && chk->opt)
+                continue;
+
+            // if both are already optical duplicates there is no need to check again, otherwise...
+
+            long ydiff; // absolute distance along the y coordinate
+
+            if (current->y > chk->y) {
+                ydiff = current->y - chk->y;
+            } else {
+                ydiff = chk->y - current->y;
+            }
+
+            if (ydiff > param->opt_dist)
+                continue;
+
+            // the numbers are right, check the names (NOTE(review): xpos is assumed to be set before this call -- confirm)
+            if (strncmp(cur_name, bam_get_qname(chk->b), current->xpos - 1) != 0)
+                continue;
+
+            // optical duplicates
+            int chk_dup = 0; // 1 if chk is the one to tag, 0 if current is
+            int chk_paired = (chk->b->core.flag & BAM_FPAIRED) && !(chk->b->core.flag & BAM_FMUNMAP);
+
+            if (current_paired != chk_paired) {
+                if (!chk_paired) {
+                    // chk is single vs pair, this is a dup.
+                    chk_dup = 1;
+                }
+            } else {
+                // do it by scores
+                int64_t cur_score, chk_score;
+
+                if ((current->b->core.flag & BAM_FQCFAIL) != (chk->b->core.flag & BAM_FQCFAIL)) {
+                    if (current->b->core.flag & BAM_FQCFAIL) { // the QC-failed read always loses
+                        cur_score = 0;
+                        chk_score = 1;
+                    } else {
+                        cur_score = 1;
+                        chk_score = 0;
                     }
+                } else {
+                    cur_score = current->score;
+                    chk_score = chk->score;
 
-                    if (is_cur_dup) {
-                        // the current is the optical duplicate
-                        if (!is_cur_opt) { // only change if not already an optical duplicate
-                            if (optical_retag(param, dup_hash, current->b, current_paired, optical_single, optical_pair)) {
-                                ret = -1;
-                                break;
-                            }
-                        }
+                    if (current_paired) {
+                        // they are pairs so add mate scores.
+                        chk_score += chk->mate_score;
+                        cur_score += current->mate_score;
+                    }
+                }
+
+                if (cur_score == chk_score) { // tie: decide by read name order
+                    if (strcmp(bam_get_qname(chk->b), cur_name) < 0) {
+                        chk_score++;
                     } else {
-                        if (!is_ori_opt) {
-                            if (optical_retag(param, dup_hash, ori->b, ori_paired, optical_single, optical_pair)) {
-                                ret = -1;
-                                break;
-                            }
-                        }
+                        chk_score--;
                     }
                 }
+
+                if (cur_score > chk_score) {
+                    chk_dup = 1;
+                }
+            }
+
+            if (chk_dup) {
+                // the duplicate is the optical duplicate
+                if (!chk->opt) { // only change if not already an optical duplicate
+                    if (optical_retag(param, dup_hash, chk->b, chk_paired, optical_single, optical_pair)) {
+                        ret = -1;
+                        goto fail;
+                    }
+
+                    chk->opt = 1;
+                }
+            } else {
+                if (!current->opt) { // current loses instead; tag it once only
+                    if (optical_retag(param, dup_hash, current->b, current_paired, optical_single, optical_pair)) {
+                        ret = -1;
+                        goto fail;
+                    }
+
+                    current->opt = 1;
+                }
             }
         }
 
-        current = current->duplicate;
+        curr++;
+    }
+
+ fail:
+    return ret;
+}
+
+
+/* Where there is more than one duplicate go down the list and check for optical duplicates and change
+   do tags (where used) to point to original (non-duplicate) read.  Returns 0 on success, -1 on error. */
+static int find_duplicate_chains(md_param_t *param, klist_t(read_queue) *read_buffer, khash_t(duplicates) *dup_hash, check_list_t *dup_list,
+                                const hts_pos_t prev_coord, const int32_t prev_tid, long *warn, long *optical_single,
+                                long *optical_pair, const int check_range) {
+    int ret = 0;
+    kliter_t(read_queue) *rq;
+
+    rq = kl_begin(read_buffer);
+
+    while (rq != kl_end(read_buffer)) {
+        read_queue_t *in_read = &kl_val(rq);
+
+        if (check_range) {
+            /* Just check against the moving window of reads based on coordinates and max read length. */
+            if (in_read->pos + param->max_length > prev_coord && in_read->b->core.tid == prev_tid && (prev_tid != -1 || prev_coord != -1)) {
+                break; // stop at the first buffered read that is still inside the window
+            }
+        } else {
+            // this is the last set of results and the end entry will be blank
+            if (!bam_get_qname(in_read->b)) {
+                break;
+            }
+        }
+
+        if (!(in_read->b->core.flag & BAM_FDUP) && in_read->duplicate) { // is the head of a duplicate chain
+
+            // check against the original for tagging and optical duplication
+            if ((ret = check_chain_against_original(param, dup_hash, in_read, dup_list, warn, optical_single, optical_pair))) {
+                if (ret < 0) { // real error
+                    ret = -1;
+                    break;
+                } else { // coordinate decoding error
+                    ret = 0; // not fatal: the chain is dropped without further optical checks
+                    in_read->duplicate = NULL;
+                    continue; // rq is not advanced; the same entry is re-tested, no longer a chain head, and skipped
+                }
+            }
+
+            // check the rest of the duplicates against each other for optical duplication
+            if (param->opt_dist && check_duplicate_chain(param, dup_hash, dup_list, warn, optical_single, optical_pair)) {
+                ret = -1;
+                break;
+            }
+
+            in_read->duplicate = NULL; // chain handled; unlink it so this head is not processed again
+        }
+
+        rq = kl_next(rq);
     }
 
     return ret;
 }
 
+
 /*
   Function to use when estimating library size.
 
@@ -1080,30 +1343,29 @@ static inline double coverage_equation(double x, double c, double n) {
 
 
 /* estimate the library size, based on the Picard code in DuplicationMetrics.java*/
-static unsigned long estimate_library_size(unsigned long read_pairs, unsigned long duplicate_pairs) {
+static unsigned long estimate_library_size(unsigned long paired_reads, unsigned long paired_duplicate_reads, unsigned long optical) {
     unsigned long estimated_size = 0;
+    unsigned long non_optical_pairs = (paired_reads - optical) / 2;
+    unsigned long unique_pairs = (paired_reads - paired_duplicate_reads) / 2;
+    unsigned long duplicate_pairs = (paired_duplicate_reads - optical) / 2;
 
-    read_pairs /= 2;
-    duplicate_pairs /= 2;
-
-    if ((read_pairs && duplicate_pairs) && (read_pairs > duplicate_pairs)) {
-        unsigned long unique_pairs = read_pairs - duplicate_pairs;
+    if ((non_optical_pairs && duplicate_pairs && unique_pairs) && (non_optical_pairs > duplicate_pairs)) {
         double m = 1;
         double M = 100;
         int i;
 
-        if (coverage_equation(m * (double)unique_pairs, (double)unique_pairs, (double)read_pairs) < 0) {
+        if (coverage_equation(m * (double)unique_pairs, (double)unique_pairs, (double)non_optical_pairs) < 0) {
             fprintf(stderr, "[markdup] warning: unable to calculate estimated library size.\n");
             return  estimated_size;
         }
 
-        while (coverage_equation(M * (double)unique_pairs, (double)unique_pairs, (double)read_pairs) > 0) {
+        while (coverage_equation(M * (double)unique_pairs, (double)unique_pairs, (double)non_optical_pairs) > 0) {
             M *= 10;
         }
 
         for (i = 0; i < 40; i++) {
             double r = (m + M) / 2;
-            double u = coverage_equation(r * (double)unique_pairs, (double)unique_pairs, (double)read_pairs);
+            double u = coverage_equation(r * (double)unique_pairs, (double)unique_pairs, (double)non_optical_pairs);
 
             if (u > 0) {
                 m = r;
@@ -1119,7 +1381,7 @@ static unsigned long estimate_library_size(unsigned long read_pairs, unsigned lo
         fprintf(stderr, "[markdup] warning: unable to calculate estimated library size."
                         " Read pairs %ld should be greater than duplicate pairs %ld,"
                         " which should both be non zero.\n",
-                        read_pairs, duplicate_pairs);
+                        non_optical_pairs, duplicate_pairs);
     }
 
     return estimated_size;
@@ -1153,6 +1415,7 @@ static int bam_mark_duplicates(md_param_t *param) {
     tmp_file_t temp;
     char *idx_fn = NULL;
     int exclude = 0;
+    check_list_t dup_list = {NULL, 0, 0};
 
     if (!pair_hash || !single_hash || !read_buffer || !dup_hash) {
         fprintf(stderr, "[markdup] out of memory\n");
@@ -1213,10 +1476,24 @@ static int bam_mark_duplicates(md_param_t *param) {
         goto fail;
     }
 
+    if (param->check_chain && !(param->tag || param->opt_dist))
+        param->check_chain = 0;
+
+    if (param->check_chain) {
+        dup_list.size = 128;
+        dup_list.c = NULL;
+
+        if ((dup_list.c = malloc(dup_list.size * sizeof(check_t))) == NULL) {
+            fprintf(stderr, "[markdup] error: unable to allocate memory for dup_list.\n");
+            goto fail;
+        }
+    }
+
     reading = writing = excluded = single_dup = duplicate = examined = pair = single = optical = single_optical = 0;
     np_duplicate = np_opt_duplicate = 0;
 
     while ((ret = sam_read1(param->in, header, in_read->b)) >= 0) {
+        int dup_checked = 0;
 
         // do some basic coordinate order checks
         if (in_read->b->core.tid >= 0) { // -1 for unmapped reads
@@ -1231,6 +1508,8 @@ static int bam_mark_duplicates(md_param_t *param) {
         prev_tid   =  in_read->b->core.tid;
         in_read->pair_key.single   = 1;
         in_read->single_key.single = 0;
+        in_read->duplicate = NULL;
+        in_read->dup_checked = 0;
 
         reading++;
 
@@ -1257,7 +1536,7 @@ static int bam_mark_duplicates(md_param_t *param) {
         // read must not be secondary, supplementary, unmapped or (possibly) failed QC
         if (!(in_read->b->core.flag & exclude)) {
             examined++;
-            in_read->duplicate = NULL;
+
 
             // look at the pairs first
             if ((in_read->b->core.flag & BAM_FPAIRED) && !(in_read->b->core.flag & BAM_FMUNMAP)) {
@@ -1300,17 +1579,15 @@ static int bam_mark_duplicates(md_param_t *param) {
                        // scores more than one read of the pair
                         bam1_t *dup = bp->p->b;
 
-                        in_read->duplicate = bp->p;
+                        if (param->check_chain)
+                            in_read->duplicate = bp->p;
+
                         bp->p = in_read;
 
                         if (mark_duplicates(param, dup_hash, bp->p->b, dup, &single_optical, &opt_warnings))
                             goto fail;
 
                         single_dup++;
-
-                        if (duplicate_chain_check(param, dup_hash, bp->p, &opt_warnings, &single_optical, &optical))
-                            goto fail;
-
                     }
                 } else {
                     fprintf(stderr, "[markdup] error: single hashing failure.\n");
@@ -1327,8 +1604,7 @@ static int bam_mark_duplicates(md_param_t *param) {
                     in_read->pair_key = pair_key;
                 } else if (ret == 0) {
                     int64_t old_score, new_score, tie_add = 0;
-                    bam1_t *dup;
-                    int check_chain = 0;
+                    bam1_t *dup = NULL;
 
                     bp = &kh_val(pair_hash, k);
 
@@ -1369,29 +1645,48 @@ static int bam_mark_duplicates(md_param_t *param) {
 
                     if (new_score + tie_add > old_score) { // swap reads
                         dup = bp->p->b;
-                        in_read->duplicate = bp->p;
+
+                        if (param->check_chain) {
+
+                            if (in_read->duplicate) {
+                                read_queue_t *current = in_read->duplicate;
+
+                                while (current->duplicate) {
+                                    current = current->duplicate;
+                                }
+
+                                current->duplicate = bp->p;
+                            } else {
+                                in_read->duplicate = bp->p;
+                            }
+                        }
+
                         bp->p = in_read;
                     } else {
-                        if (bp->p->duplicate) {
-                            in_read->duplicate = bp->p->duplicate;
-                            check_chain = 1;
+                        if (param->check_chain) {
+                            if (bp->p->duplicate) {
+                                if (in_read->duplicate) {
+                                    read_queue_t *current = bp->p->duplicate;
+
+                                    while (current->duplicate) {
+                                        current = current->duplicate;
+                                    }
+
+                                    current->duplicate = in_read->duplicate;
+                                }
+
+                                in_read->duplicate = bp->p->duplicate;
+                            }
+
+                            bp->p->duplicate = in_read;
                         }
 
-                        bp->p->duplicate = in_read;
                         dup = in_read->b;
                     }
 
                     if (mark_duplicates(param, dup_hash, bp->p->b, dup, &optical, &opt_warnings))
                         goto fail;
 
-                    if (check_chain) {
-                        if (duplicate_chain_check(param, dup_hash, bp->p->duplicate, &opt_warnings, &single_optical, &optical))
-                            goto fail;
-                    }
-
-                    if (duplicate_chain_check(param, dup_hash, bp->p, &opt_warnings, &single_optical, &optical))
-                        goto fail;
-
                     duplicate++;
                 } else {
                     fprintf(stderr, "[markdup] error: pair hashing failure.\n");
@@ -1401,7 +1696,6 @@ static int bam_mark_duplicates(md_param_t *param) {
                 int ret;
                 key_data_t single_key;
                 in_hash_t *bp;
-                int check_chain = 0;
 
                 make_single_key(&single_key, in_read->b);
 
@@ -1420,29 +1714,20 @@ static int bam_mark_duplicates(md_param_t *param) {
                     if ((bp->p->b->core.flag & BAM_FPAIRED) && !(bp->p->b->core.flag & BAM_FMUNMAP)) {
                         // if matched against one of a pair just mark as duplicate
 
-                        if (bp->p->duplicate) {
-                            in_read->duplicate = bp->p->duplicate;
-                            check_chain = 1;
-                        }
-
-                        bp->p->duplicate = in_read;
-
-                        if (mark_duplicates(param, dup_hash, bp->p->b, in_read->b, &single_optical, &opt_warnings))
-                            goto fail;
+                        if (param->check_chain) {
+                            if (bp->p->duplicate) {
+                                in_read->duplicate = bp->p->duplicate;
+                            }
 
-                        if (check_chain) {
-                            // check the new duplicate entry in the chain
-                            if (duplicate_chain_check(param, dup_hash, bp->p->duplicate, &opt_warnings, &single_optical, &optical))
-                                    goto fail;
+                            bp->p->duplicate = in_read;
                         }
 
-                        // check against the new original
-                        if (duplicate_chain_check(param, dup_hash, bp->p, &opt_warnings, &single_optical, &optical))
+                        if (mark_duplicates(param, dup_hash, bp->p->b, in_read->b, &single_optical, &opt_warnings))
                             goto fail;
 
                     } else {
                         int64_t old_score, new_score;
-                        bam1_t *dup;
+                        bam1_t *dup = NULL;
 
                         old_score = calc_score(bp->p->b);
                         new_score = calc_score(in_read->b);
@@ -1451,32 +1736,26 @@ static int bam_mark_duplicates(md_param_t *param) {
                         // to the single hash and mark the other as duplicate
                         if (new_score > old_score) { // swap reads
                             dup = bp->p->b;
-                            in_read->duplicate = bp->p;
+
+                            if (param->check_chain)
+                                in_read->duplicate = bp->p;
+
                             bp->p = in_read;
                         } else {
-                            if (bp->p->duplicate) {
-                                in_read->duplicate = bp->p->duplicate;
-                                check_chain = 1;
+                            if (param->check_chain) {
+                                if (bp->p->duplicate) {
+                                    in_read->duplicate = bp->p->duplicate;
+                                }
+
+                                bp->p->duplicate = in_read;
                             }
 
-                            bp->p->duplicate = in_read;
                             dup = in_read->b;
                         }
 
                         if (mark_duplicates(param, dup_hash, bp->p->b, dup, &single_optical, &opt_warnings))
                             goto fail;
-
-
-                        if (check_chain) {
-                            if (duplicate_chain_check(param, dup_hash, bp->p->duplicate, &opt_warnings, &single_optical, &optical))
-                                goto fail;
-                        }
-
-                        if (duplicate_chain_check(param, dup_hash, bp->p, &opt_warnings, &single_optical, &optical))
-                            goto fail;
-
-
-                        }
+                    }
 
                     single_dup++;
                 } else {
@@ -1500,6 +1779,22 @@ static int bam_mark_duplicates(md_param_t *param) {
                 break;
             }
 
+            if (!dup_checked && param->check_chain) {
+                // check for multiple optical duplicates of the same original read
+
+                if (find_duplicate_chains(param, read_buffer, dup_hash, &dup_list, prev_coord, prev_tid, &opt_warnings, &single_optical, &optical, 1)) {
+                    fprintf(stderr, "[markdup] error: duplicate checking failed.\n");
+                    goto fail;
+                }
+
+                dup_checked = 1;
+            }
+
+
+            if (param->check_chain && (in_read->b->core.flag & BAM_FDUP) && !in_read->dup_checked && !(in_read->b->core.flag & exclude)) {
+                break;
+            }
+
             if (!param->remove_dups || !(in_read->b->core.flag & BAM_FDUP)) {
                 if (param->supp) {
                     if (tmp_file_write(&temp, in_read->b)) {
@@ -1550,6 +1845,14 @@ static int bam_mark_duplicates(md_param_t *param) {
         goto fail;
     }
 
+    // one last check
+    if (param->tag || param->opt_dist) {
+        if (find_duplicate_chains(param, read_buffer, dup_hash, &dup_list, prev_coord, prev_tid, &opt_warnings, &single_optical, &optical, 0)) {
+            fprintf(stderr, "[markdup] error: duplicate checking failed.\n");
+            goto fail;
+        }
+    }
+
     // write out the end of the list
     rq = kl_begin(read_buffer);
     while (rq != kl_end(read_buffer)) {
@@ -1606,7 +1909,7 @@ static int bam_mark_duplicates(md_param_t *param) {
                     np_duplicate++;
 
                     if (param->tag && kh_val(dup_hash, k).name) {
-                        if (bam_aux_append(b, "do", 'Z', strlen(kh_val(dup_hash, k).name) + 1, (uint8_t*)kh_val(dup_hash, k).name)) {
+                        if (bam_aux_update_str(b, "do", strlen(kh_val(dup_hash, k).name) + 1, (char*)kh_val(dup_hash, k).name)) {
                             fprintf(stderr, "[markdup] error: unable to append supplementary 'do' tag.\n");
                             goto fail;
                         }
@@ -1614,10 +1917,10 @@ static int bam_mark_duplicates(md_param_t *param) {
 
                     if (param->opt_dist) {
                         if (kh_val(dup_hash, k).type) {
-                            bam_aux_append(b, "dt", 'Z', 3, (const uint8_t *)"SQ");
+                            bam_aux_update_str(b, "dt", 3, "SQ");
                             np_opt_duplicate++;
                         } else {
-                            bam_aux_append(b, "dt", 'Z', 3, (const uint8_t *)"LB");
+                            bam_aux_update_str(b, "dt", 3, "LB");
                         }
                     }
                 }
@@ -1669,7 +1972,7 @@ static int bam_mark_duplicates(md_param_t *param) {
             fp = stderr;
         }
 
-        els = estimate_library_size(pair, duplicate - optical);
+        els = estimate_library_size(pair, duplicate, optical);
 
         fprintf(fp,
                 "COMMAND: %s\n"
@@ -1703,6 +2006,9 @@ static int bam_mark_duplicates(md_param_t *param) {
         }
     }
 
+    if (param->check_chain && (param->tag || param->opt_dist))
+        free(dup_list.c);
+
     kh_destroy(reads, pair_hash);
     kh_destroy(reads, single_hash);
     kl_destroy(read_queue, read_buffer);
@@ -1723,6 +2029,9 @@ static int bam_mark_duplicates(md_param_t *param) {
     }
     kh_destroy(duplicates, dup_hash);
 
+    if (param->check_chain && (param->tag || param->opt_dist))
+        free(dup_list.c);
+
     kh_destroy(reads, pair_hash);
     kh_destroy(reads, single_hash);
     sam_hdr_destroy(header);
@@ -1745,8 +2054,11 @@ static int markdup_usage(void) {
     fprintf(stderr, "  -m --mode TYPE   Duplicate decision method for paired reads.\n"
                     "                   TYPE = t measure positions based on template start/end (default).\n"
                     "                          s measure positions based on sequence start.\n");
+    fprintf(stderr, "  -n               Reduce optical duplicate accuracy (faster results with many duplicates).\n");
+    fprintf(stderr, "  -u               Output uncompressed data\n");
     fprintf(stderr, "  --include-fails  Include quality check failed reads.\n");
     fprintf(stderr, "  --no-PG          Do not add a PG line\n");
+    fprintf(stderr, "  --no-multi-dup   Reduced duplicates of duplicates checking.\n");
     fprintf(stderr, "  -t               Mark primary duplicates with the name of the original in a \'do\' tag."
                                   " Mainly for information and debugging.\n");
 
@@ -1761,23 +2073,24 @@ static int markdup_usage(void) {
 
 int bam_markdup(int argc, char **argv) {
     int c, ret;
-    char wmode[3] = {'w', 'b', 0};
+    char wmode[4] = {'w', 'b', 0, 0};
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     htsThreadPool p = {NULL, 0};
     kstring_t tmpprefix = {0, 0, NULL};
     struct stat st;
     unsigned int t;
-    md_param_t param = {NULL, NULL, NULL, 0, 300, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL};
+    md_param_t param = {NULL, NULL, NULL, 0, 300, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, NULL, NULL, NULL};
 
     static const struct option lopts[] = {
         SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         {"include-fails", no_argument, NULL, 1001},
         {"no-PG", no_argument, NULL, 1002},
         {"mode", required_argument, NULL, 'm'},
+        {"no-multi-dup", no_argument, NULL, 1003},
         {NULL, 0, NULL, 0}
     };
 
-    while ((c = getopt_long(argc, argv, "rsl:StT:O:@:f:d:ncm:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "rsl:StT:O:@:f:d:cm:u", lopts, NULL)) >= 0) {
         switch (c) {
             case 'r': param.remove_dups = 1; break;
             case 'l': param.max_length = atoi(optarg); break;
@@ -1799,8 +2112,10 @@ int bam_markdup(int argc, char **argv) {
                 }
 
                 break;
+            case 'u': wmode[2] = '0'; break;
             case 1001: param.include_fails = 1; break;
             case 1002: param.no_pg = 1; break;
+            case 1003: param.check_chain = 0; break;
             default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
             /* else fall-through */
             case '?': return markdup_usage();
diff --git a/samtools/bam_markdup.c.pysam.c b/samtools/bam_markdup.c.pysam.c
index bcb9243..7132687 100644
--- a/samtools/bam_markdup.c.pysam.c
+++ b/samtools/bam_markdup.c.pysam.c
@@ -3,7 +3,7 @@
 /*  bam_markdup.c -- Mark duplicates from a coord sorted file that has gone
                      through fixmates with the mate scoring option on.
 
-    Copyright (C) 2017-2019 Genome Research Ltd.
+    Copyright (C) 2017-2020 Genome Research Ltd.
 
     Author: Andrew Whitwham <aw7@sanger.ac.uk>
 
@@ -64,6 +64,7 @@ typedef struct {
     int mode;
     int write_index;
     int include_fails;
+    int check_chain;
     char *stats_file;
     char *arg_list;
     char *out_fn;
@@ -85,6 +86,7 @@ typedef struct read_queue_s {
     bam1_t *b;
     struct read_queue_s *duplicate;
     hts_pos_t pos;
+    int dup_checked;
 } read_queue_t;
 
 typedef struct {
@@ -96,8 +98,23 @@ typedef struct {
     char type;
 } dup_map_t;
 
+typedef struct { // one entry per duplicate read in a chain being checked
+    bam1_t *b;          // the read this entry describes
+    int64_t score;      // calc_score() of b
+    int64_t mate_score; // get_mate_score() of b, filled in for paired reads
+    long x;             // x coordinate compared against opt_dist (-1 until filled in)
+    long y;             // y coordinate compared against opt_dist (-1 until filled in)
+    int opt;            // non-zero once b is tagged as an optical duplicate
+    int xpos;           // presumably the offset of the x coordinate within the read name -- TODO confirm
+} check_t;
 
 
+typedef struct { // growable array of check_t entries
+    check_t *c;     // the entries
+    size_t size;    // allocated capacity of c, counted in entries
+    size_t length;  // number of entries currently in use
+} check_list_t;
+
 static khint32_t do_hash(unsigned char *key, khint32_t len);
 
 static khint_t hash_key(key_data_t key) {
@@ -667,6 +684,7 @@ static int add_duplicate(khash_t(duplicates) *d_hash, bam1_t *dupe, char *orig_n
 }
 
 
+/* Get the position of the coordinates from the read name. */
 static inline int get_coordinate_positions(const char *qname, int *xpos, int *ypos) {
     int sep = 0;
     int pos = 0;
@@ -695,6 +713,66 @@ static inline int get_coordinate_positions(const char *qname, int *xpos, int *yp
     return sep;
 }
 
+/* Parse the x and y coordinates from a read name.  Returns 0 on success, 1 if the name cannot be deciphered. */
+static int get_coordinates(const char *name, int *xpos_out, long *x_coord, long *y_coord, long *warnings) {
+    int ret = 1; // failure until both coordinates have been read
+    int seps, xpos = 0, ypos = 0;
+    long x = 0, y = 0;
+    char *end;
+
+    seps = get_coordinate_positions(name, &xpos, &ypos);
+
+    /* The most current Illumina read format at time of writing is:
+       @machine:run:flowcell:lane:tile:x:y:UMI or
+       @machine:run:flowcell:lane:tile:x:y
+
+       Counting the separating colons gives us a quick format check.
+       Older name formats have fewer elements.
+    */
+
+    if (!(seps == 3 || seps == 4 || seps == 6 || seps == 7)) {
+        (*warnings)++; // every failure is counted, only the first BMD_WARNING_MAX are printed
+
+        if (*warnings <= BMD_WARNING_MAX) {
+            fprintf(samtools_stderr, "[markdup] warning: cannot decipher read name %s for optical duplicate marking.\n", name);
+        }
+
+        return ret;
+    }
+
+    x = strtol(name + xpos, &end, 10);
+
+    if ((name + xpos) == end) { // strtol consumed nothing, there is no number at xpos
+        (*warnings)++;
+
+        if (*warnings <= BMD_WARNING_MAX) {
+            fprintf(samtools_stderr, "[markdup] warning: can not decipher X coordinate in %s .\n", name);
+        }
+
+        return ret;
+    }
+
+    y = strtol(name + ypos, &end, 10);
+
+    if ((name + ypos) == end) { // strtol consumed nothing, there is no number at ypos
+        (*warnings)++;
+
+        if (*warnings <= BMD_WARNING_MAX) {
+            fprintf(samtools_stderr, "[markdup] warning: can not decipher y coordinate in %s .\n", name);
+        }
+
+        return ret;
+    }
+
+    *x_coord = x; // the outputs are only written on success
+    *y_coord = y;
+    *xpos_out = xpos;
+    ret = 0;
+
+    return ret;
+}
+
+
 /* Using the coordinates from the Illumina read name, see whether the duplicated read is
    close enough (set by max_dist) to the original to be counted as optical.*/
 
@@ -808,6 +886,59 @@ static int optical_duplicate(bam1_t *ori, bam1_t *dup, long max_dist, long *warn
 }
 
 
+/* Using the coordinates from the Illumina read name, see whether the duplicated read (dup)
+   is within max_dist of the original in both x and y and shares its name prefix (the
+   first oxpos - 1 characters).  The original's name, oxpos, ox and oy must already have
+   been calculated.  The duplicate's x, y and xpos are stored in c (and c->opt is set when
+   optical).  Returns 1 if optical, 0 if not or if the duplicate's name cannot be parsed. */
+static int optical_duplicate_partial(const char *name, const int oxpos, const long ox, const long oy, bam1_t *dup, check_t *c, long max_dist, long *warnings) {
+    int ret = 0;
+    char *duplicate = bam_get_qname(dup);
+    int dxpos = 0;
+    long dx, dy;
+
+    if (get_coordinates(duplicate, &dxpos, &dx, &dy, warnings)) {
+        // c->xpos is read by check_duplicate_chain, never leave it unset; 0 makes that compare cover whole names
+        c->xpos = 0;
+        return ret;
+    }
+
+    if (strncmp(name, duplicate, oxpos - 1) == 0) {
+        // the initial parts match, look at the numbers
+        long xdiff, ydiff;
+
+        if (ox > dx) {
+            xdiff = ox - dx;
+        } else {
+            xdiff = dx - ox;
+        }
+
+        if (xdiff <= max_dist) {
+            // still might be optical
+
+            if (oy > dy) {
+                ydiff = oy - dy;
+            } else {
+                ydiff = dy - oy;
+            }
+
+            if (ydiff <= max_dist) ret = 1;
+        }
+    }
+
+    c->x = dx;
+    c->y = dy;
+    c->xpos = dxpos;
+
+    if (ret) {
+        c->opt = ret;
+    }
+
+    return ret;
+}
+
+
+/* Mark the read as a duplicate and update the duplicate hash (if needed) */
 static int mark_duplicates(md_param_t *param, khash_t(duplicates) *dup_hash, bam1_t *ori, bam1_t *dup,
                            long *optical, long *warn) {
     char dup_type = 0;
@@ -816,7 +947,7 @@ static int mark_duplicates(md_param_t *param, khash_t(duplicates) *dup_hash, bam
     dup->core.flag |= BAM_FDUP;
 
     if (param->tag) {
-        if (bam_aux_append(dup, "do", 'Z', strlen(bam_get_qname(ori)) + 1, (uint8_t*)bam_get_qname(ori))) {
+        if (bam_aux_update_str(dup, "do", strlen(bam_get_qname(ori)) + 1, bam_get_qname(ori))) {
             fprintf(samtools_stderr, "[markdup] error: unable to append 'do' tag.\n");
             return -1;
         }
@@ -824,12 +955,12 @@ static int mark_duplicates(md_param_t *param, khash_t(duplicates) *dup_hash, bam
 
     if (param->opt_dist) { // mark optical duplicates
         if (optical_duplicate(ori, dup, param->opt_dist, warn)) {
-            bam_aux_append(dup, "dt", 'Z', 3, (const uint8_t *)"SQ");
+            bam_aux_update_str(dup, "dt", 3, "SQ");
             dup_type = 'O';
             (*optical)++;
         } else {
             // not an optical duplicate
-            bam_aux_append(dup, "dt", 'Z', 3, (const uint8_t *)"LB");
+            bam_aux_update_str(dup, "dt", 3, "LB");
         }
     }
 
@@ -855,17 +986,12 @@ static int mark_duplicates(md_param_t *param, khash_t(duplicates) *dup_hash, bam
 }
 
 
+/* If the duplicate type has changed to optical then retag and update the duplicate hash. */
 static inline int optical_retag(md_param_t *param, khash_t(duplicates) *dup_hash, bam1_t *b, int paired, long *optical_single, long *optical_pair) {
     int ret = 0;
-    uint8_t *data;
 
-    // remove any existing dt tag
-    if ((data = bam_aux_get(b, "dt")) != NULL) {
-        bam_aux_del(b, data);
-    }
-
-    if (bam_aux_append(b, "dt", 'Z', 3, (const uint8_t *)"SQ")) {
-        fprintf(samtools_stderr, "[markdup] error: unable to append 'dt' tag.\n");
+    if (bam_aux_update_str(b, "dt", 3, "SQ")) {
+        fprintf(samtools_stderr, "[markdup] error: unable to update 'dt' tag.\n");
         ret = -1;
     }
 
@@ -899,23 +1025,54 @@ static inline int optical_retag(md_param_t *param, khash_t(duplicates) *dup_hash
 }
 
 
+/* Check all duplicates of the highest quality read (the "original") for consistency.  Also
+   pre-calculate any values for use in check_duplicate_chain later.
+   Returns 0 on success, >0 on coordinate reading error (program can continue) or
+   <0 on an error (program should not continue). */
+static int check_chain_against_original(md_param_t *param, khash_t(duplicates) *dup_hash, read_queue_t *ori,
+             check_list_t *list, long *warn, long *optical_single, long *optical_pair) {
 
-/*
-    Where there is more than one duplicate go down the list and check for optical duplicates and change
-    do tags (where used) to point to original (non-duplicate) read.
-*/
-static int duplicate_chain_check(md_param_t *param, khash_t(duplicates) *dup_hash, read_queue_t *ori,
-             long *warn, long *optical_single, long *optical_pair) {
     int ret = 0;
-    read_queue_t *current = ori->duplicate;
     char *ori_name = bam_get_qname(ori->b);
-    int have_original = !(ori->b->core.flag & BAM_FDUP);
-    int ori_paired = (ori->b->core.flag & BAM_FPAIRED) && !(ori->b->core.flag & BAM_FMUNMAP);
+    read_queue_t *current = ori->duplicate;
+    int xpos;
+    long x, y;
+
+    if (param->opt_dist) {
+        if ((ret = get_coordinates(ori_name, &xpos, &x, &y, warn))) {
+            return ret;
+        }
+    }
+
+    list->length = 0;
 
     while (current) {
-        int current_paired = (current->b->core.flag & BAM_FPAIRED) && !(current->b->core.flag & BAM_FMUNMAP);
+        check_t *c;
+
+        if (list->length >= list->size) {
+            check_t *tmp;
+
+            list->size *= 2;
+
+            if (!(tmp = realloc(list->c, list->size * sizeof(check_t)))) {
+                fprintf(samtools_stderr, "[markdup] error: Unable to expand opt check list.\n");
+                return -1;
+            }
+
+            list->c = tmp;
+        }
+
+        c = &list->c[list->length];
 
-        if (param->tag && have_original) {
+        c->b = current->b;
+        c->x = -1;
+        c->y = -1;
+        c->opt = 0;
+        c->score = 0;
+        c->mate_score = 0;
+        current->dup_checked = 1;
+
+        if (param->tag) {
             uint8_t *data;
 
             // at this stage all duplicates should have a do tag
@@ -925,10 +1082,8 @@ static int duplicate_chain_check(md_param_t *param, khash_t(duplicates) *dup_has
 
                 if (old_name) {
                     if (strcmp(old_name, ori_name) != 0) {
-                        bam_aux_del(current->b, data);
-
-                        if (bam_aux_append(current->b, "do", 'Z', strlen(ori_name) + 1, (uint8_t*)ori_name)) {
-                            fprintf(samtools_stderr, "[markdup] error: unable to append 'do' tag.\n");
+                        if (bam_aux_update_str(current->b, "do", strlen(ori_name) + 1, (const char *)ori_name)) {
+                            fprintf(samtools_stderr, "[markdup] error: unable to update 'do' tag.\n");
                             ret =  -1;
                             break;
                         }
@@ -942,118 +1097,226 @@ static int duplicate_chain_check(md_param_t *param, khash_t(duplicates) *dup_has
         }
 
         if (param->opt_dist) {
-            int is_cur_opt = 0, is_ori_opt = 0;
             uint8_t *data;
             char *dup_type;
+            int is_opt = 0;
+            int current_paired = (current->b->core.flag & BAM_FPAIRED) && !(current->b->core.flag & BAM_FMUNMAP);
 
-            if ((data = bam_aux_get(ori->b, "dt"))) {
+            if ((data = bam_aux_get(current->b, "dt"))) {
                 if ((dup_type = bam_aux2Z(data))) {
                     if (strcmp(dup_type, "SQ") == 0) {
-                        is_ori_opt = 1;
+                        c->opt = 1;
                     }
                 }
             }
 
-            if ((data = bam_aux_get(current->b, "dt"))) {
-                if ((dup_type = bam_aux2Z(data))) {
-                    if (strcmp(dup_type, "SQ") == 0) {
-                        is_cur_opt = 1;
-                    }
+            // need to run this to get the duplicates x and y scores
+            is_opt = optical_duplicate_partial(ori_name, xpos, x, y, current->b, c, param->opt_dist, warn);
+
+            if (!c->opt && is_opt) {
+                if (optical_retag(param, dup_hash, current->b, current_paired, optical_single, optical_pair)) {
+                    ret = -1;
+                    break;
                 }
+
+                c->opt = 1;
             }
 
-            if (!(is_ori_opt && is_cur_opt)) {
-                // if both are already optical duplicates there is no need to check again, otherwise...
+            c->score = calc_score(current->b);
 
-                if (optical_duplicate(ori->b, current->b, param->opt_dist, warn)) {
-                    // find out which one is the duplicate
-                    int is_cur_dup = 0;
+            if (current_paired) {
+                if ((c->mate_score = get_mate_score(current->b)) == -1) {
+                     fprintf(samtools_stderr, "[markdup] error: no ms score tag. Please run samtools fixmate on file first.\n");
+                     ret = -1;
+                     break;
+                }
+            }
+        }
 
-                    if (have_original) {
-                        // compared against an original, this is a dup.
-                        is_cur_dup = 1;
-                    } else if (ori_paired != current_paired) {
-                        if (!current_paired) {
-                            // current is single vs pair, this is a dup.
-                            is_cur_dup = 1;
-                        }
-                    } else {
-                        // do it by scores
-                        int64_t ori_score, curr_score;
+        current = current->duplicate;
+        list->length++;
+    }
 
-                        if ((ori->b->core.flag & BAM_FQCFAIL) != (current->b->core.flag & BAM_FQCFAIL)) {
-                            if (ori->b->core.flag & BAM_FQCFAIL) {
-                                ori_score  = 0;
-                                curr_score = 1;
-                            } else {
-                                ori_score  = 1;
-                                curr_score = 0;
-                            }
-                        } else {
-                            ori_score  = calc_score(ori->b);
-                            curr_score = calc_score(current->b);
-
-                            if (current_paired) {
-                                // they are pairs so add mate scores.
-                                int64_t mate_tmp;
-
-                                if ((mate_tmp = get_mate_score(ori->b)) == -1) {
-                                    fprintf(samtools_stderr, "[markdup] error: no ms score tag. Please run samtools fixmate on file first.\n");
-                                    ret = -1;
-                                    break;
-                                } else {
-                                    ori_score += mate_tmp;
-                                }
+    return ret;
+}
 
-                                if ((mate_tmp = get_mate_score(current->b)) == -1) {
-                                    fprintf(samtools_stderr, "[markdup] error: no ms score tag. Please run samtools fixmate on file first.\n");
-                                    ret = -1;
-                                    break;
-                                } else {
-                                    curr_score += mate_tmp;
-                                }
-                            }
-                        }
 
-                        if (ori_score == curr_score) {
-                            if (strcmp(bam_get_qname(current->b), ori_name) < 0) {
-                                curr_score++;
-                            } else {
-                                curr_score--;
-                            }
-                        }
+/* qsort comparison function, orders check_t entries by ascending x coordinate. */
+static int xcoord_sort(const void *a, const void *b) {
+    const check_t *ac = a, *bc = b;
 
-                        if (ori_score > curr_score) {
-                            is_cur_dup = 1;
-                        }
+    return (ac->x > bc->x) - (ac->x < bc->x); // not a subtraction: a long difference may not fit in int
+}
+
+
+/* Check all the duplicates against each other to see if they are optical duplicates.  Returns 0 on success, -1 if a retag fails. */
+static int check_duplicate_chain(md_param_t *param, khash_t(duplicates) *dup_hash, check_list_t *list,
+             long *warn, long *optical_single, long *optical_pair) {
+    int ret = 0;
+    size_t curr = 0;
+
+    qsort(list->c, list->length, sizeof(list->c[0]), xcoord_sort); // sorted by x so only entries within opt_dist are compared
+
+    while (curr + 1 < list->length) { // not length - 1, which wraps round for an empty list
+        check_t *current = &list->c[curr];
+        size_t count = curr;
+        char *cur_name = bam_get_qname(current->b);
+        int current_paired = (current->b->core.flag & BAM_FPAIRED) && !(current->b->core.flag & BAM_FMUNMAP);
+
+        while (++count < list->length && (list->c[count].x - current->x <= param->opt_dist)) {
+            // while close enough along the x coordinate
+            check_t *chk = &list->c[count];
+
+            if (current->opt && chk->opt)
+                continue;
+
+            // if both are already optical duplicates there is no need to check again, otherwise...
+
+            long ydiff;
+
+            if (current->y > chk->y) {
+                ydiff = current->y - chk->y;
+            } else {
+                ydiff = chk->y - current->y;
+            }
+
+            if (ydiff > param->opt_dist)
+                continue;
+
+            // the numbers are right, check the names
+            if (strncmp(cur_name, bam_get_qname(chk->b), current->xpos - 1) != 0)
+                continue;
+
+            // optical duplicates
+            int chk_dup = 0;
+            int chk_paired = (chk->b->core.flag & BAM_FPAIRED) && !(chk->b->core.flag & BAM_FMUNMAP);
+
+            if (current_paired != chk_paired) {
+                if (!chk_paired) {
+                    // chk is single vs pair, this is a dup.
+                    chk_dup = 1;
+                }
+            } else {
+                // do it by scores
+                int64_t cur_score, chk_score;
+
+                if ((current->b->core.flag & BAM_FQCFAIL) != (chk->b->core.flag & BAM_FQCFAIL)) {
+                    if (current->b->core.flag & BAM_FQCFAIL) {
+                        cur_score = 0;
+                        chk_score = 1;
+                    } else {
+                        cur_score = 1;
+                        chk_score = 0;
                     }
+                } else {
+                    cur_score = current->score;
+                    chk_score = chk->score;
 
-                    if (is_cur_dup) {
-                        // the current is the optical duplicate
-                        if (!is_cur_opt) { // only change if not already an optical duplicate
-                            if (optical_retag(param, dup_hash, current->b, current_paired, optical_single, optical_pair)) {
-                                ret = -1;
-                                break;
-                            }
-                        }
+                    if (current_paired) {
+                        // they are pairs so add mate scores.
+                        chk_score += chk->mate_score;
+                        cur_score += current->mate_score;
+                    }
+                }
+
+                if (cur_score == chk_score) { // break a tie on the read names so one of the two is picked
+                    if (strcmp(bam_get_qname(chk->b), cur_name) < 0) {
+                        chk_score++;
                     } else {
-                        if (!is_ori_opt) {
-                            if (optical_retag(param, dup_hash, ori->b, ori_paired, optical_single, optical_pair)) {
-                                ret = -1;
-                                break;
-                            }
-                        }
+                        chk_score--;
                     }
                 }
+
+                if (cur_score > chk_score) {
+                    chk_dup = 1;
+                }
+            }
+
+            if (chk_dup) {
+                // the duplicate is the optical duplicate
+                if (!chk->opt) { // only change if not already an optical duplicate
+                    if (optical_retag(param, dup_hash, chk->b, chk_paired, optical_single, optical_pair)) {
+                        ret = -1;
+                        goto fail;
+                    }
+
+                    chk->opt = 1;
+                }
+            } else {
+                if (!current->opt) {
+                    if (optical_retag(param, dup_hash, current->b, current_paired, optical_single, optical_pair)) {
+                        ret = -1;
+                        goto fail;
+                    }
+
+                    current->opt = 1;
+                }
             }
         }
 
-        current = current->duplicate;
+        curr++;
+    }
+
+ fail:
+    return ret;
+}
+
+
+/* Where there is more than one duplicate go down the list and check for optical duplicates and change
+   do tags (where used) to point to original (non-duplicate) read.  Returns 0 on success, -1 on error. */
+static int find_duplicate_chains(md_param_t *param, klist_t(read_queue) *read_buffer, khash_t(duplicates) *dup_hash, check_list_t *dup_list,
+                                const hts_pos_t prev_coord, const int32_t prev_tid, long *warn, long *optical_single,
+                                long *optical_pair, const int check_range) {
+    int ret = 0;
+    kliter_t(read_queue) *rq;
+
+    rq = kl_begin(read_buffer);
+
+    while (rq != kl_end(read_buffer)) {
+        read_queue_t *in_read = &kl_val(rq);
+
+        if (check_range) {
+            /* Just check against the moving window of reads based on coordinates and max read length. */
+            if (in_read->pos + param->max_length > prev_coord && in_read->b->core.tid == prev_tid && (prev_tid != -1 || prev_coord != -1)) {
+                break;
+            }
+        } else {
+            // this is the last set of results and the end entry will be blank
+            if (!bam_get_qname(in_read->b)) {
+                break;
+            }
+        }
+
+        if (!(in_read->b->core.flag & BAM_FDUP) && in_read->duplicate) { // is the head of a duplicate chain
+
+            // check against the original for tagging and optical duplication
+            if ((ret = check_chain_against_original(param, dup_hash, in_read, dup_list, warn, optical_single, optical_pair))) {
+                if (ret < 0) { // real error
+                    ret = -1;
+                    break;
+                } else { // coordinate decoding error
+                    ret = 0;
+                    in_read->duplicate = NULL; // drop the chain so the next pass over this read skips it
+                    continue; // rq is not advanced here; the same read is looked at again and then stepped past
+                }
+            }
+
+            // check the rest of the duplicates against each other for optical duplication
+            if (param->opt_dist && check_duplicate_chain(param, dup_hash, dup_list, warn, optical_single, optical_pair)) {
+                ret = -1;
+                break;
+            }
+
+            in_read->duplicate = NULL; // chain dealt with, it is not processed again
+        }
+
+        rq = kl_next(rq);
     }
 
     return ret;
 }
 
+
 /*
   Function to use when estimating library size.
 
@@ -1082,30 +1345,29 @@ static inline double coverage_equation(double x, double c, double n) {
 
 
 /* estimate the library size, based on the Picard code in DuplicationMetrics.java*/
-static unsigned long estimate_library_size(unsigned long read_pairs, unsigned long duplicate_pairs) {
+static unsigned long estimate_library_size(unsigned long paired_reads, unsigned long paired_duplicate_reads, unsigned long optical) {
     unsigned long estimated_size = 0;
+    unsigned long non_optical_pairs = (paired_reads - optical) / 2;
+    unsigned long unique_pairs = (paired_reads - paired_duplicate_reads) / 2;
+    unsigned long duplicate_pairs = (paired_duplicate_reads - optical) / 2;
 
-    read_pairs /= 2;
-    duplicate_pairs /= 2;
-
-    if ((read_pairs && duplicate_pairs) && (read_pairs > duplicate_pairs)) {
-        unsigned long unique_pairs = read_pairs - duplicate_pairs;
+    if ((non_optical_pairs && duplicate_pairs && unique_pairs) && (non_optical_pairs > duplicate_pairs)) {
         double m = 1;
         double M = 100;
         int i;
 
-        if (coverage_equation(m * (double)unique_pairs, (double)unique_pairs, (double)read_pairs) < 0) {
+        if (coverage_equation(m * (double)unique_pairs, (double)unique_pairs, (double)non_optical_pairs) < 0) {
             fprintf(samtools_stderr, "[markdup] warning: unable to calculate estimated library size.\n");
             return  estimated_size;
         }
 
-        while (coverage_equation(M * (double)unique_pairs, (double)unique_pairs, (double)read_pairs) > 0) {
+        while (coverage_equation(M * (double)unique_pairs, (double)unique_pairs, (double)non_optical_pairs) > 0) {
             M *= 10;
         }
 
         for (i = 0; i < 40; i++) {
             double r = (m + M) / 2;
-            double u = coverage_equation(r * (double)unique_pairs, (double)unique_pairs, (double)read_pairs);
+            double u = coverage_equation(r * (double)unique_pairs, (double)unique_pairs, (double)non_optical_pairs);
 
             if (u > 0) {
                 m = r;
@@ -1121,7 +1383,7 @@ static unsigned long estimate_library_size(unsigned long read_pairs, unsigned lo
         fprintf(samtools_stderr, "[markdup] warning: unable to calculate estimated library size."
                         " Read pairs %ld should be greater than duplicate pairs %ld,"
                         " which should both be non zero.\n",
-                        read_pairs, duplicate_pairs);
+                        non_optical_pairs, duplicate_pairs);
     }
 
     return estimated_size;
@@ -1155,6 +1417,7 @@ static int bam_mark_duplicates(md_param_t *param) {
     tmp_file_t temp;
     char *idx_fn = NULL;
     int exclude = 0;
+    check_list_t dup_list = {NULL, 0, 0};
 
     if (!pair_hash || !single_hash || !read_buffer || !dup_hash) {
         fprintf(samtools_stderr, "[markdup] out of memory\n");
@@ -1215,10 +1478,24 @@ static int bam_mark_duplicates(md_param_t *param) {
         goto fail;
     }
 
+    if (param->check_chain && !(param->tag || param->opt_dist))
+        param->check_chain = 0;
+
+    if (param->check_chain) {
+        dup_list.size = 128;
+        dup_list.c = NULL;
+
+        if ((dup_list.c = malloc(dup_list.size * sizeof(check_t))) == NULL) {
+            fprintf(samtools_stderr, "[markdup] error: unable to allocate memory for dup_list.\n");
+            goto fail;
+        }
+    }
+
     reading = writing = excluded = single_dup = duplicate = examined = pair = single = optical = single_optical = 0;
     np_duplicate = np_opt_duplicate = 0;
 
     while ((ret = sam_read1(param->in, header, in_read->b)) >= 0) {
+        int dup_checked = 0;
 
         // do some basic coordinate order checks
         if (in_read->b->core.tid >= 0) { // -1 for unmapped reads
@@ -1233,6 +1510,8 @@ static int bam_mark_duplicates(md_param_t *param) {
         prev_tid   =  in_read->b->core.tid;
         in_read->pair_key.single   = 1;
         in_read->single_key.single = 0;
+        in_read->duplicate = NULL;
+        in_read->dup_checked = 0;
 
         reading++;
 
@@ -1259,7 +1538,7 @@ static int bam_mark_duplicates(md_param_t *param) {
         // read must not be secondary, supplementary, unmapped or (possibly) failed QC
         if (!(in_read->b->core.flag & exclude)) {
             examined++;
-            in_read->duplicate = NULL;
+
 
             // look at the pairs first
             if ((in_read->b->core.flag & BAM_FPAIRED) && !(in_read->b->core.flag & BAM_FMUNMAP)) {
@@ -1302,17 +1581,15 @@ static int bam_mark_duplicates(md_param_t *param) {
                        // scores more than one read of the pair
                         bam1_t *dup = bp->p->b;
 
-                        in_read->duplicate = bp->p;
+                        if (param->check_chain)
+                            in_read->duplicate = bp->p;
+
                         bp->p = in_read;
 
                         if (mark_duplicates(param, dup_hash, bp->p->b, dup, &single_optical, &opt_warnings))
                             goto fail;
 
                         single_dup++;
-
-                        if (duplicate_chain_check(param, dup_hash, bp->p, &opt_warnings, &single_optical, &optical))
-                            goto fail;
-
                     }
                 } else {
                     fprintf(samtools_stderr, "[markdup] error: single hashing failure.\n");
@@ -1329,8 +1606,7 @@ static int bam_mark_duplicates(md_param_t *param) {
                     in_read->pair_key = pair_key;
                 } else if (ret == 0) {
                     int64_t old_score, new_score, tie_add = 0;
-                    bam1_t *dup;
-                    int check_chain = 0;
+                    bam1_t *dup = NULL;
 
                     bp = &kh_val(pair_hash, k);
 
@@ -1371,29 +1647,48 @@ static int bam_mark_duplicates(md_param_t *param) {
 
                     if (new_score + tie_add > old_score) { // swap reads
                         dup = bp->p->b;
-                        in_read->duplicate = bp->p;
+
+                        if (param->check_chain) {
+
+                            if (in_read->duplicate) {
+                                read_queue_t *current = in_read->duplicate;
+
+                                while (current->duplicate) {
+                                    current = current->duplicate;
+                                }
+
+                                current->duplicate = bp->p;
+                            } else {
+                                in_read->duplicate = bp->p;
+                            }
+                        }
+
                         bp->p = in_read;
                     } else {
-                        if (bp->p->duplicate) {
-                            in_read->duplicate = bp->p->duplicate;
-                            check_chain = 1;
+                        if (param->check_chain) {
+                            if (bp->p->duplicate) {
+                                if (in_read->duplicate) {
+                                    read_queue_t *current = bp->p->duplicate;
+
+                                    while (current->duplicate) {
+                                        current = current->duplicate;
+                                    }
+
+                                    current->duplicate = in_read->duplicate;
+                                }
+
+                                in_read->duplicate = bp->p->duplicate;
+                            }
+
+                            bp->p->duplicate = in_read;
                         }
 
-                        bp->p->duplicate = in_read;
                         dup = in_read->b;
                     }
 
                     if (mark_duplicates(param, dup_hash, bp->p->b, dup, &optical, &opt_warnings))
                         goto fail;
 
-                    if (check_chain) {
-                        if (duplicate_chain_check(param, dup_hash, bp->p->duplicate, &opt_warnings, &single_optical, &optical))
-                            goto fail;
-                    }
-
-                    if (duplicate_chain_check(param, dup_hash, bp->p, &opt_warnings, &single_optical, &optical))
-                        goto fail;
-
                     duplicate++;
                 } else {
                     fprintf(samtools_stderr, "[markdup] error: pair hashing failure.\n");
@@ -1403,7 +1698,6 @@ static int bam_mark_duplicates(md_param_t *param) {
                 int ret;
                 key_data_t single_key;
                 in_hash_t *bp;
-                int check_chain = 0;
 
                 make_single_key(&single_key, in_read->b);
 
@@ -1422,29 +1716,20 @@ static int bam_mark_duplicates(md_param_t *param) {
                     if ((bp->p->b->core.flag & BAM_FPAIRED) && !(bp->p->b->core.flag & BAM_FMUNMAP)) {
                         // if matched against one of a pair just mark as duplicate
 
-                        if (bp->p->duplicate) {
-                            in_read->duplicate = bp->p->duplicate;
-                            check_chain = 1;
-                        }
-
-                        bp->p->duplicate = in_read;
-
-                        if (mark_duplicates(param, dup_hash, bp->p->b, in_read->b, &single_optical, &opt_warnings))
-                            goto fail;
+                        if (param->check_chain) {
+                            if (bp->p->duplicate) {
+                                in_read->duplicate = bp->p->duplicate;
+                            }
 
-                        if (check_chain) {
-                            // check the new duplicate entry in the chain
-                            if (duplicate_chain_check(param, dup_hash, bp->p->duplicate, &opt_warnings, &single_optical, &optical))
-                                    goto fail;
+                            bp->p->duplicate = in_read;
                         }
 
-                        // check against the new original
-                        if (duplicate_chain_check(param, dup_hash, bp->p, &opt_warnings, &single_optical, &optical))
+                        if (mark_duplicates(param, dup_hash, bp->p->b, in_read->b, &single_optical, &opt_warnings))
                             goto fail;
 
                     } else {
                         int64_t old_score, new_score;
-                        bam1_t *dup;
+                        bam1_t *dup = NULL;
 
                         old_score = calc_score(bp->p->b);
                         new_score = calc_score(in_read->b);
@@ -1453,32 +1738,26 @@ static int bam_mark_duplicates(md_param_t *param) {
                         // to the single hash and mark the other as duplicate
                         if (new_score > old_score) { // swap reads
                             dup = bp->p->b;
-                            in_read->duplicate = bp->p;
+
+                            if (param->check_chain)
+                                in_read->duplicate = bp->p;
+
                             bp->p = in_read;
                         } else {
-                            if (bp->p->duplicate) {
-                                in_read->duplicate = bp->p->duplicate;
-                                check_chain = 1;
+                            if (param->check_chain) {
+                                if (bp->p->duplicate) {
+                                    in_read->duplicate = bp->p->duplicate;
+                                }
+
+                                bp->p->duplicate = in_read;
                             }
 
-                            bp->p->duplicate = in_read;
                             dup = in_read->b;
                         }
 
                         if (mark_duplicates(param, dup_hash, bp->p->b, dup, &single_optical, &opt_warnings))
                             goto fail;
-
-
-                        if (check_chain) {
-                            if (duplicate_chain_check(param, dup_hash, bp->p->duplicate, &opt_warnings, &single_optical, &optical))
-                                goto fail;
-                        }
-
-                        if (duplicate_chain_check(param, dup_hash, bp->p, &opt_warnings, &single_optical, &optical))
-                            goto fail;
-
-
-                        }
+                    }
 
                     single_dup++;
                 } else {
@@ -1502,6 +1781,22 @@ static int bam_mark_duplicates(md_param_t *param) {
                 break;
             }
 
+            if (!dup_checked && param->check_chain) {
+                // check for multiple optical duplicates of the same original read
+
+                if (find_duplicate_chains(param, read_buffer, dup_hash, &dup_list, prev_coord, prev_tid, &opt_warnings, &single_optical, &optical, 1)) {
+                    fprintf(samtools_stderr, "[markdup] error: duplicate checking failed.\n");
+                    goto fail;
+                }
+
+                dup_checked = 1;
+            }
+
+
+            if (param->check_chain && (in_read->b->core.flag & BAM_FDUP) && !in_read->dup_checked && !(in_read->b->core.flag & exclude)) {
+                break;
+            }
+
             if (!param->remove_dups || !(in_read->b->core.flag & BAM_FDUP)) {
                 if (param->supp) {
                     if (tmp_file_write(&temp, in_read->b)) {
@@ -1552,6 +1847,14 @@ static int bam_mark_duplicates(md_param_t *param) {
         goto fail;
     }
 
+    // one last check
+    if (param->tag || param->opt_dist) {
+        if (find_duplicate_chains(param, read_buffer, dup_hash, &dup_list, prev_coord, prev_tid, &opt_warnings, &single_optical, &optical, 0)) {
+            fprintf(samtools_stderr, "[markdup] error: duplicate checking failed.\n");
+            goto fail;
+        }
+    }
+
     // write out the end of the list
     rq = kl_begin(read_buffer);
     while (rq != kl_end(read_buffer)) {
@@ -1608,7 +1911,7 @@ static int bam_mark_duplicates(md_param_t *param) {
                     np_duplicate++;
 
                     if (param->tag && kh_val(dup_hash, k).name) {
-                        if (bam_aux_append(b, "do", 'Z', strlen(kh_val(dup_hash, k).name) + 1, (uint8_t*)kh_val(dup_hash, k).name)) {
+                        if (bam_aux_update_str(b, "do", strlen(kh_val(dup_hash, k).name) + 1, (char*)kh_val(dup_hash, k).name)) {
                             fprintf(samtools_stderr, "[markdup] error: unable to append supplementary 'do' tag.\n");
                             goto fail;
                         }
@@ -1616,10 +1919,10 @@ static int bam_mark_duplicates(md_param_t *param) {
 
                     if (param->opt_dist) {
                         if (kh_val(dup_hash, k).type) {
-                            bam_aux_append(b, "dt", 'Z', 3, (const uint8_t *)"SQ");
+                            bam_aux_update_str(b, "dt", 3, "SQ");
                             np_opt_duplicate++;
                         } else {
-                            bam_aux_append(b, "dt", 'Z', 3, (const uint8_t *)"LB");
+                            bam_aux_update_str(b, "dt", 3, "LB");
                         }
                     }
                 }
@@ -1671,7 +1974,7 @@ static int bam_mark_duplicates(md_param_t *param) {
             fp = samtools_stderr;
         }
 
-        els = estimate_library_size(pair, duplicate - optical);
+        els = estimate_library_size(pair, duplicate, optical);
 
         fprintf(fp,
                 "COMMAND: %s\n"
@@ -1705,6 +2008,9 @@ static int bam_mark_duplicates(md_param_t *param) {
         }
     }
 
+    if (param->check_chain && (param->tag || param->opt_dist))
+        free(dup_list.c);
+
     kh_destroy(reads, pair_hash);
     kh_destroy(reads, single_hash);
     kl_destroy(read_queue, read_buffer);
@@ -1725,6 +2031,9 @@ static int bam_mark_duplicates(md_param_t *param) {
     }
     kh_destroy(duplicates, dup_hash);
 
+    if (param->check_chain && (param->tag || param->opt_dist))
+        free(dup_list.c);
+
     kh_destroy(reads, pair_hash);
     kh_destroy(reads, single_hash);
     sam_hdr_destroy(header);
@@ -1747,8 +2056,11 @@ static int markdup_usage(void) {
     fprintf(samtools_stderr, "  -m --mode TYPE   Duplicate decision method for paired reads.\n"
                     "                   TYPE = t measure positions based on template start/end (default).\n"
                     "                          s measure positions based on sequence start.\n");
+    fprintf(samtools_stderr, "  -n               Reduce optical duplicate accuracy (faster results with many duplicates).\n");
+    fprintf(samtools_stderr, "  -u               Output uncompressed data\n");
     fprintf(samtools_stderr, "  --include-fails  Include quality check failed reads.\n");
     fprintf(samtools_stderr, "  --no-PG          Do not add a PG line\n");
+    fprintf(samtools_stderr, "  --no-multi-dup   Reduced duplicates of duplicates checking.\n");
     fprintf(samtools_stderr, "  -t               Mark primary duplicates with the name of the original in a \'do\' tag."
                                   " Mainly for information and debugging.\n");
 
@@ -1763,23 +2075,24 @@ static int markdup_usage(void) {
 
 int bam_markdup(int argc, char **argv) {
     int c, ret;
-    char wmode[3] = {'w', 'b', 0};
+    char wmode[4] = {'w', 'b', 0, 0};
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     htsThreadPool p = {NULL, 0};
     kstring_t tmpprefix = {0, 0, NULL};
     struct stat st;
     unsigned int t;
-    md_param_t param = {NULL, NULL, NULL, 0, 300, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL};
+    md_param_t param = {NULL, NULL, NULL, 0, 300, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, NULL, NULL, NULL};
 
     static const struct option lopts[] = {
         SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         {"include-fails", no_argument, NULL, 1001},
         {"no-PG", no_argument, NULL, 1002},
         {"mode", required_argument, NULL, 'm'},
+        {"no-multi-dup", no_argument, NULL, 1003},
         {NULL, 0, NULL, 0}
     };
 
-    while ((c = getopt_long(argc, argv, "rsl:StT:O:@:f:d:ncm:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "rsl:StT:O:@:f:d:cm:u", lopts, NULL)) >= 0) {
         switch (c) {
             case 'r': param.remove_dups = 1; break;
             case 'l': param.max_length = atoi(optarg); break;
@@ -1801,8 +2114,10 @@ int bam_markdup(int argc, char **argv) {
                 }
 
                 break;
+            case 'u': wmode[2] = '0'; break;
             case 1001: param.include_fails = 1; break;
             case 1002: param.no_pg = 1; break;
+            case 1003: param.check_chain = 0; break;
             default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
             /* else fall-through */
             case '?': return markdup_usage();
diff --git a/samtools/bam_mate.c b/samtools/bam_mate.c
index 6d40144..4239fd1 100644
--- a/samtools/bam_mate.c
+++ b/samtools/bam_mate.c
@@ -372,7 +372,7 @@ static int bam_mating_core(samFile *in, samFile *out, int remove_reads, int prop
         curr = 1 - curr;
         pre_end = cur_end;
     }
-    if (result < -1) goto fail;
+    if (result < -1) goto read_fail;
     if (has_prev && !remove_reads) { // If we still have a BAM in the buffer it must be unpaired
         bam1_t *pre = b[1-curr];
         if (pre->core.tid < 0 || pre->core.pos < 0 || pre->core.flag&BAM_FUNMAP) { // If unmapped
@@ -391,6 +391,10 @@ static int bam_mating_core(samFile *in, samFile *out, int remove_reads, int prop
     ks_free(&str);
     return 0;
 
+ read_fail:
+    print_error("fixmate", "Couldn't read from input file");
+    goto fail;
+
  write_fail:
     print_error_errno("fixmate", "Couldn't write to output file");
  fail:
@@ -410,6 +414,7 @@ void usage(FILE* where)
 "  -p           Disable FR proper pair check\n"
 "  -c           Add template cigar ct tag\n"
 "  -m           Add mate score tag\n"
+"  -u           Uncompressed output\n"
 "  --no-PG      do not add a PG line\n");
 
     sam_global_opt_help(where, "-.O..@-.");
@@ -427,7 +432,7 @@ int bam_mating(int argc, char *argv[])
     samFile *in = NULL, *out = NULL;
     int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1, mate_score = 0, no_pg = 0;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
-    char wmode[3] = {'w', 'b', 0};
+    char wmode[4] = {'w', 'b', 0, 0};
     static const struct option lopts[] = {
         SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         {"no-PG", no_argument, NULL, 1},
@@ -437,12 +442,13 @@ int bam_mating(int argc, char *argv[])
 
     // parse args
     if (argc == 1) { usage(stdout); return 0; }
-    while ((c = getopt_long(argc, argv, "rpcmO:@:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "rpcmO:@:u", lopts, NULL)) >= 0) {
         switch (c) {
             case 'r': remove_reads = 1; break;
             case 'p': proper_pair_check = 0; break;
             case 'c': add_ct = 1; break;
             case 'm': mate_score = 1; break;
+            case 'u': wmode[2] = '0'; break;
             case 1: no_pg = 1; break;
             default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                       /* else fall-through */
diff --git a/samtools/bam_mate.c.pysam.c b/samtools/bam_mate.c.pysam.c
index edefb0b..0aa83ec 100644
--- a/samtools/bam_mate.c.pysam.c
+++ b/samtools/bam_mate.c.pysam.c
@@ -374,7 +374,7 @@ static int bam_mating_core(samFile *in, samFile *out, int remove_reads, int prop
         curr = 1 - curr;
         pre_end = cur_end;
     }
-    if (result < -1) goto fail;
+    if (result < -1) goto read_fail;
     if (has_prev && !remove_reads) { // If we still have a BAM in the buffer it must be unpaired
         bam1_t *pre = b[1-curr];
         if (pre->core.tid < 0 || pre->core.pos < 0 || pre->core.flag&BAM_FUNMAP) { // If unmapped
@@ -393,6 +393,10 @@ static int bam_mating_core(samFile *in, samFile *out, int remove_reads, int prop
     ks_free(&str);
     return 0;
 
+ read_fail:
+    print_error("fixmate", "Couldn't read from input file");
+    goto fail;
+
  write_fail:
     print_error_errno("fixmate", "Couldn't write to output file");
  fail:
@@ -412,6 +416,7 @@ void usage(FILE* where)
 "  -p           Disable FR proper pair check\n"
 "  -c           Add template cigar ct tag\n"
 "  -m           Add mate score tag\n"
+"  -u           Uncompressed output\n"
 "  --no-PG      do not add a PG line\n");
 
     sam_global_opt_help(where, "-.O..@-.");
@@ -429,7 +434,7 @@ int bam_mating(int argc, char *argv[])
     samFile *in = NULL, *out = NULL;
     int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1, mate_score = 0, no_pg = 0;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
-    char wmode[3] = {'w', 'b', 0};
+    char wmode[4] = {'w', 'b', 0, 0};
     static const struct option lopts[] = {
         SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0, '@'),
         {"no-PG", no_argument, NULL, 1},
@@ -439,12 +444,13 @@ int bam_mating(int argc, char *argv[])
 
     // parse args
     if (argc == 1) { usage(samtools_stdout); return 0; }
-    while ((c = getopt_long(argc, argv, "rpcmO:@:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "rpcmO:@:u", lopts, NULL)) >= 0) {
         switch (c) {
             case 'r': remove_reads = 1; break;
             case 'p': proper_pair_check = 0; break;
             case 'c': add_ct = 1; break;
             case 'm': mate_score = 1; break;
+            case 'u': wmode[2] = '0'; break;
             case 1: no_pg = 1; break;
             default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                       /* else fall-through */
diff --git a/samtools/bam_md.c b/samtools/bam_md.c
index 9277788..7d5aeaa 100644
--- a/samtools/bam_md.c
+++ b/samtools/bam_md.c
@@ -1,6 +1,6 @@
 /*  bam_md.c -- calmd subcommand.
 
-    Copyright (C) 2009-2011, 2014-2015, 2019 Genome Research Ltd.
+    Copyright (C) 2009-2011, 2014-2015, 2019-2020 Genome Research Ltd.
     Portions copyright (C) 2009-2011 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -30,6 +30,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <string.h>
 #include <ctype.h>
 #include <limits.h>
+#include <errno.h>
 #include "htslib/faidx.h"
 #include "htslib/sam.h"
 #include "htslib/kstring.h"
@@ -46,102 +47,136 @@ DEALINGS IN THE SOFTWARE.  */
 
 int bam_aux_drop_other(bam1_t *b, uint8_t *s);
 
-void bam_fillmd1_core(bam1_t *b, char *ref, hts_pos_t ref_len, int flag, int max_nm, int quiet_mode)
+static int bam_fillmd1_core(const char *ref_name, bam1_t *b, char *ref,
+                            hts_pos_t ref_len, int flag, int max_nm,
+                            int quiet_mode, uint32_t *skipped)
 {
     uint8_t *seq = bam_get_seq(b);
     uint32_t *cigar = bam_get_cigar(b);
     bam1_core_t *c = &b->core;
-    int i, y, u = 0;
-    hts_pos_t x;
-    kstring_t *str;
+    int i, qpos, matched = 0;
+    hts_pos_t rpos;
+    kstring_t str = KS_INITIALIZE;
     int32_t old_nm_i = -1, nm = 0;
+    uint32_t err = 0;
 
-    str = (kstring_t*)calloc(1, sizeof(kstring_t));
-    for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
-        int j, l = cigar[i]>>4, op = cigar[i]&0xf;
+    if (c->l_qseq == 0) {
+        if (!quiet_mode) {
+            if (ref_name) {
+                fprintf(stderr, "[bam_fillmd1] no sequence in alignment "
+                        "record for '%s' at %s:%"PRIhts_pos", skipped\n",
+                        bam_get_qname(b), ref_name, c->pos + 1);
+            } else {
+                fprintf(stderr, "[bam_fillmd1] no sequence in alignment "
+                        "record for '%s', skipped\n", bam_get_qname(b));
+            }
+        }
+        if (skipped) (*skipped)++;
+        return 0;
+    }
+
+    for (i = qpos = 0, rpos = c->pos; i < c->n_cigar; ++i) {
+        int j, oplen = cigar[i]>>4, op = cigar[i]&0xf;
         if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-            for (j = 0; j < l; ++j) {
-                int c1, c2, z = y + j;
-                if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
-                c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
+            for (j = 0; j < oplen; ++j) {
+                int c1, c2, z = qpos + j;
+                if (rpos+j >= ref_len || z >= c->l_qseq || ref[rpos+j] == '\0')
+                    break; // out of bounds
+                c1 = bam_seqi(seq, z);
+                c2 = seq_nt16_table[(uint8_t)ref[rpos+j]];
                 if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
                     if (flag&USE_EQUAL) seq[z/2] &= (z&1)? 0xf0 : 0x0f;
-                    ++u;
+                    ++matched;
                 } else {
-                    kputw(u, str); kputc(toupper(ref[x+j]), str);
-                    u = 0; ++nm;
+                    err |= kputw(matched, &str) < 0;
+                    err |= kputc(toupper(ref[rpos+j]), &str) < 0;
+                    matched = 0; ++nm;
                 }
             }
-            if (j < l) break;
-            x += l; y += l;
+            if (j < oplen) break;
+            rpos += oplen; qpos += oplen;
         } else if (op == BAM_CDEL) {
-            kputw(u, str); kputc('^', str);
-            for (j = 0; j < l; ++j) {
-                if (x+j >= ref_len || ref[x+j] == '\0') break;
-                kputc(toupper(ref[x+j]), str);
+            err |= kputw(matched, &str) < 0;
+            err |= kputc('^', &str) < 0;
+            for (j = 0; j < oplen; ++j) {
+                if (rpos+j >= ref_len || ref[rpos+j] == '\0') break;
+                err |= kputc(toupper(ref[rpos+j]), &str) < 0;
             }
-            u = 0;
-            x += j; nm += j;
-            if (j < l) break;
+            matched = 0;
+            rpos += j; nm += j;
+            if (j < oplen) break;
         } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
-            y += l;
-            if (op == BAM_CINS) nm += l;
+            qpos += oplen;
+            if (op == BAM_CINS) nm += oplen;
         } else if (op == BAM_CREF_SKIP) {
-            x += l;
+            rpos += oplen;
         }
     }
-    kputw(u, str);
+    err |= kputw(matched, &str) < 0;
+    if (err) {
+        print_error_errno("calmd", "Couldn't build new MD string");
+        goto fail;
+    }
     // apply max_nm
     if (max_nm > 0 && nm >= max_nm) {
-        for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
-            int j, l = cigar[i]>>4, op = cigar[i]&0xf;
+        for (i = qpos = 0, rpos = c->pos; i < c->n_cigar; ++i) {
+            int j, oplen = cigar[i]>>4, op = cigar[i]&0xf;
             if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-                for (j = 0; j < l; ++j) {
-                    int c1, c2, z = y + j;
-                    if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
-                    c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
+                for (j = 0; j < oplen; ++j) {
+                    int c1, c2, z = qpos + j;
+                    if (rpos+j >= ref_len || z >= c->l_qseq || ref[rpos+j] == '\0')
+                        break; // out of bounds
+                    c1 = bam_seqi(seq, z);
+                    c2 = seq_nt16_table[(uint8_t)ref[rpos+j]];
                     if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
                         seq[z/2] |= (z&1)? 0x0f : 0xf0;
                         bam_get_qual(b)[z] = 0;
                     }
                 }
-                if (j < l) break;
-                x += l; y += l;
-            } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l;
-            else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
+                if (j < oplen) break;
+                rpos += oplen; qpos += oplen;
+            } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) rpos += oplen;
+            else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) qpos += oplen;
         }
     }
     // update NM
     if ((flag & UPDATE_NM) && !(c->flag & BAM_FUNMAP)) {
         uint8_t *old_nm = bam_aux_get(b, "NM");
         if (old_nm) old_nm_i = bam_aux2i(old_nm);
-        if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
+        if (!old_nm) {
+            if (bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm) < 0)
+                goto aux_fail;
+        }
         else if (nm != old_nm_i) {
             if (!quiet_mode) {
                 fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam_get_qname(b), old_nm_i, nm);
             }
-            bam_aux_del(b, old_nm);
-            bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
+            if (bam_aux_del(b, old_nm) < 0) goto aux_fail;
+            if (bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm) < 0)
+                goto aux_fail;
         }
     }
     // update MD
     if ((flag & UPDATE_MD) && !(c->flag & BAM_FUNMAP)) {
         uint8_t *old_md = bam_aux_get(b, "MD");
-        if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
-        else {
+        if (!old_md) {
+            if (bam_aux_append(b, "MD", 'Z', str.l + 1, (uint8_t*)str.s) < 0)
+                goto aux_fail;
+        } else {
             int is_diff = 0;
-            if (strlen((char*)old_md+1) == str->l) {
-                for (i = 0; i < str->l; ++i)
-                    if (toupper(old_md[i+1]) != toupper(str->s[i]))
+            if (strlen((char*)old_md+1) == str.l) {
+                for (i = 0; i < str.l; ++i)
+                    if (toupper(old_md[i+1]) != toupper(str.s[i]))
                         break;
-                if (i < str->l) is_diff = 1;
+                if (i < str.l) is_diff = 1;
             } else is_diff = 1;
             if (is_diff) {
                 if (!quiet_mode) {
-                    fprintf(stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str->s);
+                    fprintf(stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str.s);
                 }
-                bam_aux_del(b, old_md);
-                bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
+                if (bam_aux_del(b, old_md) < 0) goto aux_fail;
+                if (bam_aux_append(b, "MD", 'Z', str.l + 1, (uint8_t*)str.s) < 0)
+                    goto aux_fail;
             }
         }
     }
@@ -158,12 +193,25 @@ void bam_fillmd1_core(bam1_t *b, char *ref, hts_pos_t ref_len, int flag, int max
             if (qual[i] >= 3) qual[i] = qual[i]/10*10 + 7;
     }
 
-    free(str->s); free(str);
+    free(str.s);
+    return 0;
+
+ aux_fail:
+    if (errno == ENOMEM) {
+        print_error("calmd", "Couldn't add aux tag (too long)");
+    } else if (errno == EINVAL) {
+        print_error("calmd", "Corrupt aux data");
+    } else {
+        print_error_errno("calmd", "Couldn't add aux tag");
+    }
+ fail:
+    free(str.s);
+    return -1;
 }
 
-void bam_fillmd1(bam1_t *b, char *ref, int flag, int quiet_mode)
+int bam_fillmd1(bam1_t *b, char *ref, int flag, int quiet_mode)
 {
-    bam_fillmd1_core(b, ref, INT_MAX, flag, 0, quiet_mode);
+    return bam_fillmd1_core(NULL, b, ref, INT_MAX, flag, 0, quiet_mode, NULL);
 }
 
 int calmd_usage() {
@@ -193,8 +241,10 @@ int bam_fillmd(int argc, char *argv[])
     sam_hdr_t *header = NULL;
     faidx_t *fai = NULL;
     char *ref = NULL, mode_w[8], *ref_file, *arg_list = NULL;
+    const char *ref_name = NULL;
     bam1_t *b = NULL;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
+    uint32_t skipped = 0;
 
     static const struct option lopts[] = {
         SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0,'@'),
@@ -294,20 +344,34 @@ int bam_fillmd(int argc, char *argv[])
         if (b->core.tid >= 0) {
             if (tid != b->core.tid) {
                 free(ref);
-                ref = fai_fetch64(fai, sam_hdr_tid2name(header, b->core.tid), &len);
+                ref = NULL;
+                len = 0;
+                ref_name = sam_hdr_tid2name(header, b->core.tid);
+                if (ref_name) {
+                    ref = fai_fetch64(fai, ref_name, &len);
+                }
                 tid = b->core.tid;
                 if (ref == 0) { // FIXME: Should this always be fatal?
                     fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
-                            sam_hdr_tid2name(header, tid));
+                            ref_name ? ref_name : "(unknown)");
                     if (is_realn || capQ > 10) goto fail; // Would otherwise crash
                 }
             }
-            if (is_realn) sam_prob_realn(b, ref, len, baq_flag);
+            if (is_realn) {
+                if (sam_prob_realn(b, ref, len, baq_flag) < -3) {
+                    print_error_errno("calmd", "BAQ alignment failed");
+                    goto fail;
+                }
+            }
             if (capQ > 10) {
                 int q = sam_cap_mapq(b, ref, len, capQ);
                 if (b->core.qual > q) b->core.qual = q;
             }
-            if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm, quiet_mode);
+            if (ref) {
+                if (bam_fillmd1_core(ref_name, b, ref, len, flt_flag, max_nm,
+                                     quiet_mode, &skipped) < 0)
+                    goto fail;
+            }
         }
         if (sam_write1(fpout, header, b) < 0) {
             print_error_errno("calmd", "failed to write to output file");
@@ -318,6 +382,13 @@ int bam_fillmd(int argc, char *argv[])
         fprintf(stderr, "[bam_fillmd] Error reading input.\n");
         goto fail;
     }
+
+    if (skipped) {
+        fprintf(stderr, "[calmd] Warning: %"PRIu32" records skipped due "
+                "to no query sequence\n",
+                skipped);
+    }
+
     bam_destroy1(b);
     sam_hdr_destroy(header);
 
diff --git a/samtools/bam_md.c.pysam.c b/samtools/bam_md.c.pysam.c
index 93990b9..b71e77c 100644
--- a/samtools/bam_md.c.pysam.c
+++ b/samtools/bam_md.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam_md.c -- calmd subcommand.
 
-    Copyright (C) 2009-2011, 2014-2015, 2019 Genome Research Ltd.
+    Copyright (C) 2009-2011, 2014-2015, 2019-2020 Genome Research Ltd.
     Portions copyright (C) 2009-2011 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -32,6 +32,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <string.h>
 #include <ctype.h>
 #include <limits.h>
+#include <errno.h>
 #include "htslib/faidx.h"
 #include "htslib/sam.h"
 #include "htslib/kstring.h"
@@ -48,102 +49,136 @@ DEALINGS IN THE SOFTWARE.  */
 
 int bam_aux_drop_other(bam1_t *b, uint8_t *s);
 
-void bam_fillmd1_core(bam1_t *b, char *ref, hts_pos_t ref_len, int flag, int max_nm, int quiet_mode)
+static int bam_fillmd1_core(const char *ref_name, bam1_t *b, char *ref,
+                            hts_pos_t ref_len, int flag, int max_nm,
+                            int quiet_mode, uint32_t *skipped)
 {
     uint8_t *seq = bam_get_seq(b);
     uint32_t *cigar = bam_get_cigar(b);
     bam1_core_t *c = &b->core;
-    int i, y, u = 0;
-    hts_pos_t x;
-    kstring_t *str;
+    int i, qpos, matched = 0;
+    hts_pos_t rpos;
+    kstring_t str = KS_INITIALIZE;
     int32_t old_nm_i = -1, nm = 0;
+    uint32_t err = 0;
 
-    str = (kstring_t*)calloc(1, sizeof(kstring_t));
-    for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
-        int j, l = cigar[i]>>4, op = cigar[i]&0xf;
+    if (c->l_qseq == 0) {
+        if (!quiet_mode) {
+            if (ref_name) {
+                fprintf(samtools_stderr, "[bam_fillmd1] no sequence in alignment "
+                        "record for '%s' at %s:%"PRIhts_pos", skipped\n",
+                        bam_get_qname(b), ref_name, c->pos + 1);
+            } else {
+                fprintf(samtools_stderr, "[bam_fillmd1] no sequence in alignment "
+                        "record for '%s', skipped\n", bam_get_qname(b));
+            }
+        }
+        if (skipped) (*skipped)++;
+        return 0;
+    }
+
+    for (i = qpos = 0, rpos = c->pos; i < c->n_cigar; ++i) {
+        int j, oplen = cigar[i]>>4, op = cigar[i]&0xf;
         if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-            for (j = 0; j < l; ++j) {
-                int c1, c2, z = y + j;
-                if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
-                c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
+            for (j = 0; j < oplen; ++j) {
+                int c1, c2, z = qpos + j;
+                if (rpos+j >= ref_len || z >= c->l_qseq || ref[rpos+j] == '\0')
+                    break; // out of bounds
+                c1 = bam_seqi(seq, z);
+                c2 = seq_nt16_table[(uint8_t)ref[rpos+j]];
                 if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
                     if (flag&USE_EQUAL) seq[z/2] &= (z&1)? 0xf0 : 0x0f;
-                    ++u;
+                    ++matched;
                 } else {
-                    kputw(u, str); kputc(toupper(ref[x+j]), str);
-                    u = 0; ++nm;
+                    err |= kputw(matched, &str) < 0;
+                    err |= kputc(toupper(ref[rpos+j]), &str) < 0;
+                    matched = 0; ++nm;
                 }
             }
-            if (j < l) break;
-            x += l; y += l;
+            if (j < oplen) break;
+            rpos += oplen; qpos += oplen;
         } else if (op == BAM_CDEL) {
-            kputw(u, str); kputc('^', str);
-            for (j = 0; j < l; ++j) {
-                if (x+j >= ref_len || ref[x+j] == '\0') break;
-                kputc(toupper(ref[x+j]), str);
+            err |= kputw(matched, &str) < 0;
+            err |= kputc('^', &str) < 0;
+            for (j = 0; j < oplen; ++j) {
+                if (rpos+j >= ref_len || ref[rpos+j] == '\0') break;
+                err |= kputc(toupper(ref[rpos+j]), &str) < 0;
             }
-            u = 0;
-            x += j; nm += j;
-            if (j < l) break;
+            matched = 0;
+            rpos += j; nm += j;
+            if (j < oplen) break;
         } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
-            y += l;
-            if (op == BAM_CINS) nm += l;
+            qpos += oplen;
+            if (op == BAM_CINS) nm += oplen;
         } else if (op == BAM_CREF_SKIP) {
-            x += l;
+            rpos += oplen;
         }
     }
-    kputw(u, str);
+    err |= kputw(matched, &str) < 0;
+    if (err) {
+        print_error_errno("calmd", "Couldn't build new MD string");
+        goto fail;
+    }
     // apply max_nm
     if (max_nm > 0 && nm >= max_nm) {
-        for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
-            int j, l = cigar[i]>>4, op = cigar[i]&0xf;
+        for (i = qpos = 0, rpos = c->pos; i < c->n_cigar; ++i) {
+            int j, oplen = cigar[i]>>4, op = cigar[i]&0xf;
             if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
-                for (j = 0; j < l; ++j) {
-                    int c1, c2, z = y + j;
-                    if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
-                    c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
+                for (j = 0; j < oplen; ++j) {
+                    int c1, c2, z = qpos + j;
+                    if (rpos+j >= ref_len || z >= c->l_qseq || ref[rpos+j] == '\0')
+                        break; // out of bounds
+                    c1 = bam_seqi(seq, z);
+                    c2 = seq_nt16_table[(uint8_t)ref[rpos+j]];
                     if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
                         seq[z/2] |= (z&1)? 0x0f : 0xf0;
                         bam_get_qual(b)[z] = 0;
                     }
                 }
-                if (j < l) break;
-                x += l; y += l;
-            } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l;
-            else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
+                if (j < oplen) break;
+                rpos += oplen; qpos += oplen;
+            } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) rpos += oplen;
+            else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) qpos += oplen;
         }
     }
     // update NM
     if ((flag & UPDATE_NM) && !(c->flag & BAM_FUNMAP)) {
         uint8_t *old_nm = bam_aux_get(b, "NM");
         if (old_nm) old_nm_i = bam_aux2i(old_nm);
-        if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
+        if (!old_nm) {
+            if (bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm) < 0)
+                goto aux_fail;
+        }
         else if (nm != old_nm_i) {
             if (!quiet_mode) {
                 fprintf(samtools_stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam_get_qname(b), old_nm_i, nm);
             }
-            bam_aux_del(b, old_nm);
-            bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
+            if (bam_aux_del(b, old_nm) < 0) goto aux_fail;
+            if (bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm) < 0)
+                goto aux_fail;
         }
     }
     // update MD
     if ((flag & UPDATE_MD) && !(c->flag & BAM_FUNMAP)) {
         uint8_t *old_md = bam_aux_get(b, "MD");
-        if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
-        else {
+        if (!old_md) {
+            if (bam_aux_append(b, "MD", 'Z', str.l + 1, (uint8_t*)str.s) < 0)
+                goto aux_fail;
+        } else {
             int is_diff = 0;
-            if (strlen((char*)old_md+1) == str->l) {
-                for (i = 0; i < str->l; ++i)
-                    if (toupper(old_md[i+1]) != toupper(str->s[i]))
+            if (strlen((char*)old_md+1) == str.l) {
+                for (i = 0; i < str.l; ++i)
+                    if (toupper(old_md[i+1]) != toupper(str.s[i]))
                         break;
-                if (i < str->l) is_diff = 1;
+                if (i < str.l) is_diff = 1;
             } else is_diff = 1;
             if (is_diff) {
                 if (!quiet_mode) {
-                    fprintf(samtools_stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str->s);
+                    fprintf(samtools_stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str.s);
                 }
-                bam_aux_del(b, old_md);
-                bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
+                if (bam_aux_del(b, old_md) < 0) goto aux_fail;
+                if (bam_aux_append(b, "MD", 'Z', str.l + 1, (uint8_t*)str.s) < 0)
+                    goto aux_fail;
             }
         }
     }
@@ -160,12 +195,25 @@ void bam_fillmd1_core(bam1_t *b, char *ref, hts_pos_t ref_len, int flag, int max
             if (qual[i] >= 3) qual[i] = qual[i]/10*10 + 7;
     }
 
-    free(str->s); free(str);
+    free(str.s);
+    return 0;
+
+ aux_fail:
+    if (errno == ENOMEM) {
+        print_error("calmd", "Couldn't add aux tag (too long)");
+    } else if (errno == EINVAL) {
+        print_error("calmd", "Corrupt aux data");
+    } else {
+        print_error_errno("calmd", "Couldn't add aux tag");
+    }
+ fail:
+    free(str.s);
+    return -1;
 }
 
-void bam_fillmd1(bam1_t *b, char *ref, int flag, int quiet_mode)
+int bam_fillmd1(bam1_t *b, char *ref, int flag, int quiet_mode) /* wrapper: passes bam_fillmd1_core()'s status (0 ok, -1 failure) back to the caller */
 {
-    bam_fillmd1_core(b, ref, INT_MAX, flag, 0, quiet_mode);
+    return bam_fillmd1_core(NULL, b, ref, INT_MAX, flag, 0, quiet_mode, NULL); /* no ref name or skip counter; ref_len = INT_MAX, max_nm = 0 */
 }
 
 int calmd_usage() {
@@ -195,8 +243,10 @@ int bam_fillmd(int argc, char *argv[])
     sam_hdr_t *header = NULL;
     faidx_t *fai = NULL;
     char *ref = NULL, mode_w[8], *ref_file, *arg_list = NULL;
+    const char *ref_name = NULL;
     bam1_t *b = NULL;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
+    uint32_t skipped = 0;
 
     static const struct option lopts[] = {
         SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0,'@'),
@@ -296,20 +346,34 @@ int bam_fillmd(int argc, char *argv[])
         if (b->core.tid >= 0) {
             if (tid != b->core.tid) {
                 free(ref);
-                ref = fai_fetch64(fai, sam_hdr_tid2name(header, b->core.tid), &len);
+                ref = NULL;
+                len = 0;
+                ref_name = sam_hdr_tid2name(header, b->core.tid);
+                if (ref_name) {
+                    ref = fai_fetch64(fai, ref_name, &len);
+                }
                 tid = b->core.tid;
                 if (ref == 0) { // FIXME: Should this always be fatal?
                     fprintf(samtools_stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
-                            sam_hdr_tid2name(header, tid));
+                            ref_name ? ref_name : "(unknown)");
                     if (is_realn || capQ > 10) goto fail; // Would otherwise crash
                 }
             }
-            if (is_realn) sam_prob_realn(b, ref, len, baq_flag);
+            if (is_realn) {
+                if (sam_prob_realn(b, ref, len, baq_flag) < -3) {
+                    print_error_errno("calmd", "BAQ alignment failed");
+                    goto fail;
+                }
+            }
             if (capQ > 10) {
                 int q = sam_cap_mapq(b, ref, len, capQ);
                 if (b->core.qual > q) b->core.qual = q;
             }
-            if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm, quiet_mode);
+            if (ref) {
+                if (bam_fillmd1_core(ref_name, b, ref, len, flt_flag, max_nm,
+                                     quiet_mode, &skipped) < 0)
+                    goto fail;
+            }
         }
         if (sam_write1(fpout, header, b) < 0) {
             print_error_errno("calmd", "failed to write to output file");
@@ -320,6 +384,13 @@ int bam_fillmd(int argc, char *argv[])
         fprintf(samtools_stderr, "[bam_fillmd] Error reading input.\n");
         goto fail;
     }
+
+    if (skipped) {
+        fprintf(samtools_stderr, "[calmd] Warning: %"PRIu32" records skipped due "
+                "to no query sequence\n",
+                skipped);
+    }
+
     bam_destroy1(b);
     sam_hdr_destroy(header);
 
diff --git a/samtools/bam_plcmd.c b/samtools/bam_plcmd.c
index 0497fb6..6fd282c 100644
--- a/samtools/bam_plcmd.c
+++ b/samtools/bam_plcmd.c
@@ -1,6 +1,6 @@
 /*  bam_plcmd.c -- mpileup subcommand.
 
-    Copyright (C) 2008-2015, 2019 Genome Research Ltd.
+    Copyright (C) 2008-2015, 2019-2021 Genome Research Ltd.
     Portions copyright (C) 2009-2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -90,8 +90,10 @@ static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, hts_pos_t pos,
     int del_len = -p->indel;
     if (p->indel > 0) {
         int len = bam_plp_insertion(p, ks, &del_len);
-        if (len < 0)
+        if (len < 0) {
+            print_error("mpileup", "bam_plp_insertion() failed");
             return -1;
+        }
         putc('+', fp); printw(len, fp);
         if (bam_is_rev(p->b)) {
             char pad = rev_del ? '#' : '*';
@@ -126,10 +128,11 @@ static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, hts_pos_t pos,
 #define MPLP_REDO_BAQ   (1<<6)
 #define MPLP_ILLUMINA13 (1<<7)
 #define MPLP_IGNORE_RG  (1<<8)
-#define MPLP_PRINT_QPOS (1<<9)
-#define MPLP_PER_SAMPLE (1<<11)
-#define MPLP_SMART_OVERLAPS (1<<12)
+#define MPLP_PER_SAMPLE (1<<9)
+#define MPLP_SMART_OVERLAPS (1<<10)
 
+#define MPLP_PRINT_MAPQ_CHAR (1<<11)
+#define MPLP_PRINT_QPOS  (1<<12)
 #define MPLP_PRINT_QNAME (1<<13)
 #define MPLP_PRINT_FLAG  (1<<14)
 #define MPLP_PRINT_RNAME (1<<15)
@@ -294,9 +297,7 @@ print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname,
     fprintf(fp, "%s\t%"PRIhts_pos"\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N');
     for (i = 0; i < n; ++i) {
         fputs("\t0\t*\t*", fp);
-        if (conf->flag & MPLP_PRINT_QPOS)
-            fputs("\t*", fp);
-        int flag_value = MPLP_PRINT_QNAME;
+        int flag_value = MPLP_PRINT_MAPQ_CHAR;
         while(flag_value < MPLP_PRINT_QUAL + 1) {
             if (conf->flag & flag_value)
                 fputs("\t*", fp);
@@ -757,9 +758,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
                 fprintf(pileup_fp, "\t%d\t", cnt);
                 if (n_plp[i] == 0) {
                     fputs("*\t*", pileup_fp);
-                    if (conf->flag & MPLP_PRINT_QPOS)
-                        fputs("\t*", pileup_fp);
-                    int flag_value = MPLP_PRINT_QNAME;
+                    int flag_value = MPLP_PRINT_MAPQ_CHAR;
                     while(flag_value < MPLP_PRINT_QUAL + 1) {
                         if (conf->flag & flag_value)
                             fputs("\t*", pileup_fp);
@@ -805,25 +804,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
                     }
                     if (!n) putc('*', pileup_fp);
 
-                    /* Print mpileup positions */
-                    if (conf->flag & MPLP_PRINT_QPOS) {
-                        n = 0;
-                        putc('\t', pileup_fp);
-                        for (j = 0; j < n_plp[i]; ++j) {
-                            const bam_pileup1_t *p = plp[i] + j;
-                            int c = p->qpos < p->b->core.l_qseq
-                                    ? bam_get_qual(p->b)[p->qpos]
-                                                         : 0;
-                            if ( c < conf->min_baseQ ) continue;
-                            if (n > 0) putc(',', pileup_fp);
-                            n++;
-                            fprintf(pileup_fp, "%d", p->qpos + 1);
-                        }
-                        if (!n) putc('*', pileup_fp);
-                    }
-
                     /* Print selected columns */
-                    int flag_value = MPLP_PRINT_QNAME;
+                    int flag_value = MPLP_PRINT_MAPQ_CHAR;
                     while(flag_value < MPLP_PRINT_QUAL + 1) {
                         if (conf->flag & flag_value) {
                             n = 0;
@@ -834,10 +816,18 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
                                     ? bam_get_qual(p->b)[p->qpos]
                                     : 0;
                                 if ( c < conf->min_baseQ ) continue;
-                                if (n > 0 && flag_value != MPLP_PRINT_MAPQ) putc(',', pileup_fp);
+                                if (n > 0 && flag_value != MPLP_PRINT_MAPQ_CHAR) putc(',', pileup_fp);
                                 n++;
 
                                 switch (flag_value) {
+                                case MPLP_PRINT_MAPQ_CHAR:
+                                    c = p->b->core.qual + 33;
+                                    if (c > 126) c = 126;
+                                    putc(c, pileup_fp);
+                                    break;
+                                case MPLP_PRINT_QPOS:
+                                    fprintf(pileup_fp, "%d", p->qpos + 1);
+                                    break;
                                 case MPLP_PRINT_QNAME:
                                     fputs(bam_get_qname(p->b), pileup_fp);
                                     break;
@@ -854,9 +844,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
                                     fprintf(pileup_fp, "%"PRId64, (int64_t) p->b->core.pos + 1);
                                     break;
                                 case MPLP_PRINT_MAPQ:
-                                    c = p->b->core.qual + 33;
-                                    if (c > 126) c = 126;
-                                    putc(c, pileup_fp);
+                                    fprintf(pileup_fp, "%d", p->b->core.qual);
                                     break;
                                 case MPLP_PRINT_RNEXT:
                                     if (p->b->core.mtid >= 0)
@@ -930,6 +918,12 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
         }
     }
 
+    if (ret < 0) {
+        print_error("mpileup", "error reading from input file");
+        ret = EXIT_FAILURE;
+        goto fail;
+    }
+
     if (conf->all && !(conf->flag & MPLP_BCF)) {
         // Handle terminating region
         if (last_tid < 0 && conf->reg && conf->all > 1) {
@@ -1110,9 +1104,9 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
     fprintf(fp,
 "  -r, --region REG        region in which pileup is generated\n"
 "  -R, --ignore-RG         ignore RG tags (one BAM = one sample)\n"
-"  --rf, --incl-flags STR|INT  required flags: skip reads with mask bits unset [%s]\n", tmp_require);
+"  --rf, --incl-flags STR|INT  required flags: include reads with any of the mask bits set [%s]\n", tmp_require);
     fprintf(fp,
-"  --ff, --excl-flags STR|INT  filter flags: skip reads with mask bits set\n"
+"  --ff, --excl-flags STR|INT  filter flags: skip reads with any of the mask bits set\n"
 "                                            [%s]\n", tmp_filter);
     fprintf(fp,
 "  -x, --ignore-overlaps   disable read-pair overlap detection\n"
@@ -1281,7 +1275,7 @@ int bam_mpileup(int argc, char *argv[])
         case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
         case '6': mplp.flag |= MPLP_ILLUMINA13; break;
         case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
-        case 's': mplp.flag |= MPLP_PRINT_MAPQ; break;
+        case 's': mplp.flag |= MPLP_PRINT_MAPQ_CHAR; break;
         case 'O': mplp.flag |= MPLP_PRINT_QPOS; break;
         case 'C': mplp.capQ_thres = atoi(optarg); break;
         case 'q': mplp.min_mq = atoi(optarg); break;
diff --git a/samtools/bam_plcmd.c.pysam.c b/samtools/bam_plcmd.c.pysam.c
index 7c9986f..bcb8a5c 100644
--- a/samtools/bam_plcmd.c.pysam.c
+++ b/samtools/bam_plcmd.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam_plcmd.c -- mpileup subcommand.
 
-    Copyright (C) 2008-2015, 2019 Genome Research Ltd.
+    Copyright (C) 2008-2015, 2019-2021 Genome Research Ltd.
     Portions copyright (C) 2009-2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -92,8 +92,10 @@ static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, hts_pos_t pos,
     int del_len = -p->indel;
     if (p->indel > 0) {
         int len = bam_plp_insertion(p, ks, &del_len);
-        if (len < 0)
+        if (len < 0) {
+            print_error("mpileup", "bam_plp_insertion() failed");
             return -1;
+        }
         putc('+', fp); printw(len, fp);
         if (bam_is_rev(p->b)) {
             char pad = rev_del ? '#' : '*';
@@ -128,10 +130,11 @@ static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, hts_pos_t pos,
 #define MPLP_REDO_BAQ   (1<<6)
 #define MPLP_ILLUMINA13 (1<<7)
 #define MPLP_IGNORE_RG  (1<<8)
-#define MPLP_PRINT_QPOS (1<<9)
-#define MPLP_PER_SAMPLE (1<<11)
-#define MPLP_SMART_OVERLAPS (1<<12)
+#define MPLP_PER_SAMPLE (1<<9)
+#define MPLP_SMART_OVERLAPS (1<<10)
 
+#define MPLP_PRINT_MAPQ_CHAR (1<<11)
+#define MPLP_PRINT_QPOS  (1<<12)
 #define MPLP_PRINT_QNAME (1<<13)
 #define MPLP_PRINT_FLAG  (1<<14)
 #define MPLP_PRINT_RNAME (1<<15)
@@ -296,9 +299,7 @@ print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname,
     fprintf(fp, "%s\t%"PRIhts_pos"\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N');
     for (i = 0; i < n; ++i) {
         fputs("\t0\t*\t*", fp);
-        if (conf->flag & MPLP_PRINT_QPOS)
-            fputs("\t*", fp);
-        int flag_value = MPLP_PRINT_QNAME;
+        int flag_value = MPLP_PRINT_MAPQ_CHAR;
         while(flag_value < MPLP_PRINT_QUAL + 1) {
             if (conf->flag & flag_value)
                 fputs("\t*", fp);
@@ -389,7 +390,7 @@ static void group_smpl(mplp_pileup_t *m, bam_sample_t *sm, kstring_t *buf,
             if (id < 0 || id >= m->n) {
                 assert(q); // otherwise a bug
                 fprintf(samtools_stderr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, (char*)q+1, fn[i]);
-                exit(EXIT_FAILURE);
+                samtools_exit(EXIT_FAILURE);
             }
             if (m->n_plp[id] == m->m_plp[id]) {
                 m->m_plp[id] = m->m_plp[id]? m->m_plp[id]<<1 : 8;
@@ -442,7 +443,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
 
     if (n == 0) {
         fprintf(samtools_stderr,"[%s] no input file/data given\n", __func__);
-        exit(EXIT_FAILURE);
+        samtools_exit(EXIT_FAILURE);
     }
 
     // read the header of each file in the list and initialize data
@@ -453,23 +454,23 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
         if ( !data[i]->fp )
         {
             fprintf(samtools_stderr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno));
-            exit(EXIT_FAILURE);
+            samtools_exit(EXIT_FAILURE);
         }
         if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
             fprintf(samtools_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
-            exit(EXIT_FAILURE);
+            samtools_exit(EXIT_FAILURE);
         }
         if (conf->fai_fname && hts_set_fai_filename(data[i]->fp, conf->fai_fname) != 0) {
             fprintf(samtools_stderr, "[%s] failed to process %s: %s\n",
                     __func__, conf->fai_fname, strerror(errno));
-            exit(EXIT_FAILURE);
+            samtools_exit(EXIT_FAILURE);
         }
         data[i]->conf = conf;
         data[i]->ref = &mp_ref;
         h_tmp = sam_hdr_read(data[i]->fp);
         if ( !h_tmp ) {
             fprintf(samtools_stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
-            exit(EXIT_FAILURE);
+            samtools_exit(EXIT_FAILURE);
         }
         bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : sam_hdr_str(h_tmp));
         if (conf->flag & MPLP_BCF) {
@@ -487,11 +488,11 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
 
             if (idx == NULL) {
                 fprintf(samtools_stderr, "[%s] fail to load index for %s\n", __func__, fn[i]);
-                exit(EXIT_FAILURE);
+                samtools_exit(EXIT_FAILURE);
             }
             if ( (data[i]->iter=sam_itr_querys(idx, h_tmp, conf->reg)) == 0) {
                 fprintf(samtools_stderr, "[E::%s] fail to parse region '%s' with %s\n", __func__, conf->reg, fn[i]);
-                exit(EXIT_FAILURE);
+                samtools_exit(EXIT_FAILURE);
             }
             if (i == 0) beg0 = data[i]->iter->beg, end0 = data[i]->iter->end, tid0 = data[i]->iter->tid;
             hts_idx_destroy(idx);
@@ -529,7 +530,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
         bcf_fp = bcf_open(conf->output_fname? conf->output_fname : "-", mode);
         if (bcf_fp == NULL) {
             fprintf(samtools_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
-            exit(EXIT_FAILURE);
+            samtools_exit(EXIT_FAILURE);
         }
 
         // BCF header creation
@@ -613,7 +614,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
         if (bcf_hdr_write(bcf_fp, bcf_hdr) != 0) {
             print_error_errno("mpileup", "Failed to write VCF/BCF header to \"%s\"",
                               conf->output_fname? conf->output_fname : "standard output");
-            exit(EXIT_FAILURE);
+            samtools_exit(EXIT_FAILURE);
         }
         // End of BCF header creation
 
@@ -652,7 +653,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
 
         if (pileup_fp == NULL) {
             fprintf(samtools_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname, strerror(errno));
-            exit(EXIT_FAILURE);
+            samtools_exit(EXIT_FAILURE);
         }
     }
 
@@ -698,7 +699,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
             if (bcf_write1(bcf_fp, bcf_hdr, bcf_rec) != 0) {
                 print_error_errno("mpileup", "Failed to write VCF/BCF record to \"%s\"",
                                   conf->output_fname?conf->output_fname:"standard output");
-                exit(EXIT_FAILURE);
+                samtools_exit(EXIT_FAILURE);
             }
             // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
             if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0)
@@ -712,7 +713,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
                     if (bcf_write1(bcf_fp, bcf_hdr, bcf_rec) != 0) {
                         print_error_errno("mpileup", "Failed to write VCF/BCF record to \"%s\"",
                                           conf->output_fname?conf->output_fname:"standard output");
-                        exit(EXIT_FAILURE);
+                        samtools_exit(EXIT_FAILURE);
                     }
                 }
             }
@@ -759,9 +760,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
                 fprintf(pileup_fp, "\t%d\t", cnt);
                 if (n_plp[i] == 0) {
                     fputs("*\t*", pileup_fp);
-                    if (conf->flag & MPLP_PRINT_QPOS)
-                        fputs("\t*", pileup_fp);
-                    int flag_value = MPLP_PRINT_QNAME;
+                    int flag_value = MPLP_PRINT_MAPQ_CHAR;
                     while(flag_value < MPLP_PRINT_QUAL + 1) {
                         if (conf->flag & flag_value)
                             fputs("\t*", pileup_fp);
@@ -807,25 +806,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
                     }
                     if (!n) putc('*', pileup_fp);
 
-                    /* Print mpileup positions */
-                    if (conf->flag & MPLP_PRINT_QPOS) {
-                        n = 0;
-                        putc('\t', pileup_fp);
-                        for (j = 0; j < n_plp[i]; ++j) {
-                            const bam_pileup1_t *p = plp[i] + j;
-                            int c = p->qpos < p->b->core.l_qseq
-                                    ? bam_get_qual(p->b)[p->qpos]
-                                                         : 0;
-                            if ( c < conf->min_baseQ ) continue;
-                            if (n > 0) putc(',', pileup_fp);
-                            n++;
-                            fprintf(pileup_fp, "%d", p->qpos + 1);
-                        }
-                        if (!n) putc('*', pileup_fp);
-                    }
-
                     /* Print selected columns */
-                    int flag_value = MPLP_PRINT_QNAME;
+                    int flag_value = MPLP_PRINT_MAPQ_CHAR;
                     while(flag_value < MPLP_PRINT_QUAL + 1) {
                         if (conf->flag & flag_value) {
                             n = 0;
@@ -836,10 +818,18 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
                                     ? bam_get_qual(p->b)[p->qpos]
                                     : 0;
                                 if ( c < conf->min_baseQ ) continue;
-                                if (n > 0 && flag_value != MPLP_PRINT_MAPQ) putc(',', pileup_fp);
+                                if (n > 0 && flag_value != MPLP_PRINT_MAPQ_CHAR) putc(',', pileup_fp);
                                 n++;
 
                                 switch (flag_value) {
+                                case MPLP_PRINT_MAPQ_CHAR:
+                                    c = p->b->core.qual + 33;
+                                    if (c > 126) c = 126;
+                                    putc(c, pileup_fp);
+                                    break;
+                                case MPLP_PRINT_QPOS:
+                                    fprintf(pileup_fp, "%d", p->qpos + 1);
+                                    break;
                                 case MPLP_PRINT_QNAME:
                                     fputs(bam_get_qname(p->b), pileup_fp);
                                     break;
@@ -856,9 +846,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
                                     fprintf(pileup_fp, "%"PRId64, (int64_t) p->b->core.pos + 1);
                                     break;
                                 case MPLP_PRINT_MAPQ:
-                                    c = p->b->core.qual + 33;
-                                    if (c > 126) c = 126;
-                                    putc(c, pileup_fp);
+                                    fprintf(pileup_fp, "%d", p->b->core.qual);
                                     break;
                                 case MPLP_PRINT_RNEXT:
                                     if (p->b->core.mtid >= 0)
@@ -932,6 +920,12 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
         }
     }
 
+    if (ret < 0) {
+        print_error("mpileup", "error reading from input file");
+        ret = EXIT_FAILURE;
+        goto fail;
+    }
+
     if (conf->all && !(conf->flag & MPLP_BCF)) {
         // Handle terminating region
         if (last_tid < 0 && conf->reg && conf->all > 1) {
@@ -1073,7 +1067,7 @@ int parse_format_flag(const char *str)
         else
         {
             fprintf(samtools_stderr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
-            exit(EXIT_FAILURE);
+            samtools_exit(EXIT_FAILURE);
         }
         free(tags[i]);
     }
@@ -1112,9 +1106,9 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
     fprintf(fp,
 "  -r, --region REG        region in which pileup is generated\n"
 "  -R, --ignore-RG         ignore RG tags (one BAM = one sample)\n"
-"  --rf, --incl-flags STR|INT  required flags: skip reads with mask bits unset [%s]\n", tmp_require);
+"  --rf, --incl-flags STR|INT  required flags: include reads with any of the mask bits set [%s]\n", tmp_require);
     fprintf(fp,
-"  --ff, --excl-flags STR|INT  filter flags: skip reads with mask bits set\n"
+"  --ff, --excl-flags STR|INT  filter flags: skip reads with any of the mask bits set\n"
 "                                            [%s]\n", tmp_filter);
     fprintf(fp,
 "  -x, --ignore-overlaps   disable read-pair overlap detection\n"
@@ -1283,7 +1277,7 @@ int bam_mpileup(int argc, char *argv[])
         case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
         case '6': mplp.flag |= MPLP_ILLUMINA13; break;
         case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
-        case 's': mplp.flag |= MPLP_PRINT_MAPQ; break;
+        case 's': mplp.flag |= MPLP_PRINT_MAPQ_CHAR; break;
         case 'O': mplp.flag |= MPLP_PRINT_QPOS; break;
         case 'C': mplp.capQ_thres = atoi(optarg); break;
         case 'q': mplp.min_mq = atoi(optarg); break;
diff --git a/samtools/bam_reheader.c.pysam.c b/samtools/bam_reheader.c.pysam.c
index 8149514..a48d7f6 100644
--- a/samtools/bam_reheader.c.pysam.c
+++ b/samtools/bam_reheader.c.pysam.c
@@ -444,7 +444,7 @@ static void usage(FILE *fp, int ret) {
            "    -i, --in-place      Modify the CRAM file directly, if possible.\n"
            "                        (Defaults to outputting to samtools_stdout.)\n"
            "    -c, --command CMD   Pass the header in SAM format to external program CMD.\n");
-    exit(ret);
+    samtools_exit(ret);
 }
 
 static sam_hdr_t* external_reheader(samFile* in, const char* external) {
@@ -533,7 +533,7 @@ cleanup:
     return h;
 }
 
-int main_reheader(int argc, char *argv[])
+int samtools_main_reheader(int argc, char *argv[])
 {
     int inplace = 0, r, no_pg = 0, c, skip_header = 0;
     sam_hdr_t *h;
diff --git a/samtools/bam_rmdupse.c.pysam.c b/samtools/bam_rmdupse.c.pysam.c
index 2c67fac..65689d7 100644
--- a/samtools/bam_rmdupse.c.pysam.c
+++ b/samtools/bam_rmdupse.c.pysam.c
@@ -86,8 +86,8 @@ static inline elem_t *push_queue(queue_t *queue, const bam1_t *b, int endpos, in
     p->discarded = 0;
     p->endpos = endpos; p->score = score;
     if (p->b == 0) p->b = bam_init1();
-    if (!p->b) { perror(NULL); exit(EXIT_FAILURE); }
-    if (bam_copy1(p->b, b) == NULL) { perror(NULL); exit(EXIT_FAILURE); }
+    if (!p->b) { perror(NULL); samtools_exit(EXIT_FAILURE); }
+    if (bam_copy1(p->b, b) == NULL) { perror(NULL); samtools_exit(EXIT_FAILURE); }
     return p;
 }
 
@@ -183,7 +183,7 @@ int bam_rmdupse_core(samFile *in, sam_hdr_t *hdr, samFile *out, int force_se)
                     } else { // replace
                         p->score = score; p->endpos = endpos;
                         if (bam_copy1(p->b, b) == NULL) {
-                            perror(NULL); exit(EXIT_FAILURE);
+                            perror(NULL); samtools_exit(EXIT_FAILURE);
                         }
                     }
                 } // otherwise, discard the alignment
diff --git a/samtools/bam_sort.c b/samtools/bam_sort.c
index 0bf346c..46a1d80 100644
--- a/samtools/bam_sort.c
+++ b/samtools/bam_sort.c
@@ -1,6 +1,6 @@
 /*  bam_sort.c -- sorting and merging.
 
-    Copyright (C) 2008-2019 Genome Research Ltd.
+    Copyright (C) 2008-2021 Genome Research Ltd.
     Portions copyright (C) 2009-2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -33,11 +33,13 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdio.h>
 #include <string.h>
 #include <time.h>
+#include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <assert.h>
 #include <pthread.h>
+#include <inttypes.h>
 #include "htslib/ksort.h"
 #include "htslib/hts_os.h"
 #include "htslib/khash.h"
@@ -47,6 +49,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/hts_endian.h"
 #include "sam_opts.h"
 #include "samtools.h"
+#include "bedidx.h"
 
 
 // Struct which contains the a record, and the pointer to the sort tag (if any) or
@@ -97,6 +100,7 @@ KLIST_INIT(hdrln, char*, hdrln_free_char)
 
 static int g_is_by_qname = 0;
 static int g_is_by_tag = 0;
+static int g_is_by_minhash = 0;
 static char g_sort_tag[2] = {0,0};
 
 static int strnum_cmp(const char *_a, const char *_b)
@@ -133,8 +137,11 @@ typedef struct {
 } heap1_t;
 
 static inline int bam1_cmp_by_tag(const bam1_tag a, const bam1_tag b);
+static inline int bam1_cmp_by_minhash(const bam1_tag a, const bam1_tag b);
 
 // Function to compare reads in the heap and determine which one is < the other
+// Note, unlike the bam1_cmp_by_X functions which return <0, 0, >0 this
+// is strictly 0 or 1 only.
 static inline int heap_lt(const heap1_t a, const heap1_t b)
 {
     if (!a.entry.bam_record)
@@ -146,6 +153,9 @@ static inline int heap_lt(const heap1_t a, const heap1_t b)
         int t;
         t = bam1_cmp_by_tag(a.entry, b.entry);
         if (t != 0) return t > 0;
+    } else if (g_is_by_minhash) {
+        int t = bam1_cmp_by_minhash(a.entry, b.entry);
+        if (t != 0) return t > 0;
     } else if (g_is_by_qname) {
         int t, fa, fb;
         t = strnum_cmp(bam_get_qname(a.entry.bam_record), bam_get_qname(b.entry.bam_record));
@@ -513,7 +523,8 @@ static klist_t(hdrln) * trans_rg_pg(bool is_rg, sam_hdr_t *translate,
                 id_len = id_end - idp;
 
                 if (id_len < transformed_id.l) {
-                    if (ks_resize(&new_hdr_line, new_hdr_line.l + transformed_id.l - id_len))
+                    if (ks_resize(&new_hdr_line, new_hdr_line.l
+                                  + transformed_id.l - id_len + 1/*nul*/))
                         goto fail;
                 }
                 if (id_len != transformed_id.l) {
@@ -714,6 +725,7 @@ static int trans_tbl_init(merged_header_t* merged_hdr, sam_hdr_t* translate,
     // Get translated header lines and fill in map for @PG records
     pg_list = trans_rg_pg(false, translate, merge_pg, merged_hdr->pg_ids,
                           tbl->pg_trans, NULL);
+    if (!pg_list) goto fail;
 
     // Fix-up PG: tags in the new @RG records and add to output
     if (finish_rg_pg(true, rg_list, tbl->pg_trans, &merged_hdr->out_rg))
@@ -911,10 +923,38 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
 #define MERGE_COMBINE_PG 32 // Combine PG tags frather than redefining them
 #define MERGE_FIRST_CO   64 // Use only first file's @CO headers (sort cmd only)
 
+
+/* Deep-copy the first rn entries of the region list rl.  Interval arrays are
+ * duplicated; the reg name pointers are shared with rl, not copied, so they
+ * must outlive the copy.  Returns NULL if rl is NULL or allocation fails;
+ * free the result with hts_reglist_free(). */
+static hts_reglist_t *duplicate_reglist(const hts_reglist_t *rl, int rn) {
+    int i;
+    hts_reglist_t *new_rl;
+
+    if (!rl)
+        return NULL;
+    if (!(new_rl = calloc(rn, sizeof(hts_reglist_t))))
+        return NULL;
+    for (i = 0; i < rn; i++) {
+        // malloc(0) may legitimately return NULL, so never request 0 bytes
+        size_t sz = (rl[i].count ? rl[i].count : 1) * sizeof(hts_pair_pos_t);
+        new_rl[i] = rl[i];  // tid, count, min_beg, max_end and shared reg
+        new_rl[i].intervals = malloc(sz);
+        if (!new_rl[i].intervals) {
+            hts_reglist_free(new_rl, i);  // only entries 0..i-1 own intervals
+            return NULL;
+        }
+        if (rl[i].count)  // memcpy from a possibly-NULL source is undefined
+            memcpy(new_rl[i].intervals, rl[i].intervals, rl[i].count * sizeof(hts_pair_pos_t));
+    }
+    return new_rl;
+}
+
 /*
  * How merging is handled
  *
- * If a hheader is defined use we will use that as our output header
+ * If a header is defined we will use that as our output header
  * otherwise we use the first header from the first input file.
  *
  * Now go through each file and create a translation table for that file for:
@@ -957,9 +997,9 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
  */
 int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *mode,
                     const char *headers, int n, char * const *fn, char * const *fn_idx,
-                    int flag, const char *reg, int n_threads, const char *cmd,
-                    const htsFormat *in_fmt, const htsFormat *out_fmt, int write_index,
-                    char *arg_list, int no_pg)
+                    const char *fn_bed, int flag, const char *reg, int n_threads,
+                    const char *cmd, const htsFormat *in_fmt, const htsFormat *out_fmt,
+                    int write_index, char *arg_list, int no_pg)
 {
     samFile *fpout, **fp = NULL;
     heap1_t *heap = NULL;
@@ -973,6 +1013,8 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     trans_tbl_t *translation_tbl = NULL;
     int *rtrans = NULL;
     char *out_idx_fn = NULL;
+    void *hreg = NULL;
+    hts_reglist_t *lreg = NULL;
     merged_header_t *merged_hdr = init_merged_header();
     if (!merged_hdr) return -1;
 
@@ -1030,7 +1072,7 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     }
 
     if (hin) {
-        // Popluate merged_hdr from the pre-prepared header
+        // Populate merged_hdr from the pre-prepared header
         trans_tbl_t dummy;
         int res;
         res = trans_tbl_init(merged_hdr, hin, &dummy, flag & MERGE_COMBINE_RG,
@@ -1059,10 +1101,7 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
                            RG[i]))
             return -1; // FIXME: memory leak
 
-        // TODO sam_itr_next() doesn't yet work for SAM files,
-        // so for those keep the headers around for use with sam_read1()
-        if (hts_get_format(fp[i])->format == sam) hdr[i] = hin;
-        else { sam_hdr_destroy(hin); hdr[i] = NULL; }
+        hdr[i] = hin;
 
         if ((translation_tbl+i)->lost_coord_sort && !by_qname) {
             fprintf(stderr, "[bam_merge_core] Order of targets in file %s caused coordinate sort to be lost\n", fn[i]);
@@ -1098,10 +1137,22 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     if (!hout) return -1;  // FIXME: memory leak
 
     // If we're only merging a specified region move our iters to start at that point
-    if (reg) {
-        int tid;
-        hts_pos_t beg, end;
+    int tid, nreg;
+    hts_pos_t beg, end;
 
+    if (fn_bed) {
+        hreg = bed_read(fn_bed);
+        if (!hreg) {
+            fprintf(stderr, "[%s] Could not read BED file: \"%s\"\n", __func__, fn_bed);
+            goto fail;
+        }
+        bed_unify(hreg);
+        lreg = bed_reglist(hreg, ALL, &nreg);
+        if (!lreg || !nreg) {
+            fprintf(stderr, "[%s] Null or empty region list\n", __func__);
+            goto fail;
+        }
+    } else if (reg) {
         rtrans = rtrans_build(n, sam_hdr_nref(hout), translation_tbl);
         if (!rtrans) goto mem_fail;
 
@@ -1109,55 +1160,69 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
             fprintf(stderr, "[%s] Badly formatted region or unknown reference name: \"%s\"\n", __func__, reg);
             goto fail;
         }
+
+    }
+
+    if (reg || fn_bed) {
+        hts_idx_t *reg_idx = NULL;
         for (i = 0; i < n; ++i) {
-            hts_idx_t *idx = NULL;
-            // If index filename has not been specfied, look in BAM folder
+
+            // If index filename has not been specified, look in the BAM folder
             if (fn_idx != NULL) {
-                idx = sam_index_load2(fp[i], fn[i], fn_idx[i]);
+                reg_idx = sam_index_load2(fp[i], fn[i], fn_idx[i]);
             } else {
-                idx = sam_index_load(fp[i], fn[i]);
+                reg_idx = sam_index_load(fp[i], fn[i]);
             }
-            // (rtrans[i*n+tid]) Look up what hout tid translates to in input tid space
-            int mapped_tid = rtrans[i*sam_hdr_nref(hout)+tid];
-            if (idx == NULL) {
-                fprintf(stderr, "[%s] failed to load index for %s.  Random alignment retrieval only works for indexed BAM or CRAM files.\n",
+            if (reg_idx == NULL) {
+                fprintf(stderr, "[%s] failed to load index for %s. Random alignment retrieval only works for indexed BAM or CRAM files.\n",
                         __func__, fn[i]);
+                free(rtrans);
+                rtrans = NULL;
                 goto fail;
             }
-            if (mapped_tid != INT32_MIN) {
-                iter[i] = sam_itr_queryi(idx, mapped_tid, beg, end);
+
+            int mapped_tid = INT32_MIN;
+            if (fn_bed) {
+                hts_reglist_t *rl = duplicate_reglist(lreg, nreg);
+                iter[i] = sam_itr_regions(reg_idx, hdr[i], rl, nreg);
             } else {
-                iter[i] = sam_itr_queryi(idx, HTS_IDX_NONE, 0, 0);
+                // (rtrans[i*n+tid]) Look up what hout tid translates to in input tid space
+                mapped_tid = rtrans[i*sam_hdr_nref(hout)+tid];
+                if (mapped_tid != INT32_MIN) {
+                    iter[i] = sam_itr_queryi(reg_idx, mapped_tid, beg, end);
+                } else {
+                    iter[i] = sam_itr_queryi(reg_idx, HTS_IDX_NONE, 0, 0);
+                }
             }
-            hts_idx_destroy(idx);
+
             if (iter[i] == NULL) {
-                if (mapped_tid != INT32_MIN) {
-                    fprintf(stderr,
-                            "[%s] failed to get iterator over "
-                            "{%s, %d, %"PRIhts_pos", %"PRIhts_pos"}\n",
-                            __func__, fn[i], mapped_tid, beg, end);
+                if (fn_bed) {
+                    fprintf(stderr, "[%s] failed to get multi-region iterator "
+                            "{%s, %s}\n", __func__, fn[i], fn_bed);
                 } else {
-                    fprintf(stderr,
-                            "[%s] failed to get iterator over "
-                            "{%s, HTS_IDX_NONE, 0, 0}\n",
-                            __func__, fn[i]);
+                    if (mapped_tid != INT32_MIN) {
+                        fprintf(stderr,
+                                "[%s] failed to get iterator over "
+                                "{%s, %d, %"PRIhts_pos", %"PRIhts_pos"}\n",
+                                __func__, fn[i], mapped_tid, beg, end);
+                    } else {
+                        fprintf(stderr,
+                                "[%s] failed to get iterator over "
+                                "{%s, HTS_IDX_NONE, 0, 0}\n",
+                                __func__, fn[i]);
+                    }
                 }
+                hts_idx_destroy(reg_idx);
+                free(rtrans);
+                rtrans = NULL;
                 goto fail;
             }
+
+            hts_idx_destroy(reg_idx);
         }
+
         free(rtrans);
         rtrans = NULL;
-    } else {
-        for (i = 0; i < n; ++i) {
-            if (hdr[i] == NULL) {
-                iter[i] = sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0);
-                if (iter[i] == NULL) {
-                    fprintf(stderr, "[%s] failed to get iterator\n", __func__);
-                    goto fail;
-                }
-            }
-            else iter[i] = NULL;
-        }
     }
 
     // Load the first read from each file into the heap
@@ -1279,6 +1344,8 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     sam_hdr_destroy(hin);
     sam_hdr_destroy(hout);
     free_merged_header(merged_hdr);
+    hts_reglist_free(lreg, nreg);
+    bed_destroy(hreg);
     free(RG); free(translation_tbl); free(fp); free(heap); free(iter); free(hdr);
     if (sam_close(fpout) < 0) {
         print_error(cmd, "error closing output file");
@@ -1307,6 +1374,8 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     free(RG);
     free(translation_tbl);
     free(hdr);
+    hts_reglist_free(lreg, nreg);
+    bed_destroy(hreg);
     free(iter);
     free(heap);
     free(fp);
@@ -1322,13 +1391,14 @@ int bam_merge_core(int by_qname, const char *out, const char *headers, int n, ch
     strcpy(mode, "wb");
     if (flag & MERGE_UNCOMP) strcat(mode, "0");
     else if (flag & MERGE_LEVEL1) strcat(mode, "1");
-    return bam_merge_core2(by_qname, NULL, out, mode, headers, n, fn, NULL, flag, reg, 0, "merge", NULL, NULL, 0, NULL, 1);
+    return bam_merge_core2(by_qname, NULL, out, mode, headers, n, fn, NULL, NULL, flag, reg, 0, "merge", NULL, NULL, 0, NULL, 1);
 }
 
 static void merge_usage(FILE *to)
 {
     fprintf(to,
-"Usage: samtools merge [-nurlf] [-h inh.sam] [-b <bamlist.fofn>] <out.bam> <in1.bam> [<in2.bam> ... <inN.bam>]\n"
+"Usage: samtools merge [options] -o <out.bam> [options] <in1.bam> ... <inN.bam>\n"
+"   or: samtools merge [options] <out.bam> <in1.bam> ... <inN.bam>\n"
 "\n"
 "Options:\n"
 "  -n         Input files are sorted by read name\n"
@@ -1336,6 +1406,7 @@ static void merge_usage(FILE *to)
 "  -r         Attach RG tag (inferred from file names)\n"
 "  -u         Uncompressed BAM output\n"
 "  -f         Overwrite the output BAM if exist\n"
+"  -o FILE    Specify output file via option instead of <out.bam> argument\n"
 "  -1         Compress level 1\n"
 "  -l INT     Compression level, from 0 to 9 [-1]\n"
 "  -R STR     Merge file in the specified region STR [all]\n"
@@ -1345,6 +1416,7 @@ static void merge_usage(FILE *to)
 "  -s VALUE   Override random seed\n"
 "  -b FILE    List of input BAM filenames, one per line [null]\n"
 "  -X         Use customized index files\n"
+"  -L FILE    Specify a BED file for multiple region filtering [null]\n"
 "  --no-PG    do not add a PG line\n");
     sam_global_opt_help(to, "-.O..@..");
 }
@@ -1353,10 +1425,10 @@ int bam_merge(int argc, char *argv[])
 {
     int c, is_by_qname = 0, flag = 0, ret = 0, level = -1, has_index_file = 0;
     char *fn_headers = NULL, *reg = NULL, mode[12];
-    char *sort_tag = NULL, *arg_list = NULL;
+    char *sort_tag = NULL, *fnout = NULL, *arg_list = NULL;
     long random_seed = (long)time(NULL);
     char** fn = NULL;
-    char** fn_idx = NULL;
+    char** fn_idx = NULL, *fn_bed = NULL;
     int fn_size = 0, no_pg = 0;
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
@@ -1372,12 +1444,13 @@ int bam_merge(int argc, char *argv[])
         return 0;
     }
 
-    while ((c = getopt_long(argc, argv, "h:nru1R:f@:l:cps:b:O:t:X", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "h:nru1R:o:f@:l:cps:b:O:t:XL:", lopts, NULL)) >= 0) {
         switch (c) {
         case 'r': flag |= MERGE_RG; break;
         case 'f': flag |= MERGE_FORCE; break;
         case 'h': fn_headers = optarg; break;
         case 'n': is_by_qname = 1; break;
+        case 'o': fnout = optarg; break;
         case 't': sort_tag = optarg; break;
         case '1': flag |= MERGE_LEVEL1; level = 1; break;
         case 'u': flag |= MERGE_UNCOMP; level = 0; break;
@@ -1387,6 +1460,7 @@ int bam_merge(int argc, char *argv[])
         case 'p': flag |= MERGE_COMBINE_PG; break;
         case 's': random_seed = atol(optarg); break;
         case 'X': has_index_file = 1; break; // -X flag for index filename
+        case 'L': fn_bed = optarg; break;
         case 'b': {
             // load the list of files to read
             if (has_index_file) {
@@ -1415,7 +1489,12 @@ int bam_merge(int argc, char *argv[])
         case '?': merge_usage(stderr); return 1;
         }
     }
-    if ( argc - optind < 1 ) {
+
+    if (fnout == NULL && argc - optind >= 1) {
+        fnout = argv[optind];
+        optind++;
+    }
+    if (fnout == NULL) {
         print_error("merge", "You must at least specify the output file");
         merge_usage(stderr);
         return 1;
@@ -1426,50 +1505,57 @@ int bam_merge(int argc, char *argv[])
         return 1;
     }
 
-    srand48(random_seed);
-    if (!(flag & MERGE_FORCE) && strcmp(argv[optind], "-")) {
-        FILE *fp = fopen(argv[optind], "rb");
-        if (fp != NULL) {
-            fclose(fp);
-            fprintf(stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
-            return 1;
+    hts_srand48(random_seed);
+    if (!(flag & MERGE_FORCE) && strcmp(fnout, "-") != 0) {
+        struct stat sbuf;
+        if (stat(fnout, &sbuf) == 0 && S_ISREG(sbuf.st_mode)) {
+            fprintf(stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, fnout);
+            ret = 1;
+            goto end;
         }
     }
 
     int nargcfiles = 0;
     if (has_index_file) { // Calculate # of input BAM files
-        if ((argc - optind - 1) % 2 != 0) {
+        if ((argc - optind) % 2 != 0) {
             fprintf(stderr, "Odd number of filenames detected! Each BAM file should have an index file\n");
-            return 1;
+            ret = 1;
+            goto end;
         }
-        nargcfiles = (argc - optind - 1) / 2;
+        nargcfiles = (argc - optind) / 2;
     } else {
-        nargcfiles = argc - optind - 1;
+        nargcfiles = argc - optind;
     }
 
     if (nargcfiles > 0) {
         // Add argc files to end of array
         fn = realloc(fn, (fn_size+nargcfiles) * sizeof(char*));
         if (fn == NULL) { ret = 1; goto end; }
-        memcpy(fn+fn_size, argv + (optind+1), nargcfiles * sizeof(char*));
+        memcpy(fn+fn_size, argv + optind, nargcfiles * sizeof(char*));
 
         if(has_index_file) {
             fn_idx = realloc(fn_idx, nargcfiles * sizeof(char*));
             if (fn_idx == NULL) { ret = 1; goto end; }
-            memcpy(fn_idx+fn_size, argv + nargcfiles + (optind+1), nargcfiles * sizeof(char*));
+            memcpy(fn_idx+fn_size, argv + nargcfiles + optind, nargcfiles * sizeof(char*));
         }
     }
     if (fn_size+nargcfiles < 1) {
         print_error("merge", "You must specify at least one (and usually two or more) input files");
         merge_usage(stderr);
-        free(fn_idx);
-        return 1;
+        ret = 1;
+        goto end;
+    }
+
+    if (reg && fn_bed) {
+        print_error("merge", "You must specify either a BED file or a region");
+        ret = 1;
+        goto end;
     }
     strcpy(mode, "wb");
-    sam_open_mode(mode+1, argv[optind], NULL);
+    sam_open_mode(mode+1, fnout, NULL);
     if (level >= 0) sprintf(strchr(mode, '\0'), "%d", level < 9? level : 9);
-    if (bam_merge_core2(is_by_qname, sort_tag, argv[optind], mode, fn_headers,
-                        fn_size+nargcfiles, fn, fn_idx, flag, reg, ga.nthreads,
+    if (bam_merge_core2(is_by_qname, sort_tag, fnout, mode, fn_headers,
+                        fn_size+nargcfiles, fn, fn_idx, fn_bed, flag, reg, ga.nthreads,
                         "merge", &ga.in, &ga.out, ga.write_index, arg_list, no_pg) < 0)
         ret = 1;
 
@@ -1631,6 +1717,12 @@ static int bam_merge_simple(int by_qname, char *sort_tag, const char *out,
     ks_heapmake(heap, heap_size, heap);
     while (heap->pos != HEAP_EMPTY) {
         bam1_t *b = heap->entry.bam_record;
+        if (g_is_by_minhash && b->core.tid == -1) {
+            // Remove the cached minhash value
+            b->core.pos = -1;
+            b->core.mpos = -1;
+            b->core.isize = 0;
+        }
         if (sam_write1(fpout, hout, b) < 0) {
             print_error_errno(cmd, "failed writing to \"%s\"", out);
             goto fail;
@@ -1789,12 +1881,45 @@ static inline int bam1_cmp_by_tag(const bam1_tag a, const bam1_tag b)
     }
 }
 
+// Order unplaced reads (tid == -1) by minimiser hash; otherwise use bam1_cmp_core().
+// Equal hashes are ordered by core.isize, then by bam1_cmp_core().
+//
+// The 64-bit sort key is rebuilt from the bam core.pos and core.mpos fields.
+// This permits it to survive writing to temporary file and coming back.
+static inline int bam1_cmp_by_minhash(const bam1_tag a, const bam1_tag b)
+{
+    const bam1_t *A = a.bam_record;
+    const bam1_t *B = b.bam_record;
+
+    if (!A) return 1;
+    if (!B) return 0; // NOTE(review): 0, not -1, when only B is missing - confirm intended
+
+    if (A->core.tid != -1 || B->core.tid != -1) // either read has a reference
+        return bam1_cmp_core(a,b);
+
+    const uint64_t m_a = (((uint64_t)A->core.pos)<<32)|(uint32_t)A->core.mpos;
+    const uint64_t m_b = (((uint64_t)B->core.pos)<<32)|(uint32_t)B->core.mpos;
+
+    if (m_a < m_b) // by hash
+        return -1;
+    else if (m_a > m_b)
+        return 1;
+    else if (A->core.isize < B->core.isize) // by hash location in seq
+        return -1;
+    else if (A->core.isize > B->core.isize)
+        return 1;
+    else
+        return bam1_cmp_core(a,b); // full tie: fall back to the core comparison
+}
+
 // Function to compare reads and determine which one is < the other
 // Handle sort-by-pos, sort-by-name, or sort-by-tag
 static inline int bam1_lt(const bam1_tag a, const bam1_tag b)
 {
     if (g_is_by_tag) {
         return bam1_cmp_by_tag(a, b) < 0;
+    } else if (g_is_by_minhash) {
+        return bam1_cmp_by_minhash(a, b) < 0;
     } else {
         return bam1_cmp_core(a,b) < 0;
     }
@@ -1818,7 +1943,7 @@ typedef struct {
 //        -1 for failure
 static int write_buffer(const char *fn, const char *mode, size_t l, bam1_tag *buf,
                         const sam_hdr_t *h, int n_threads, const htsFormat *fmt,
-                        char *arg_list, int no_pg, int write_index)
+                        int clear_minhash, char *arg_list, int no_pg, int write_index)
 {
     size_t i;
     samFile* fp;
@@ -1826,22 +1951,27 @@ static int write_buffer(const char *fn, const char *mode, size_t l, bam1_tag *bu
 
     fp = sam_open_format(fn, mode, fmt);
     if (fp == NULL) return -1;
-    if (!no_pg && sam_hdr_add_pg((sam_hdr_t *)h, "samtools",
-                                 "VN", samtools_version(),
+    if (!no_pg && sam_hdr_add_pg((sam_hdr_t *)h, "samtools", "VN", samtools_version(),
                                  arg_list ? "CL": NULL,
                                  arg_list ? arg_list : NULL,
                                  NULL)) {
         goto fail;
     }
-    if (sam_hdr_write(fp, (sam_hdr_t *)h) != 0) goto fail;
+    if (sam_hdr_write(fp, h) != 0) goto fail;
 
-    if (write_index) {
+    if (write_index)
         if (!(out_idx_fn = auto_index(fp, fn, (sam_hdr_t *)h))) goto fail;
-    }
 
     if (n_threads > 1) hts_set_threads(fp, n_threads);
     for (i = 0; i < l; ++i) {
-        if (sam_write1(fp, (sam_hdr_t *)h, buf[i].bam_record) < 0) goto fail;
+        bam1_t *b = buf[i].bam_record;
+        if (clear_minhash && b->core.tid == -1) {
+            // Remove the cached minhash value
+            b->core.pos = -1;
+            b->core.mpos = -1;
+            b->core.isize = 0;
+        }
+        if (sam_write1(fp, h, b) < 0) goto fail;
     }
 
     if (write_index) {
@@ -1944,18 +2074,206 @@ err:
     return ret;
 }
 
+/*
+ * Computes the minhash of a sequence using both forward and reverse strands.
+ *
+ * This is used as a sort key for unmapped data, to collate like sequences
+ * together and to improve compression ratio.  kmer is the k-mer length in bases.
+ * Reads shorter than kmer are never hashed: UINT64_MAX is returned, *pos = *rev = 0.
+ * The minhash is returned; if non-NULL, *pos is set to the location of this hash
+ * key in the sequence and *rev to 1 when the reverse strand won (else 0).
+ */
+static uint64_t minhash(bam1_t *b, int kmer, int *pos, int *rev) {
+    uint64_t hashf = 0, minhashf = UINT64_MAX;
+    uint64_t hashr = 0, minhashr = UINT64_MAX;
+    int minhashpf = 0, minhashpr = 0, i;
+    uint64_t mask = (((uint64_t)1)<<(2*kmer))-1; // 64-bit one: long may be 32 bits; needs kmer <= 31
+    unsigned char *seq = bam_get_seq(b);
+    int len = b->core.l_qseq;
+
+    // Lookup tables for bam_seqi to 0123 fwd/rev hashes
+    // =ACM GRSV TWYH KDBN
+#define X 0
+    unsigned char L[16] = {
+        X,0,1,X,  2,X,X,X,  3,X,X,X,  X,X,X,X,
+    };
+    uint64_t R[16] = {
+        X,3,2,X,  1,X,X,X,  0,X,X,X,  X,X,X,X,
+    };
+    for (i = 0; i < 16; i++)
+        R[i] <<= 2*(kmer-1); // reverse-strand bases enter at the top of the k-mer
+
+    // Punt homopolymers somewhere central in the hash space
+#define XOR (0xdead7878beef7878 & mask)
+
+    // Initialise hash keys
+    for (i = 0; i < kmer-1 && i < len; i++) {
+        int base = bam_seqi(seq, i);
+        hashf = (hashf<<2) | L[base];
+        hashr = (hashr>>2) | R[base];
+    }
+
+    // Loop to find minimum
+    for (; i < len; i++) {
+        int base = bam_seqi(seq, i);
+
+        hashf = ((hashf<<2) | L[base]) & mask;
+        hashr =  (hashr>>2) | R[base];
+
+        if (minhashf > (hashf^XOR))
+            minhashf = (hashf^XOR), minhashpf = i;
+        if (minhashr > (hashr^XOR))
+            minhashr = (hashr^XOR), minhashpr = len-i+kmer-2;
+
+    }
+
+    if (minhashf <= minhashr) { // ties go to the forward strand
+        if (rev) *rev = 0;
+        if (pos) *pos = minhashpf;
+        return minhashf;
+    } else {
+        if (rev) *rev = 1;
+        if (pos) *pos = minhashpr;
+        return minhashr;
+    }
+}
+
+//--- Start of candidates to punt to htslib
+/*!
+ * @abstract
+ * Extracts the sequence (in current alignment orientation) from
+ * a bam record and places it in buf, which is nul terminated.
+ *
+ * @param b     The bam structure
+ * @param buf   A buffer at least b->core.l_qseq+1 bytes long
+ */
+static void bam_to_seq(bam1_t *b, char *buf) {
+    int i;
+    uint8_t *seq = bam_get_seq(b);
+    for (i = 0; i < b->core.l_qseq; i++)
+        buf[i] = seq_nt16_str[bam_seqi(seq, i)]; // packed base code -> character
+    buf[i] = 0; // nul terminate; this is the +1 byte the caller must provide
+}
+
+/*!
+ * @abstract
+ * Writes a new sequence, of length b->core.l_qseq, to a BAM record.
+ *
+ * If a sequence of a new length is required the caller must first make
+ * room for it by updating the bam1_t struct.
+ *
+ * @param b     The bam structure
+ * @param buf   A buffer at least b->core.l_qseq bytes long
+ */
+static void seq_to_bam(bam1_t *b, char *buf) {
+    int i;
+    uint8_t *seq = bam_get_seq(b);
+    for (i = 0; i < b->core.l_qseq; i++) // exactly l_qseq characters are read from buf
+        bam_set_seqi(seq, i, seq_nt16_table[(unsigned char)buf[i]]); // character -> base code
+}
+
+/*!
+ * @abstract Reverse complements a BAM record.
+ *
+ * It's possible to do this inline, but complex due to the 4-bit sequence
+ * encoding.  For now I take the dumb approach.
+ * Qualities are reversed in step and flag bit 0x10 is toggled.
+ * @param b  Pointer to a BAM alignment
+ *
+ * @return   0 on success, -1 on failure (ENOMEM)
+ */
+static int reverse_complement(bam1_t *b) {
+    static char comp[256] = {
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//00
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//10
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//20
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//30
+
+       //    *   *   *    *   E   F   *    *   I   J   *    L   *   *   O
+        '@','T','V','G', 'H','E','F','C', 'D','I','J','M', 'L','K','N','O',//40
+       //P   Q   *   *    *   *   *   *    X   *   Z   [    \   ]   ^   _
+        'P','Q','Y','S', 'A','A','B','W', 'X','R','Z','[','\\',']','^','_',//50
+       //`   *   *   *    *   E   F   *    *   I   J   *    L   *   *   O
+        '`','t','v','g', 'h','e','f','c', 'd','i','j','m', 'l','k','n','o',//60
+       //P   Q   *   *    *   *   *   *    X   *   Z   {    |   }   ~   DEL
+        'p','q','y','s', 'a','a','b','w', 'x','r','z','{', '|','}','~',127,//70
+
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//80
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//90
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//A0
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//B0
+
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//C0
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//D0
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//E0
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//F0
+    };
+    char seq_[10000], *seq = seq_;
+    uint8_t *qual = bam_get_qual(b);
+    int i, j;
+
+    if (b->core.l_qseq >= 10000) // l_qseq+1 bytes would overflow the stack buffer
+        if (!(seq = malloc(b->core.l_qseq+1)))
+            return -1;
+
+    bam_to_seq(b, seq);
+
+    for (i = 0, j = b->core.l_qseq-1; i < j; i++, j--) { // swap ends, working inwards
+        unsigned char tmp = seq[i];
+        seq[i] = comp[(unsigned char)seq[j]];
+        seq[j] = comp[tmp];
+        tmp = qual[i];
+        qual[i] = qual[j];
+        qual[j] = tmp;
+    }
+    if (i ==j) // odd length: complement the middle base in place
+        seq[i] = comp[(unsigned char)seq[i]];
+
+    seq_to_bam(b, seq);
+
+    if (seq != seq_)
+        free(seq);
+
+    b->core.flag ^= 0x10;
+
+    return 0;
+}
+//--- End of candidates to punt to htslib
+
 static void *worker(void *data)
 {
     worker_t *w = (worker_t*)data;
     char *name;
     w->error = 0;
 
-    if (!g_is_by_qname && !g_is_by_tag) {
+    if (!g_is_by_qname && !g_is_by_tag && !g_is_by_minhash) {
         if (ks_radixsort(w->buf_len, w->buf, w->h) < 0) {
             w->error = errno;
             return NULL;
         }
     } else {
+        if (g_is_by_minhash) {
+            int i;
+            for (i = 0; i < w->buf_len; i++) {
+                bam1_t *b = w->buf[i].bam_record;
+                if (b->core.tid != -1)
+                    continue;
+
+                int pos = 0, rev = 0;
+                uint64_t mh = minhash(b, g_is_by_minhash, &pos, &rev);
+                if (rev)
+                    reverse_complement(b);
+
+                // Store 64-bit hash in unmapped pos and mpos fields.
+                // The position of hash is in isize, which we use for
+                // resolving ties when sorting by hash key.
+                // These are unused for completely unmapped data and
+                // will be reset during final output.
+                b->core.pos = mh>>31;
+                b->core.mpos = mh&0x7fffffff;
+                b->core.isize = 65535-pos >=0 ? 65535-pos : 0;
+            }
+        }
         ks_mergesort(sort, w->buf_len, w->buf, 0);
     }
 
@@ -1983,10 +2301,10 @@ static void *worker(void *data)
             return 0;
         }
 
-        if (write_buffer(name, "wcx1", w->buf_len, w->buf, w->h, 0, &fmt, NULL, 1, 0) < 0)
+        if (write_buffer(name, "wcx1", w->buf_len, w->buf, w->h, 0, &fmt, 0, NULL, 1, 0) < 0)
             w->error = errno;
     } else {
-        if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL, NULL, 1, 0) < 0)
+        if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL, 0, NULL, 1, 0) < 0)
             w->error = errno;
     }
 
@@ -2043,6 +2361,7 @@ static int sort_blocks(int n_files, size_t k, bam1_tag *buf, const char *prefix,
     return n_files + n_threads;
 }
 
+
 /*!
   @abstract Sort an unsorted BAM file based on the chromosome order
   and the leftmost position of an alignment
@@ -2067,7 +2386,7 @@ static int sort_blocks(int n_files, size_t k, bam1_tag *buf, const char *prefix,
  */
 int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const char *prefix,
                       const char *fnout, const char *modeout,
-                      size_t _max_mem, int n_threads,
+                      size_t _max_mem, int by_minimiser, int n_threads,
                       const htsFormat *in_fmt, const htsFormat *out_fmt,
                       char *arg_list, int no_pg, int write_index)
 {
@@ -2090,6 +2409,7 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
 
     if (n_threads < 2) n_threads = 1;
     g_is_by_qname = is_by_qname;
+    g_is_by_minhash = by_minimiser;
     if (sort_by_tag) {
         g_is_by_tag = 1;
         g_sort_tag[0] = sort_by_tag[0];
@@ -2116,11 +2436,23 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
     else
         new_so = "coordinate";
 
-    if ((-1 == sam_hdr_update_hd(header, "SO", new_so))
-     && (-1 == sam_hdr_add_line(header, "HD", "VN", SAM_FORMAT_VERSION, "SO", new_so, NULL))
-     ) {
-        print_error("sort", "failed to change sort order header to '%s'\n", new_so);
-        goto err;
+    if (by_minimiser) {
+        const char *new_ss = "coordinate:minhash";
+        if ((-1 == sam_hdr_update_hd(header, "SO", new_so, "SS", new_ss))
+            && (-1 == sam_hdr_add_line(header, "HD", "VN", SAM_FORMAT_VERSION,
+                                       "SO", new_so, "SS", new_ss, NULL))
+            ) {
+            print_error("sort", "failed to change sort order header to 'SO:%s SS:%s'\n",
+                        new_so, new_ss);
+            goto err;
+        }
+    } else {
+        if ((-1 == sam_hdr_update_hd(header, "SO", new_so))
+            && (-1 == sam_hdr_add_line(header, "HD", "VN", SAM_FORMAT_VERSION, "SO", new_so, NULL))
+            ) {
+            print_error("sort", "failed to change sort order header to 'SO:%s'\n", new_so);
+            goto err;
+        }
     }
 
     if (-1 == sam_hdr_remove_tag_hd(header, "GO")) {
@@ -2207,7 +2539,8 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
 
     // write the final output
     if (n_files == 0 && num_in_mem < 2) { // a single block
-        if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt, arg_list, no_pg, write_index) != 0) {
+        if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt,
+                         g_is_by_minhash, arg_list, no_pg, write_index) != 0) {
             print_error_errno("sort", "failed to create \"%s\"", fnout);
             goto err;
         }
@@ -2261,7 +2594,7 @@ int bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t ma
     char *fnout = calloc(strlen(prefix) + 4 + 1, 1);
     if (!fnout) return -1;
     sprintf(fnout, "%s.bam", prefix);
-    ret = bam_sort_core_ext(is_by_qname, NULL, fn, prefix, fnout, "wb", max_mem, 0, NULL, NULL, NULL, 1, 0);
+    ret = bam_sort_core_ext(is_by_qname, NULL, fn, prefix, fnout, "wb", max_mem, 0, 0, NULL, NULL, NULL, 1, 0);
     free(fnout);
     return ret;
 }
@@ -2272,13 +2605,16 @@ static void sort_usage(FILE *fp)
 "Usage: samtools sort [options...] [in.bam]\n"
 "Options:\n"
 "  -l INT     Set compression level, from 0 (uncompressed) to 9 (best)\n"
+"  -u         Output uncompressed data (equivalent to -l 0)\n"
 "  -m INT     Set maximum memory per thread; suffix K/M/G recognized [768M]\n"
-"  -n         Sort by read name\n"
+"  -M         Use minimiser for clustering unaligned/unplaced reads\n"
+"  -K INT     Kmer size to use for minimiser [20]\n"
+"  -n         Sort by read name (not compatible with samtools index command)\n"
 "  -t TAG     Sort by value of TAG. Uses position as secondary index (or read name if -n is set)\n"
 "  -o FILE    Write final output to FILE rather than standard output\n"
 "  -T PREFIX  Write temporary files to PREFIX.nnnn.bam\n"
 "  --no-PG    do not add a PG line\n");
-    sam_global_opt_help(fp, "-.O..@-.");
+    sam_global_opt_help(fp, "-.O..@..");
 }
 
 static void complain_about_memory_setting(size_t max_mem) {
@@ -2302,6 +2638,7 @@ int bam_sort(int argc, char *argv[])
 {
     size_t max_mem = SORT_DEFAULT_MEGS_PER_THREAD << 20;
     int c, nargs, is_by_qname = 0, ret, o_seen = 0, level = -1, no_pg = 0;
+    int by_minimiser = 0, minimiser_kmer = 20;
     char* sort_tag = NULL, *arg_list = NULL;
     char *fnout = "-", modeout[12];
     kstring_t tmpprefix = { 0, 0, NULL };
@@ -2315,7 +2652,7 @@ int bam_sort(int argc, char *argv[])
         { NULL, 0, NULL, 0 }
     };
 
-    while ((c = getopt_long(argc, argv, "l:m:no:O:T:@:t:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "l:m:no:O:T:@:t:MK:u", lopts, NULL)) >= 0) {
         switch (c) {
         case 'o': fnout = optarg; o_seen = 1; break;
         case 'n': is_by_qname = 1; break;
@@ -2330,7 +2667,16 @@ int bam_sort(int argc, char *argv[])
             }
         case 'T': kputs(optarg, &tmpprefix); break;
         case 'l': level = atoi(optarg); break;
-        case 1: no_pg = 1; break;
+        case 'u': level = 0; break;
+        case   1: no_pg = 1; break;
+        case 'M': by_minimiser = 1; break;
+        case 'K':
+            minimiser_kmer = atoi(optarg);
+            if (minimiser_kmer < 1)
+                minimiser_kmer = 1;
+            else if (minimiser_kmer > 31)
+                minimiser_kmer = 31;
+            break;
 
         default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                   /* else fall-through */
@@ -2385,7 +2731,8 @@ int bam_sort(int argc, char *argv[])
     }
 
     ret = bam_sort_core_ext(is_by_qname, sort_tag, (nargs > 0)? argv[optind] : "-",
-                            tmpprefix.s, fnout, modeout, max_mem, ga.nthreads,
+                            tmpprefix.s, fnout, modeout, max_mem,
+                            by_minimiser * minimiser_kmer, ga.nthreads,
                             &ga.in, &ga.out, arg_list, no_pg, ga.write_index);
     if (ret >= 0)
         ret = EXIT_SUCCESS;
diff --git a/samtools/bam_sort.c.pysam.c b/samtools/bam_sort.c.pysam.c
index 3093960..6cbf66a 100644
--- a/samtools/bam_sort.c.pysam.c
+++ b/samtools/bam_sort.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam_sort.c -- sorting and merging.
 
-    Copyright (C) 2008-2019 Genome Research Ltd.
+    Copyright (C) 2008-2021 Genome Research Ltd.
     Portions copyright (C) 2009-2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -35,11 +35,13 @@ DEALINGS IN THE SOFTWARE.  */
 #include <stdio.h>
 #include <string.h>
 #include <time.h>
+#include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <assert.h>
 #include <pthread.h>
+#include <inttypes.h>
 #include "htslib/ksort.h"
 #include "htslib/hts_os.h"
 #include "htslib/khash.h"
@@ -49,6 +51,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/hts_endian.h"
 #include "sam_opts.h"
 #include "samtools.h"
+#include "bedidx.h"
 
 
 // Struct which contains the a record, and the pointer to the sort tag (if any) or
@@ -99,6 +102,7 @@ KLIST_INIT(hdrln, char*, hdrln_free_char)
 
 static int g_is_by_qname = 0;
 static int g_is_by_tag = 0;
+static int g_is_by_minhash = 0;
 static char g_sort_tag[2] = {0,0};
 
 static int strnum_cmp(const char *_a, const char *_b)
@@ -135,8 +139,11 @@ typedef struct {
 } heap1_t;
 
 static inline int bam1_cmp_by_tag(const bam1_tag a, const bam1_tag b);
+static inline int bam1_cmp_by_minhash(const bam1_tag a, const bam1_tag b);
 
 // Function to compare reads in the heap and determine which one is < the other
+// Note, unlike the bam1_cmp_by_X functions which return <0, 0, >0 this
+// is strictly 0 or 1 only.
 static inline int heap_lt(const heap1_t a, const heap1_t b)
 {
     if (!a.entry.bam_record)
@@ -148,6 +155,9 @@ static inline int heap_lt(const heap1_t a, const heap1_t b)
         int t;
         t = bam1_cmp_by_tag(a.entry, b.entry);
         if (t != 0) return t > 0;
+    } else if (g_is_by_minhash) {
+        int t = bam1_cmp_by_minhash(a.entry, b.entry);
+        if (t != 0) return t > 0;
     } else if (g_is_by_qname) {
         int t, fa, fb;
         t = strnum_cmp(bam_get_qname(a.entry.bam_record), bam_get_qname(b.entry.bam_record));
@@ -515,7 +525,8 @@ static klist_t(hdrln) * trans_rg_pg(bool is_rg, sam_hdr_t *translate,
                 id_len = id_end - idp;
 
                 if (id_len < transformed_id.l) {
-                    if (ks_resize(&new_hdr_line, new_hdr_line.l + transformed_id.l - id_len))
+                    if (ks_resize(&new_hdr_line, new_hdr_line.l
+                                  + transformed_id.l - id_len + 1/*nul*/))
                         goto fail;
                 }
                 if (id_len != transformed_id.l) {
@@ -716,6 +727,7 @@ static int trans_tbl_init(merged_header_t* merged_hdr, sam_hdr_t* translate,
     // Get translated header lines and fill in map for @PG records
     pg_list = trans_rg_pg(false, translate, merge_pg, merged_hdr->pg_ids,
                           tbl->pg_trans, NULL);
+    if (!pg_list) goto fail;
 
     // Fix-up PG: tags in the new @RG records and add to output
     if (finish_rg_pg(true, rg_list, tbl->pg_trans, &merged_hdr->out_rg))
@@ -913,10 +925,38 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
 #define MERGE_COMBINE_PG 32 // Combine PG tags frather than redefining them
 #define MERGE_FIRST_CO   64 // Use only first file's @CO headers (sort cmd only)
 
+// Copy an array of rn region lists; returns NULL if rl is NULL or an allocation fails.
+static hts_reglist_t *duplicate_reglist(const hts_reglist_t *rl, int rn) {
+    if (!rl)
+        return NULL;
+    // calloc zero-fills, so entries not yet copied have intervals == NULL
+    hts_reglist_t *new_rl = calloc(rn, sizeof(hts_reglist_t));
+    if (!new_rl)
+        return NULL;
+
+    int i;
+    for (i=0; i < rn; i++) {
+        new_rl[i].tid     = rl[i].tid;
+        new_rl[i].count   = rl[i].count;
+        new_rl[i].min_beg = rl[i].min_beg;
+        new_rl[i].max_end = rl[i].max_end;
+        // reg is copied as a pointer (shared with rl); intervals get their own buffer
+        new_rl[i].reg = rl[i].reg;
+        new_rl[i].intervals = malloc(new_rl[i].count * sizeof(hts_pair_pos_t));
+        if (!new_rl[i].intervals) {
+            hts_reglist_free(new_rl, i); // drop the i entries copied so far
+            return NULL;
+        }
+        memcpy(new_rl[i].intervals, rl[i].intervals, new_rl[i].count * sizeof(hts_pair_pos_t));
+    }
+
+    return new_rl;
+}
+
 /*
  * How merging is handled
  *
- * If a hheader is defined use we will use that as our output header
+ * If a header is defined we will use that as our output header
  * otherwise we use the first header from the first input file.
  *
  * Now go through each file and create a translation table for that file for:
@@ -959,9 +999,9 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
  */
 int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *mode,
                     const char *headers, int n, char * const *fn, char * const *fn_idx,
-                    int flag, const char *reg, int n_threads, const char *cmd,
-                    const htsFormat *in_fmt, const htsFormat *out_fmt, int write_index,
-                    char *arg_list, int no_pg)
+                    const char *fn_bed, int flag, const char *reg, int n_threads,
+                    const char *cmd, const htsFormat *in_fmt, const htsFormat *out_fmt,
+                    int write_index, char *arg_list, int no_pg)
 {
     samFile *fpout, **fp = NULL;
     heap1_t *heap = NULL;
@@ -975,6 +1015,8 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     trans_tbl_t *translation_tbl = NULL;
     int *rtrans = NULL;
     char *out_idx_fn = NULL;
+    void *hreg = NULL;
+    hts_reglist_t *lreg = NULL;
     merged_header_t *merged_hdr = init_merged_header();
     if (!merged_hdr) return -1;
 
@@ -1032,7 +1074,7 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     }
 
     if (hin) {
-        // Popluate merged_hdr from the pre-prepared header
+        // Populate merged_hdr from the pre-prepared header
         trans_tbl_t dummy;
         int res;
         res = trans_tbl_init(merged_hdr, hin, &dummy, flag & MERGE_COMBINE_RG,
@@ -1061,10 +1103,7 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
                            RG[i]))
             return -1; // FIXME: memory leak
 
-        // TODO sam_itr_next() doesn't yet work for SAM files,
-        // so for those keep the headers around for use with sam_read1()
-        if (hts_get_format(fp[i])->format == sam) hdr[i] = hin;
-        else { sam_hdr_destroy(hin); hdr[i] = NULL; }
+        hdr[i] = hin;
 
         if ((translation_tbl+i)->lost_coord_sort && !by_qname) {
             fprintf(samtools_stderr, "[bam_merge_core] Order of targets in file %s caused coordinate sort to be lost\n", fn[i]);
@@ -1100,10 +1139,22 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     if (!hout) return -1;  // FIXME: memory leak
 
     // If we're only merging a specified region move our iters to start at that point
-    if (reg) {
-        int tid;
-        hts_pos_t beg, end;
+    int tid, nreg;
+    hts_pos_t beg, end;
 
+    if (fn_bed) {
+        hreg = bed_read(fn_bed);
+        if (!hreg) {
+            fprintf(samtools_stderr, "[%s] Could not read BED file: \"%s\"\n", __func__, fn_bed);
+            goto fail;
+        }
+        bed_unify(hreg);
+        lreg = bed_reglist(hreg, ALL, &nreg);
+        if (!lreg || !nreg) {
+            fprintf(samtools_stderr, "[%s] Null or empty region list\n", __func__);
+            goto fail;
+        }
+    } else if (reg) {
         rtrans = rtrans_build(n, sam_hdr_nref(hout), translation_tbl);
         if (!rtrans) goto mem_fail;
 
@@ -1111,55 +1162,69 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
             fprintf(samtools_stderr, "[%s] Badly formatted region or unknown reference name: \"%s\"\n", __func__, reg);
             goto fail;
         }
+
+    }
+
+    if (reg || fn_bed) {
+        hts_idx_t *reg_idx = NULL;
         for (i = 0; i < n; ++i) {
-            hts_idx_t *idx = NULL;
-            // If index filename has not been specfied, look in BAM folder
+
+            // If index filename has not been specified, look in the BAM folder
             if (fn_idx != NULL) {
-                idx = sam_index_load2(fp[i], fn[i], fn_idx[i]);
+                reg_idx = sam_index_load2(fp[i], fn[i], fn_idx[i]);
             } else {
-                idx = sam_index_load(fp[i], fn[i]);
+                reg_idx = sam_index_load(fp[i], fn[i]);
             }
-            // (rtrans[i*n+tid]) Look up what hout tid translates to in input tid space
-            int mapped_tid = rtrans[i*sam_hdr_nref(hout)+tid];
-            if (idx == NULL) {
-                fprintf(samtools_stderr, "[%s] failed to load index for %s.  Random alignment retrieval only works for indexed BAM or CRAM files.\n",
+            if (reg_idx == NULL) {
+                fprintf(samtools_stderr, "[%s] failed to load index for %s. Random alignment retrieval only works for indexed BAM or CRAM files.\n",
                         __func__, fn[i]);
+                free(rtrans);
+                rtrans = NULL;
                 goto fail;
             }
-            if (mapped_tid != INT32_MIN) {
-                iter[i] = sam_itr_queryi(idx, mapped_tid, beg, end);
+
+            int mapped_tid = INT32_MIN;
+            if (fn_bed) {
+                hts_reglist_t *rl = duplicate_reglist(lreg, nreg);
+                iter[i] = sam_itr_regions(reg_idx, hdr[i], rl, nreg);
             } else {
-                iter[i] = sam_itr_queryi(idx, HTS_IDX_NONE, 0, 0);
+                // (rtrans[i*n+tid]) Look up what hout tid translates to in input tid space
+                mapped_tid = rtrans[i*sam_hdr_nref(hout)+tid];
+                if (mapped_tid != INT32_MIN) {
+                    iter[i] = sam_itr_queryi(reg_idx, mapped_tid, beg, end);
+                } else {
+                    iter[i] = sam_itr_queryi(reg_idx, HTS_IDX_NONE, 0, 0);
+                }
             }
-            hts_idx_destroy(idx);
+
             if (iter[i] == NULL) {
-                if (mapped_tid != INT32_MIN) {
-                    fprintf(samtools_stderr,
-                            "[%s] failed to get iterator over "
-                            "{%s, %d, %"PRIhts_pos", %"PRIhts_pos"}\n",
-                            __func__, fn[i], mapped_tid, beg, end);
+                if (fn_bed) {
+                    fprintf(samtools_stderr, "[%s] failed to get multi-region iterator "
+                            "{%s, %s}\n", __func__, fn[i], fn_bed);
                 } else {
-                    fprintf(samtools_stderr,
-                            "[%s] failed to get iterator over "
-                            "{%s, HTS_IDX_NONE, 0, 0}\n",
-                            __func__, fn[i]);
+                    if (mapped_tid != INT32_MIN) {
+                        fprintf(samtools_stderr,
+                                "[%s] failed to get iterator over "
+                                "{%s, %d, %"PRIhts_pos", %"PRIhts_pos"}\n",
+                                __func__, fn[i], mapped_tid, beg, end);
+                    } else {
+                        fprintf(samtools_stderr,
+                                "[%s] failed to get iterator over "
+                                "{%s, HTS_IDX_NONE, 0, 0}\n",
+                                __func__, fn[i]);
+                    }
                 }
+                hts_idx_destroy(reg_idx);
+                free(rtrans);
+                rtrans = NULL;
                 goto fail;
             }
+
+            hts_idx_destroy(reg_idx);
         }
+
         free(rtrans);
         rtrans = NULL;
-    } else {
-        for (i = 0; i < n; ++i) {
-            if (hdr[i] == NULL) {
-                iter[i] = sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0);
-                if (iter[i] == NULL) {
-                    fprintf(samtools_stderr, "[%s] failed to get iterator\n", __func__);
-                    goto fail;
-                }
-            }
-            else iter[i] = NULL;
-        }
     }
 
     // Load the first read from each file into the heap
@@ -1281,6 +1346,8 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     sam_hdr_destroy(hin);
     sam_hdr_destroy(hout);
     free_merged_header(merged_hdr);
+    hts_reglist_free(lreg, nreg);
+    bed_destroy(hreg);
     free(RG); free(translation_tbl); free(fp); free(heap); free(iter); free(hdr);
     if (sam_close(fpout) < 0) {
         print_error(cmd, "error closing output file");
@@ -1309,6 +1376,8 @@ int bam_merge_core2(int by_qname, char* sort_tag, const char *out, const char *m
     free(RG);
     free(translation_tbl);
     free(hdr);
+    hts_reglist_free(lreg, nreg);
+    bed_destroy(hreg);
     free(iter);
     free(heap);
     free(fp);
@@ -1324,13 +1393,14 @@ int bam_merge_core(int by_qname, const char *out, const char *headers, int n, ch
     strcpy(mode, "wb");
     if (flag & MERGE_UNCOMP) strcat(mode, "0");
     else if (flag & MERGE_LEVEL1) strcat(mode, "1");
-    return bam_merge_core2(by_qname, NULL, out, mode, headers, n, fn, NULL, flag, reg, 0, "merge", NULL, NULL, 0, NULL, 1);
+    return bam_merge_core2(by_qname, NULL, out, mode, headers, n, fn, NULL, NULL, flag, reg, 0, "merge", NULL, NULL, 0, NULL, 1);
 }
 
 static void merge_usage(FILE *to)
 {
     fprintf(to,
-"Usage: samtools merge [-nurlf] [-h inh.sam] [-b <bamlist.fofn>] <out.bam> <in1.bam> [<in2.bam> ... <inN.bam>]\n"
+"Usage: samtools merge [options] -o <out.bam> [options] <in1.bam> ... <inN.bam>\n"
+"   or: samtools merge [options] <out.bam> <in1.bam> ... <inN.bam>\n"
 "\n"
 "Options:\n"
 "  -n         Input files are sorted by read name\n"
@@ -1338,6 +1408,7 @@ static void merge_usage(FILE *to)
 "  -r         Attach RG tag (inferred from file names)\n"
 "  -u         Uncompressed BAM output\n"
 "  -f         Overwrite the output BAM if exist\n"
+"  -o FILE    Specify output file via option instead of <out.bam> argument\n"
 "  -1         Compress level 1\n"
 "  -l INT     Compression level, from 0 to 9 [-1]\n"
 "  -R STR     Merge file in the specified region STR [all]\n"
@@ -1347,6 +1418,7 @@ static void merge_usage(FILE *to)
 "  -s VALUE   Override random seed\n"
 "  -b FILE    List of input BAM filenames, one per line [null]\n"
 "  -X         Use customized index files\n"
+"  -L FILE    Specify a BED file for multiple region filtering [null]\n"
 "  --no-PG    do not add a PG line\n");
     sam_global_opt_help(to, "-.O..@..");
 }
@@ -1355,10 +1427,10 @@ int bam_merge(int argc, char *argv[])
 {
     int c, is_by_qname = 0, flag = 0, ret = 0, level = -1, has_index_file = 0;
     char *fn_headers = NULL, *reg = NULL, mode[12];
-    char *sort_tag = NULL, *arg_list = NULL;
+    char *sort_tag = NULL, *fnout = NULL, *arg_list = NULL;
     long random_seed = (long)time(NULL);
     char** fn = NULL;
-    char** fn_idx = NULL;
+    char** fn_idx = NULL, *fn_bed = NULL;
     int fn_size = 0, no_pg = 0;
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
@@ -1374,12 +1446,13 @@ int bam_merge(int argc, char *argv[])
         return 0;
     }
 
-    while ((c = getopt_long(argc, argv, "h:nru1R:f@:l:cps:b:O:t:X", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "h:nru1R:o:f@:l:cps:b:O:t:XL:", lopts, NULL)) >= 0) {
         switch (c) {
         case 'r': flag |= MERGE_RG; break;
         case 'f': flag |= MERGE_FORCE; break;
         case 'h': fn_headers = optarg; break;
         case 'n': is_by_qname = 1; break;
+        case 'o': fnout = optarg; break;
         case 't': sort_tag = optarg; break;
         case '1': flag |= MERGE_LEVEL1; level = 1; break;
         case 'u': flag |= MERGE_UNCOMP; level = 0; break;
@@ -1389,6 +1462,7 @@ int bam_merge(int argc, char *argv[])
         case 'p': flag |= MERGE_COMBINE_PG; break;
         case 's': random_seed = atol(optarg); break;
         case 'X': has_index_file = 1; break; // -X flag for index filename
+        case 'L': fn_bed = optarg; break;
         case 'b': {
             // load the list of files to read
             if (has_index_file) {
@@ -1417,7 +1491,12 @@ int bam_merge(int argc, char *argv[])
         case '?': merge_usage(samtools_stderr); return 1;
         }
     }
-    if ( argc - optind < 1 ) {
+
+    if (fnout == NULL && argc - optind >= 1) {
+        fnout = argv[optind];
+        optind++;
+    }
+    if (fnout == NULL) {
         print_error("merge", "You must at least specify the output file");
         merge_usage(samtools_stderr);
         return 1;
@@ -1428,50 +1507,57 @@ int bam_merge(int argc, char *argv[])
         return 1;
     }
 
-    srand48(random_seed);
-    if (!(flag & MERGE_FORCE) && strcmp(argv[optind], "-")) {
-        FILE *fp = fopen(argv[optind], "rb");
-        if (fp != NULL) {
-            fclose(fp);
-            fprintf(samtools_stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
-            return 1;
+    hts_srand48(random_seed);
+    if (!(flag & MERGE_FORCE) && strcmp(fnout, "-") != 0) {
+        struct stat sbuf;
+        if (stat(fnout, &sbuf) == 0 && S_ISREG(sbuf.st_mode)) {
+            fprintf(samtools_stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, fnout);
+            ret = 1;
+            goto end;
         }
     }
 
     int nargcfiles = 0;
     if (has_index_file) { // Calculate # of input BAM files
-        if ((argc - optind - 1) % 2 != 0) {
+        if ((argc - optind) % 2 != 0) {
             fprintf(samtools_stderr, "Odd number of filenames detected! Each BAM file should have an index file\n");
-            return 1;
+            ret = 1;
+            goto end;
         }
-        nargcfiles = (argc - optind - 1) / 2;
+        nargcfiles = (argc - optind) / 2;
     } else {
-        nargcfiles = argc - optind - 1;
+        nargcfiles = argc - optind;
     }
 
     if (nargcfiles > 0) {
         // Add argc files to end of array
         fn = realloc(fn, (fn_size+nargcfiles) * sizeof(char*));
         if (fn == NULL) { ret = 1; goto end; }
-        memcpy(fn+fn_size, argv + (optind+1), nargcfiles * sizeof(char*));
+        memcpy(fn+fn_size, argv + optind, nargcfiles * sizeof(char*));
 
         if(has_index_file) {
             fn_idx = realloc(fn_idx, nargcfiles * sizeof(char*));
             if (fn_idx == NULL) { ret = 1; goto end; }
-            memcpy(fn_idx+fn_size, argv + nargcfiles + (optind+1), nargcfiles * sizeof(char*));
+            memcpy(fn_idx+fn_size, argv + nargcfiles + optind, nargcfiles * sizeof(char*));
         }
     }
     if (fn_size+nargcfiles < 1) {
         print_error("merge", "You must specify at least one (and usually two or more) input files");
         merge_usage(samtools_stderr);
-        free(fn_idx);
-        return 1;
+        ret = 1;
+        goto end;
+    }
+
+    if (reg && fn_bed) {
+        print_error("merge", "You must specify either a BED file or a region");
+        ret = 1;
+        goto end;
     }
     strcpy(mode, "wb");
-    sam_open_mode(mode+1, argv[optind], NULL);
+    sam_open_mode(mode+1, fnout, NULL);
     if (level >= 0) sprintf(strchr(mode, '\0'), "%d", level < 9? level : 9);
-    if (bam_merge_core2(is_by_qname, sort_tag, argv[optind], mode, fn_headers,
-                        fn_size+nargcfiles, fn, fn_idx, flag, reg, ga.nthreads,
+    if (bam_merge_core2(is_by_qname, sort_tag, fnout, mode, fn_headers,
+                        fn_size+nargcfiles, fn, fn_idx, fn_bed, flag, reg, ga.nthreads,
                         "merge", &ga.in, &ga.out, ga.write_index, arg_list, no_pg) < 0)
         ret = 1;
 
@@ -1633,6 +1719,12 @@ static int bam_merge_simple(int by_qname, char *sort_tag, const char *out,
     ks_heapmake(heap, heap_size, heap);
     while (heap->pos != HEAP_EMPTY) {
         bam1_t *b = heap->entry.bam_record;
+        if (g_is_by_minhash && b->core.tid == -1) {
+            // Remove the cached minhash value
+            b->core.pos = -1;
+            b->core.mpos = -1;
+            b->core.isize = 0;
+        }
         if (sam_write1(fpout, hout, b) < 0) {
             print_error_errno(cmd, "failed writing to \"%s\"", out);
             goto fail;
@@ -1791,12 +1883,45 @@ static inline int bam1_cmp_by_tag(const bam1_tag a, const bam1_tag b)
     }
 }
 
+// Sort by the minimiser key cached in the record's core fields.
+// If equal, sort by position.
+//
+// The sort key is held in the bam pos (high bits) and mpos (low 32 bits)
+// fields, with isize as tie-breaker, so it survives a temporary file round trip.
+static inline int bam1_cmp_by_minhash(const bam1_tag a, const bam1_tag b)
+{
+    const bam1_t *A = a.bam_record;
+    const bam1_t *B = b.bam_record;
+
+    if (!A) return 1;
+    if (!B) return 0;
+    // Only records with tid == -1 on both sides are compared by the cached key
+    if (A->core.tid != -1 || B->core.tid != -1)
+        return bam1_cmp_core(a,b);
+
+    const uint64_t m_a = (((uint64_t)A->core.pos)<<32)|(uint32_t)A->core.mpos;
+    const uint64_t m_b = (((uint64_t)B->core.pos)<<32)|(uint32_t)B->core.mpos;
+
+    if (m_a < m_b) // by hash
+        return -1;
+    else if (m_a > m_b)
+        return 1;
+    else if (A->core.isize < B->core.isize) // by hash location in seq
+        return -1;
+    else if (A->core.isize > B->core.isize)
+        return 1;
+    else
+        return bam1_cmp_core(a,b);
+}
+
 // Function to compare reads and determine which one is < the other
 // Handle sort-by-pos, sort-by-name, or sort-by-tag
 static inline int bam1_lt(const bam1_tag a, const bam1_tag b)
 {
     if (g_is_by_tag) {
         return bam1_cmp_by_tag(a, b) < 0;
+    } else if (g_is_by_minhash) {
+        return bam1_cmp_by_minhash(a, b) < 0;
     } else {
         return bam1_cmp_core(a,b) < 0;
     }
@@ -1820,7 +1945,7 @@ typedef struct {
 //        -1 for failure
 static int write_buffer(const char *fn, const char *mode, size_t l, bam1_tag *buf,
                         const sam_hdr_t *h, int n_threads, const htsFormat *fmt,
-                        char *arg_list, int no_pg, int write_index)
+                        int clear_minhash, char *arg_list, int no_pg, int write_index)
 {
     size_t i;
     samFile* fp;
@@ -1828,22 +1953,27 @@ static int write_buffer(const char *fn, const char *mode, size_t l, bam1_tag *bu
 
     fp = sam_open_format(fn, mode, fmt);
     if (fp == NULL) return -1;
-    if (!no_pg && sam_hdr_add_pg((sam_hdr_t *)h, "samtools",
-                                 "VN", samtools_version(),
+    if (!no_pg && sam_hdr_add_pg((sam_hdr_t *)h, "samtools", "VN", samtools_version(),
                                  arg_list ? "CL": NULL,
                                  arg_list ? arg_list : NULL,
                                  NULL)) {
         goto fail;
     }
-    if (sam_hdr_write(fp, (sam_hdr_t *)h) != 0) goto fail;
+    if (sam_hdr_write(fp, h) != 0) goto fail;
 
-    if (write_index) {
+    if (write_index)
         if (!(out_idx_fn = auto_index(fp, fn, (sam_hdr_t *)h))) goto fail;
-    }
 
     if (n_threads > 1) hts_set_threads(fp, n_threads);
     for (i = 0; i < l; ++i) {
-        if (sam_write1(fp, (sam_hdr_t *)h, buf[i].bam_record) < 0) goto fail;
+        bam1_t *b = buf[i].bam_record;
+        if (clear_minhash && b->core.tid == -1) {
+            // Remove the cached minhash value
+            b->core.pos = -1;
+            b->core.mpos = -1;
+            b->core.isize = 0;
+        }
+        if (sam_write1(fp, h, b) < 0) goto fail;
     }
 
     if (write_index) {
@@ -1946,18 +2076,206 @@ err:
     return ret;
 }
 
+/*
+ * Computes the minhash of a sequence using both forward and reverse strands.
+ *
+ * This is used as a sort key for unmapped data, to collate like sequences
+ * together and to improve compression ratio.
+ *
+ * Returns the smaller of the two strand minima.  If non-NULL, *pos receives
+ * its location in the sequence and *rev is 1 if it came from the reverse strand.
+ */
+static uint64_t minhash(bam1_t *b, int kmer, int *pos, int *rev) {
+    uint64_t hashf = 0, minhashf = UINT64_MAX;
+    uint64_t hashr = 0, minhashr = UINT64_MAX;
+    int minhashpf = 0, minhashpr = 0, i;
+    uint64_t mask = ((uint64_t)1<<(2*kmer))-1; // 64-bit shift ("1L" may be 32 bits); needs kmer <= 31
+    unsigned char *seq = bam_get_seq(b);
+    int len = b->core.l_qseq;
+
+    // Lookup tables for bam_seqi to 0123 fwd/rev hashes
+    // =ACM GRSV TWYH KDBN
+#define X 0
+    unsigned char L[16] = {
+        X,0,1,X,  2,X,X,X,  3,X,X,X,  X,X,X,X,
+    };
+    uint64_t R[16] = {
+        X,3,2,X,  1,X,X,X,  0,X,X,X,  X,X,X,X,
+    };
+    for (i = 0; i < 16; i++)
+        R[i] <<= 2*(kmer-1);
+    // R[] now holds the complemented base code pre-shifted to the top of the k-mer
+    // Punt homopolymers somewhere central in the hash space
+#define XOR (0xdead7878beef7878 & mask)
+
+    // Initialise hash keys
+    for (i = 0; i < kmer-1 && i < len; i++) {
+        int base = bam_seqi(seq, i);
+        hashf = (hashf<<2) | L[base];
+        hashr = (hashr>>2) | R[base];
+    }
+
+    // Loop to find minimum
+    for (; i < len; i++) {
+        int base = bam_seqi(seq, i);
+
+        hashf = ((hashf<<2) | L[base]) & mask;
+        hashr =  (hashr>>2) | R[base];
+
+        if (minhashf > (hashf^XOR))
+            minhashf = (hashf^XOR), minhashpf = i;
+        if (minhashr > (hashr^XOR))
+            minhashr = (hashr^XOR), minhashpr = len-i+kmer-2;
+
+    }
+    // If len < kmer the loop above never ran: both minima are still UINT64_MAX
+    if (minhashf <= minhashr) {
+        if (rev) *rev = 0;
+        if (pos) *pos = minhashpf;
+        return minhashf;
+    } else {
+        if (rev) *rev = 1;
+        if (pos) *pos = minhashpr;
+        return minhashr;
+    }
+}
+
+//--- Start of candidates to punt to htslib
+/*!
+ * @abstract
+ * Decodes the packed bases of a bam record, in the orientation they are
+ * stored in, into buf as text followed by a terminating nul.
+ *
+ * @param b     The bam record to read from
+ * @param buf   Output buffer with room for b->core.l_qseq+1 bytes
+ */
+static void bam_to_seq(bam1_t *b, char *buf) {
+    uint8_t *packed = bam_get_seq(b);
+    int k = 0;
+    for (; k < b->core.l_qseq; k++)
+        buf[k] = seq_nt16_str[bam_seqi(packed, k)];
+    buf[k] = '\0';
+}
+
+/*!
+ * @abstract
+ * Encodes b->core.l_qseq text bases from buf into the record's packed sequence.
+ *
+ * The record is not resized: for a sequence of a different length the
+ * caller must first make room for it by updating the bam1_t struct.
+ *
+ * @param b     The bam record to write into
+ * @param buf   Input text, at least b->core.l_qseq bytes long
+ */
+static void seq_to_bam(bam1_t *b, char *buf) {
+    uint8_t *packed = bam_get_seq(b);
+    int k, n = b->core.l_qseq;
+    for (k = 0; k < n; k++)
+        bam_set_seqi(packed, k, seq_nt16_table[(unsigned char)buf[k]]);
+}
+
+/*!
+ * @abstract Reverse complements a BAM record.
+ *
+ * The bases are decoded to text, reversed and complemented, then re-encoded;
+ * qualities are reversed in place and flag bit 0x10 is toggled.
+ *
+ * @param b  Pointer to a BAM alignment
+ *
+ * @return   0 on success, -1 on failure (ENOMEM)
+ */
+static int reverse_complement(bam1_t *b) {
+    static const char comp[256] = {
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//00
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//10
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//20
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//30
+       // NB: '=' (0x3d) lies in the rows above and so complements to 'N'
+       //    *   *   *    *   E   F   *    *   I   J   *    L   *   *   O
+        '@','T','V','G', 'H','E','F','C', 'D','I','J','M', 'L','K','N','O',//40
+       //P   Q   *   *    *   *   *   *    X   *   Z   [    \   ]   ^   _
+        'P','Q','Y','S', 'A','A','B','W', 'X','R','Z','[','\\',']','^','_',//50
+       //`   *   *   *    *   E   F   *    *   I   J   *    L   *   *   O
+        '`','t','v','g', 'h','e','f','c', 'd','i','j','m', 'l','k','n','o',//60
+       //P   Q   *   *    *   *   *   *    X   *   Z   {    |   }   ~   DEL
+        'p','q','y','s', 'a','a','b','w', 'x','r','z','{', '|','}','~',127,//70
+
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//80
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//90
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//A0
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//B0
+
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//C0
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//D0
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//E0
+        'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',//F0
+    };
+    char seq_[10000], *seq = seq_;
+    uint8_t *qual = bam_get_qual(b);
+    int i, j;
+    // Sequences too long for the stack buffer go on the heap (+1 for the nul)
+    if (b->core.l_qseq >= 10000)
+        if (!(seq = malloc(b->core.l_qseq+1)))
+            return -1;
+
+    bam_to_seq(b, seq);
+    // Swap from both ends inwards, complementing bases and mirroring qualities
+    for (i = 0, j = b->core.l_qseq-1; i < j; i++, j--) {
+        unsigned char tmp = seq[i];
+        seq[i] = comp[(unsigned char)seq[j]];
+        seq[j] = comp[tmp];
+        tmp = qual[i];
+        qual[i] = qual[j];
+        qual[j] = tmp;
+    }
+    if (i == j) // odd length: the middle base stays in place but is complemented
+        seq[i] = comp[(unsigned char)seq[i]];
+
+    seq_to_bam(b, seq);
+
+    if (seq != seq_)
+        free(seq);
+
+    b->core.flag ^= 0x10;
+
+    return 0;
+}
+//--- End of candidates to punt to htslib
+
 static void *worker(void *data)
 {
     worker_t *w = (worker_t*)data;
     char *name;
     w->error = 0;
 
-    if (!g_is_by_qname && !g_is_by_tag) {
+    if (!g_is_by_qname && !g_is_by_tag && !g_is_by_minhash) {
         if (ks_radixsort(w->buf_len, w->buf, w->h) < 0) {
             w->error = errno;
             return NULL;
         }
     } else {
+        if (g_is_by_minhash) {
+            int i;
+            for (i = 0; i < w->buf_len; i++) {
+                bam1_t *b = w->buf[i].bam_record;
+                if (b->core.tid != -1)
+                    continue;
+
+                int pos = 0, rev = 0;
+                uint64_t mh = minhash(b, g_is_by_minhash, &pos, &rev);
+                if (rev)
+                    reverse_complement(b);
+
+                // Store 64-bit hash in unmapped pos and mpos fields.
+                // The position of hash is in isize, which we use for
+                // resolving ties when sorting by hash key.
+                // These are unused for completely unmapped data and
+                // will be reset during final output.
+                b->core.pos = mh>>31;
+                b->core.mpos = mh&0x7fffffff;
+                b->core.isize = 65535-pos >=0 ? 65535-pos : 0;
+            }
+        }
         ks_mergesort(sort, w->buf_len, w->buf, 0);
     }
 
@@ -1985,10 +2303,10 @@ static void *worker(void *data)
             return 0;
         }
 
-        if (write_buffer(name, "wcx1", w->buf_len, w->buf, w->h, 0, &fmt, NULL, 1, 0) < 0)
+        if (write_buffer(name, "wcx1", w->buf_len, w->buf, w->h, 0, &fmt, 0, NULL, 1, 0) < 0)
             w->error = errno;
     } else {
-        if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL, NULL, 1, 0) < 0)
+        if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL, 0, NULL, 1, 0) < 0)
             w->error = errno;
     }
 
@@ -2045,6 +2363,7 @@ static int sort_blocks(int n_files, size_t k, bam1_tag *buf, const char *prefix,
     return n_files + n_threads;
 }
 
+
 /*!
   @abstract Sort an unsorted BAM file based on the chromosome order
   and the leftmost position of an alignment
@@ -2069,7 +2388,7 @@ static int sort_blocks(int n_files, size_t k, bam1_tag *buf, const char *prefix,
  */
 int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const char *prefix,
                       const char *fnout, const char *modeout,
-                      size_t _max_mem, int n_threads,
+                      size_t _max_mem, int by_minimiser, int n_threads,
                       const htsFormat *in_fmt, const htsFormat *out_fmt,
                       char *arg_list, int no_pg, int write_index)
 {
@@ -2092,6 +2411,7 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
 
     if (n_threads < 2) n_threads = 1;
     g_is_by_qname = is_by_qname;
+    g_is_by_minhash = by_minimiser;
     if (sort_by_tag) {
         g_is_by_tag = 1;
         g_sort_tag[0] = sort_by_tag[0];
@@ -2118,11 +2438,23 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
     else
         new_so = "coordinate";
 
-    if ((-1 == sam_hdr_update_hd(header, "SO", new_so))
-     && (-1 == sam_hdr_add_line(header, "HD", "VN", SAM_FORMAT_VERSION, "SO", new_so, NULL))
-     ) {
-        print_error("sort", "failed to change sort order header to '%s'\n", new_so);
-        goto err;
+    if (by_minimiser) {
+        const char *new_ss = "coordinate:minhash";
+        if ((-1 == sam_hdr_update_hd(header, "SO", new_so, "SS", new_ss))
+            && (-1 == sam_hdr_add_line(header, "HD", "VN", SAM_FORMAT_VERSION,
+                                       "SO", new_so, "SS", new_ss, NULL))
+            ) {
+            print_error("sort", "failed to change sort order header to 'SO:%s SS:%s'\n",
+                        new_so, new_ss);
+            goto err;
+        }
+    } else {
+        if ((-1 == sam_hdr_update_hd(header, "SO", new_so))
+            && (-1 == sam_hdr_add_line(header, "HD", "VN", SAM_FORMAT_VERSION, "SO", new_so, NULL))
+            ) {
+            print_error("sort", "failed to change sort order header to 'SO:%s'\n", new_so);
+            goto err;
+        }
     }
 
     if (-1 == sam_hdr_remove_tag_hd(header, "GO")) {
@@ -2209,7 +2541,8 @@ int bam_sort_core_ext(int is_by_qname, char* sort_by_tag, const char *fn, const
 
     // write the final output
     if (n_files == 0 && num_in_mem < 2) { // a single block
-        if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt, arg_list, no_pg, write_index) != 0) {
+        if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt,
+                         g_is_by_minhash, arg_list, no_pg, write_index) != 0) {
             print_error_errno("sort", "failed to create \"%s\"", fnout);
             goto err;
         }
@@ -2263,7 +2596,7 @@ int bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t ma
     char *fnout = calloc(strlen(prefix) + 4 + 1, 1);
     if (!fnout) return -1;
     sprintf(fnout, "%s.bam", prefix);
-    ret = bam_sort_core_ext(is_by_qname, NULL, fn, prefix, fnout, "wb", max_mem, 0, NULL, NULL, NULL, 1, 0);
+    ret = bam_sort_core_ext(is_by_qname, NULL, fn, prefix, fnout, "wb", max_mem, 0, 0, NULL, NULL, NULL, 1, 0);
     free(fnout);
     return ret;
 }
@@ -2274,13 +2607,16 @@ static void sort_usage(FILE *fp)
 "Usage: samtools sort [options...] [in.bam]\n"
 "Options:\n"
 "  -l INT     Set compression level, from 0 (uncompressed) to 9 (best)\n"
+"  -u         Output uncompressed data (equivalent to -l 0)\n"
 "  -m INT     Set maximum memory per thread; suffix K/M/G recognized [768M]\n"
-"  -n         Sort by read name\n"
+"  -M         Use minimiser for clustering unaligned/unplaced reads\n"
+"  -K INT     Kmer size to use for minimiser [20]\n"
+"  -n         Sort by read name (not compatible with samtools index command)\n"
 "  -t TAG     Sort by value of TAG. Uses position as secondary index (or read name if -n is set)\n"
 "  -o FILE    Write final output to FILE rather than standard output\n"
 "  -T PREFIX  Write temporary files to PREFIX.nnnn.bam\n"
 "  --no-PG    do not add a PG line\n");
-    sam_global_opt_help(fp, "-.O..@-.");
+    sam_global_opt_help(fp, "-.O..@..");
 }
 
 static void complain_about_memory_setting(size_t max_mem) {
@@ -2304,6 +2640,7 @@ int bam_sort(int argc, char *argv[])
 {
     size_t max_mem = SORT_DEFAULT_MEGS_PER_THREAD << 20;
     int c, nargs, is_by_qname = 0, ret, o_seen = 0, level = -1, no_pg = 0;
+    int by_minimiser = 0, minimiser_kmer = 20;
     char* sort_tag = NULL, *arg_list = NULL;
     char *fnout = "-", modeout[12];
     kstring_t tmpprefix = { 0, 0, NULL };
@@ -2317,7 +2654,7 @@ int bam_sort(int argc, char *argv[])
         { NULL, 0, NULL, 0 }
     };
 
-    while ((c = getopt_long(argc, argv, "l:m:no:O:T:@:t:", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "l:m:no:O:T:@:t:MK:u", lopts, NULL)) >= 0) {
         switch (c) {
         case 'o': fnout = optarg; o_seen = 1; break;
         case 'n': is_by_qname = 1; break;
@@ -2332,7 +2669,16 @@ int bam_sort(int argc, char *argv[])
             }
         case 'T': kputs(optarg, &tmpprefix); break;
         case 'l': level = atoi(optarg); break;
-        case 1: no_pg = 1; break;
+        case 'u': level = 0; break;
+        case   1: no_pg = 1; break;
+        case 'M': by_minimiser = 1; break;
+        case 'K':
+            minimiser_kmer = atoi(optarg);
+            if (minimiser_kmer < 1)
+                minimiser_kmer = 1;
+            else if (minimiser_kmer > 31)
+                minimiser_kmer = 31;
+            break;
 
         default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                   /* else fall-through */
@@ -2387,7 +2733,8 @@ int bam_sort(int argc, char *argv[])
     }
 
     ret = bam_sort_core_ext(is_by_qname, sort_tag, (nargs > 0)? argv[optind] : "-",
-                            tmpprefix.s, fnout, modeout, max_mem, ga.nthreads,
+                            tmpprefix.s, fnout, modeout, max_mem,
+                            by_minimiser * minimiser_kmer, ga.nthreads,
                             &ga.in, &ga.out, arg_list, no_pg, ga.write_index);
     if (ret >= 0)
         ret = EXIT_SUCCESS;
diff --git a/samtools/bam_stat.c b/samtools/bam_stat.c
index 5fb9ba0..31dc8fe 100644
--- a/samtools/bam_stat.c
+++ b/samtools/bam_stat.c
@@ -1,6 +1,6 @@
 /*  bam_stat.c -- flagstat subcommand.
 
-    Copyright (C) 2009, 2011, 2013-2015, 2019 Genome Research Ltd.
+    Copyright (C) 2009, 2011, 2013-2015, 2019, 2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -42,32 +42,41 @@ typedef struct {
     long long n_dup[2];
     long long n_diffchr[2], n_diffhigh[2];
     long long n_secondary[2], n_supp[2];
+    long long n_primary[2], n_pmapped[2], n_pdup[2];
 } bam_flagstat_t;
 
-#define flagstat_loop(s, c) do {                                        \
-        int w = ((c)->flag & BAM_FQCFAIL)? 1 : 0;                       \
-        ++(s)->n_reads[w];                                              \
-        if ((c)->flag & BAM_FSECONDARY ) {                              \
-            ++(s)->n_secondary[w];                                      \
-        } else if ((c)->flag & BAM_FSUPPLEMENTARY ) {                   \
-            ++(s)->n_supp[w];                                           \
-        } else if ((c)->flag & BAM_FPAIRED) {                           \
-            ++(s)->n_pair_all[w];                                       \
-            if (((c)->flag & BAM_FPROPER_PAIR) && !((c)->flag & BAM_FUNMAP) ) ++(s)->n_pair_good[w];    \
-            if ((c)->flag & BAM_FREAD1) ++(s)->n_read1[w];              \
-            if ((c)->flag & BAM_FREAD2) ++(s)->n_read2[w];              \
-            if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP)) ++(s)->n_sgltn[w];  \
-            if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \
-                ++(s)->n_pair_map[w];                                   \
-                if ((c)->mtid != (c)->tid) {                            \
-                    ++(s)->n_diffchr[w];                                \
-                    if ((c)->qual >= 5) ++(s)->n_diffhigh[w];           \
-                }                                                       \
-            }                                                           \
-        }                                                               \
-        if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped[w];              \
-        if ((c)->flag & BAM_FDUP) ++(s)->n_dup[w];                      \
-    } while (0)
+inline static void flagstat_loop(bam_flagstat_t *s, bam1_core_t *c)
+{   // Tally the flag bits of one record (c) into the running counters in s.
+    int w = (c->flag & BAM_FQCFAIL)? 1 : 0;  // counter slot: 0 = QC-passed, 1 = QC-failed
+    ++s->n_reads[w];
+    if (c->flag & BAM_FSECONDARY ) {
+        ++s->n_secondary[w];
+    } else if (c->flag & BAM_FSUPPLEMENTARY ) {
+        ++s->n_supp[w];
+    } else {  // neither secondary nor supplementary: counted as primary
+        ++s->n_primary[w];
+        // Pairing statistics are only gathered for primary records.
+        if (c->flag & BAM_FPAIRED) {
+            ++s->n_pair_all[w];
+            if ((c->flag & BAM_FPROPER_PAIR) && !(c->flag & BAM_FUNMAP) ) ++s->n_pair_good[w];
+            if (c->flag & BAM_FREAD1) ++s->n_read1[w];
+            if (c->flag & BAM_FREAD2) ++s->n_read2[w];
+            if ((c->flag & BAM_FMUNMAP) && !(c->flag & BAM_FUNMAP)) ++s->n_sgltn[w];  // this read mapped, mate flagged unmapped
+            if (!(c->flag & BAM_FUNMAP) && !(c->flag & BAM_FMUNMAP)) {  // neither this read nor its mate flagged unmapped
+                ++s->n_pair_map[w];
+                if (c->mtid != c->tid) {  // mate's reference id differs from this read's
+                    ++s->n_diffchr[w];
+                    if (c->qual >= 5) ++s->n_diffhigh[w];  // subset of the above with qual >= 5
+                }
+            }
+        }
+        // Primary-only counterparts of the n_mapped / n_dup totals updated below.
+        if (!(c->flag & BAM_FUNMAP)) ++s->n_pmapped[w];
+        if (c->flag & BAM_FDUP) ++s->n_pdup[w];
+    }
+    if (!(c->flag & BAM_FUNMAP)) ++s->n_mapped[w];  // every record, secondary and supplementary included
+    if (c->flag & BAM_FDUP) ++s->n_dup[w];
+}
 
 bam_flagstat_t *bam_flagstat_core(samFile *fp, sam_hdr_t *h)
 {
@@ -81,8 +90,10 @@ bam_flagstat_t *bam_flagstat_core(samFile *fp, sam_hdr_t *h)
     while ((ret = sam_read1(fp, h, b)) >= 0)
         flagstat_loop(s, c);
     bam_destroy1(b);
-    if (ret != -1)
-        fprintf(stderr, "[bam_flagstat_core] Truncated file? Continue anyway.\n");
+    if (ret != -1) {
+        free(s);
+        return NULL;
+    }
     return s;
 }
 
@@ -114,10 +125,13 @@ static void out_fmt_default(bam_flagstat_t *s)
 {
     char b0[16], b1[16];
     printf("%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
+    printf("%lld + %lld primary\n", s->n_primary[0], s->n_primary[1]);
     printf("%lld + %lld secondary\n", s->n_secondary[0], s->n_secondary[1]);
     printf("%lld + %lld supplementary\n", s->n_supp[0], s->n_supp[1]);
     printf("%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
+    printf("%lld + %lld primary duplicates\n", s->n_pdup[0], s->n_pdup[1]);
     printf("%lld + %lld mapped (%s : %s)\n", s->n_mapped[0], s->n_mapped[1], percent(b0, s->n_mapped[0], s->n_reads[0]), percent(b1, s->n_mapped[1], s->n_reads[1]));
+    printf("%lld + %lld primary mapped (%s : %s)\n", s->n_pmapped[0], s->n_pmapped[1], percent(b0, s->n_pmapped[0], s->n_primary[0]), percent(b1, s->n_pmapped[1], s->n_primary[1]));
     printf("%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
     printf("%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
     printf("%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
@@ -129,14 +143,18 @@ static void out_fmt_default(bam_flagstat_t *s)
 }
 
 static void out_fmt_json(bam_flagstat_t *s) {
-    char b0[16], b1[16];
+    char b0[16], b1[16], p0[16], p1[16], pp0[16], pp1[16], s0[16], s1[16];
     printf("{\n \"QC-passed reads\": { \n"
                  "  \"total\": %lld, \n"
+                 "  \"primary\": %lld, \n"
                  "  \"secondary\": %lld, \n"
                  "  \"supplementary\": %lld, \n"
                  "  \"duplicates\": %lld, \n"
+                 "  \"primary duplicates\": %lld, \n"
                  "  \"mapped\": %lld, \n"
                  "  \"mapped %%\": %s, \n"
+                 "  \"primary mapped\": %lld, \n"
+                 "  \"primary mapped %%\": %s, \n"
                  "  \"paired in sequencing\": %lld, \n"
                  "  \"read1\": %lld, \n"
                  "  \"read2\": %lld, \n"
@@ -150,11 +168,15 @@ static void out_fmt_json(bam_flagstat_t *s) {
                  " },"
             "\n \"QC-failed reads\": { \n"
                  "  \"total\": %lld, \n"
+                 "  \"primary\": %lld, \n"
                  "  \"secondary\": %lld, \n"
                  "  \"supplementary\": %lld, \n"
                  "  \"duplicates\": %lld, \n"
+                 "  \"primary duplicates\": %lld, \n"
                  "  \"mapped\": %lld, \n"
                  "  \"mapped %%\": %s, \n"
+                 "  \"primary mapped\": %lld, \n"
+                 "  \"primary mapped %%\": %s, \n"
                  "  \"paired in sequencing\": %lld, \n"
                  "  \"read1\": %lld, \n"
                  "  \"read2\": %lld, \n"
@@ -168,35 +190,43 @@ static void out_fmt_json(bam_flagstat_t *s) {
                  " }\n"
             "}\n",
         s->n_reads[0],
+        s->n_primary[0],
         s->n_secondary[0],
         s->n_supp[0],
         s->n_dup[0],
+        s->n_pdup[0],
         s->n_mapped[0],
         percent_json(b0, s->n_mapped[0], s->n_reads[0]),
+        s->n_pmapped[0],
+        percent_json(p0, s->n_pmapped[0], s->n_primary[0]),
         s->n_pair_all[0],
         s->n_read1[0],
         s->n_read2[0],
         s->n_pair_good[0],
-        percent_json(b0, s->n_pair_good[0], s->n_pair_all[0]),
+        percent_json(pp0, s->n_pair_good[0], s->n_pair_all[0]),
         s->n_pair_map[0],
         s->n_sgltn[0],
-        percent_json(b0, s->n_sgltn[0], s->n_pair_all[0]),
+        percent_json(s0, s->n_sgltn[0], s->n_pair_all[0]),
         s->n_diffchr[0],
         s->n_diffhigh[0],
         s->n_reads[1],
+        s->n_primary[1],
         s->n_secondary[1],
         s->n_supp[1],
         s->n_dup[1],
+        s->n_pdup[1],
         s->n_mapped[1],
         percent_json(b1, s->n_mapped[1], s->n_reads[1]),
+        s->n_pmapped[1],
+        percent_json(p1, s->n_pmapped[1], s->n_primary[1]),
         s->n_pair_all[1],
         s->n_read1[1],
         s->n_read2[1],
         s->n_pair_good[1],
-        percent_json(b1, s->n_pair_good[1], s->n_pair_all[1]),
+        percent_json(pp1, s->n_pair_good[1], s->n_pair_all[1]),
         s->n_pair_map[1],
         s->n_sgltn[1],
-        percent_json(b1, s->n_sgltn[1], s->n_pair_all[1]),
+        percent_json(s1, s->n_sgltn[1], s->n_pair_all[1]),
         s->n_diffchr[1],
         s->n_diffhigh[1]
     );
@@ -205,11 +235,15 @@ static void out_fmt_json(bam_flagstat_t *s) {
 static void out_fmt_tsv(bam_flagstat_t *s) {
     char b0[16], b1[16];
     printf("%lld\t%lld\ttotal (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
+    printf("%lld\t%lld\tprimary\n", s->n_primary[0], s->n_primary[1]);
     printf("%lld\t%lld\tsecondary\n", s->n_secondary[0], s->n_secondary[1]);
     printf("%lld\t%lld\tsupplementary\n", s->n_supp[0], s->n_supp[1]);
     printf("%lld\t%lld\tduplicates\n", s->n_dup[0], s->n_dup[1]);
+    printf("%lld\t%lld\tprimary duplicates\n", s->n_pdup[0], s->n_pdup[1]);
     printf("%lld\t%lld\tmapped\n", s->n_mapped[0], s->n_mapped[1]);
     printf("%s\t%s\tmapped %%\n", percent(b0, s->n_mapped[0], s->n_reads[0]), percent(b1, s->n_mapped[1], s->n_reads[1]));
+    printf("%lld\t%lld\tprimary mapped\n", s->n_pmapped[0], s->n_pmapped[1]);
+    printf("%s\t%s\tprimary mapped %%\n", percent(b0, s->n_pmapped[0], s->n_primary[0]), percent(b1, s->n_pmapped[1], s->n_primary[1]));
     printf("%lld\t%lld\tpaired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
     printf("%lld\t%lld\tread1\n", s->n_read1[0], s->n_read1[1]);
     printf("%lld\t%lld\tread2\n", s->n_read2[0], s->n_read2[1]);
@@ -242,7 +276,7 @@ int bam_flagstat(int argc, char *argv[])
     sam_hdr_t *header;
     bam_flagstat_t *s;
     const char *out_fmt = "default";
-    int c;
+    int c, status = EXIT_SUCCESS;
 
     enum {
         INPUT_FMT_OPTION = CHAR_MAX+1,
@@ -296,10 +330,17 @@ int bam_flagstat(int argc, char *argv[])
     }
 
     s = bam_flagstat_core(fp, header);
-    output_fmt(s, out_fmt);
-    free(s);
+    if (s) {
+        output_fmt(s, out_fmt);
+        free(s);
+    }
+    else {
+        print_error("flagstat", "error reading from \"%s\"", argv[optind]);
+        status = EXIT_FAILURE;
+    }
+
     sam_hdr_destroy(header);
     sam_close(fp);
     sam_global_args_free(&ga);
-    return 0;
+    return status;
 }
diff --git a/samtools/bam_stat.c.pysam.c b/samtools/bam_stat.c.pysam.c
index 84a9ea4..bd6f4ca 100644
--- a/samtools/bam_stat.c.pysam.c
+++ b/samtools/bam_stat.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bam_stat.c -- flagstat subcommand.
 
-    Copyright (C) 2009, 2011, 2013-2015, 2019 Genome Research Ltd.
+    Copyright (C) 2009, 2011, 2013-2015, 2019, 2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -44,32 +44,41 @@ typedef struct {
     long long n_dup[2];
     long long n_diffchr[2], n_diffhigh[2];
     long long n_secondary[2], n_supp[2];
+    long long n_primary[2], n_pmapped[2], n_pdup[2];
 } bam_flagstat_t;
 
-#define flagstat_loop(s, c) do {                                        \
-        int w = ((c)->flag & BAM_FQCFAIL)? 1 : 0;                       \
-        ++(s)->n_reads[w];                                              \
-        if ((c)->flag & BAM_FSECONDARY ) {                              \
-            ++(s)->n_secondary[w];                                      \
-        } else if ((c)->flag & BAM_FSUPPLEMENTARY ) {                   \
-            ++(s)->n_supp[w];                                           \
-        } else if ((c)->flag & BAM_FPAIRED) {                           \
-            ++(s)->n_pair_all[w];                                       \
-            if (((c)->flag & BAM_FPROPER_PAIR) && !((c)->flag & BAM_FUNMAP) ) ++(s)->n_pair_good[w];    \
-            if ((c)->flag & BAM_FREAD1) ++(s)->n_read1[w];              \
-            if ((c)->flag & BAM_FREAD2) ++(s)->n_read2[w];              \
-            if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP)) ++(s)->n_sgltn[w];  \
-            if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \
-                ++(s)->n_pair_map[w];                                   \
-                if ((c)->mtid != (c)->tid) {                            \
-                    ++(s)->n_diffchr[w];                                \
-                    if ((c)->qual >= 5) ++(s)->n_diffhigh[w];           \
-                }                                                       \
-            }                                                           \
-        }                                                               \
-        if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped[w];              \
-        if ((c)->flag & BAM_FDUP) ++(s)->n_dup[w];                      \
-    } while (0)
+inline static void flagstat_loop(bam_flagstat_t *s, bam1_core_t *c)
+{   // Tally the flag bits of one record (c) into the running counters in s.
+    int w = (c->flag & BAM_FQCFAIL)? 1 : 0;  // counter slot: 0 = QC-passed, 1 = QC-failed
+    ++s->n_reads[w];
+    if (c->flag & BAM_FSECONDARY ) {
+        ++s->n_secondary[w];
+    } else if (c->flag & BAM_FSUPPLEMENTARY ) {
+        ++s->n_supp[w];
+    } else {  // neither secondary nor supplementary: counted as primary
+        ++s->n_primary[w];
+        // Pairing statistics are only gathered for primary records.
+        if (c->flag & BAM_FPAIRED) {
+            ++s->n_pair_all[w];
+            if ((c->flag & BAM_FPROPER_PAIR) && !(c->flag & BAM_FUNMAP) ) ++s->n_pair_good[w];
+            if (c->flag & BAM_FREAD1) ++s->n_read1[w];
+            if (c->flag & BAM_FREAD2) ++s->n_read2[w];
+            if ((c->flag & BAM_FMUNMAP) && !(c->flag & BAM_FUNMAP)) ++s->n_sgltn[w];  // this read mapped, mate flagged unmapped
+            if (!(c->flag & BAM_FUNMAP) && !(c->flag & BAM_FMUNMAP)) {  // neither this read nor its mate flagged unmapped
+                ++s->n_pair_map[w];
+                if (c->mtid != c->tid) {  // mate's reference id differs from this read's
+                    ++s->n_diffchr[w];
+                    if (c->qual >= 5) ++s->n_diffhigh[w];  // subset of the above with qual >= 5
+                }
+            }
+        }
+        // Primary-only counterparts of the n_mapped / n_dup totals updated below.
+        if (!(c->flag & BAM_FUNMAP)) ++s->n_pmapped[w];
+        if (c->flag & BAM_FDUP) ++s->n_pdup[w];
+    }
+    if (!(c->flag & BAM_FUNMAP)) ++s->n_mapped[w];  // every record, secondary and supplementary included
+    if (c->flag & BAM_FDUP) ++s->n_dup[w];
+}
 
 bam_flagstat_t *bam_flagstat_core(samFile *fp, sam_hdr_t *h)
 {
@@ -83,8 +92,10 @@ bam_flagstat_t *bam_flagstat_core(samFile *fp, sam_hdr_t *h)
     while ((ret = sam_read1(fp, h, b)) >= 0)
         flagstat_loop(s, c);
     bam_destroy1(b);
-    if (ret != -1)
-        fprintf(samtools_stderr, "[bam_flagstat_core] Truncated file? Continue anyway.\n");
+    if (ret != -1) {
+        free(s);
+        return NULL;
+    }
     return s;
 }
 
@@ -109,17 +120,20 @@ static void usage_exit(FILE *fp, int exit_status)
     fprintf(fp, "  -O, --");
     fprintf(fp, "output-fmt FORMAT[,OPT[=VAL]]...\n"
             "               Specify output format (json, tsv)\n");
-    exit(exit_status);
+    samtools_exit(exit_status);
 }
 
 static void out_fmt_default(bam_flagstat_t *s)
 {
     char b0[16], b1[16];
     fprintf(samtools_stdout, "%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
+    fprintf(samtools_stdout, "%lld + %lld primary\n", s->n_primary[0], s->n_primary[1]);
     fprintf(samtools_stdout, "%lld + %lld secondary\n", s->n_secondary[0], s->n_secondary[1]);
     fprintf(samtools_stdout, "%lld + %lld supplementary\n", s->n_supp[0], s->n_supp[1]);
     fprintf(samtools_stdout, "%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
+    fprintf(samtools_stdout, "%lld + %lld primary duplicates\n", s->n_pdup[0], s->n_pdup[1]);
     fprintf(samtools_stdout, "%lld + %lld mapped (%s : %s)\n", s->n_mapped[0], s->n_mapped[1], percent(b0, s->n_mapped[0], s->n_reads[0]), percent(b1, s->n_mapped[1], s->n_reads[1]));
+    fprintf(samtools_stdout, "%lld + %lld primary mapped (%s : %s)\n", s->n_pmapped[0], s->n_pmapped[1], percent(b0, s->n_pmapped[0], s->n_primary[0]), percent(b1, s->n_pmapped[1], s->n_primary[1]));
     fprintf(samtools_stdout, "%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
     fprintf(samtools_stdout, "%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
     fprintf(samtools_stdout, "%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
@@ -131,14 +145,18 @@ static void out_fmt_default(bam_flagstat_t *s)
 }
 
 static void out_fmt_json(bam_flagstat_t *s) {
-    char b0[16], b1[16];
+    char b0[16], b1[16], p0[16], p1[16], pp0[16], pp1[16], s0[16], s1[16];
     fprintf(samtools_stdout, "{\n \"QC-passed reads\": { \n"
                  "  \"total\": %lld, \n"
+                 "  \"primary\": %lld, \n"
                  "  \"secondary\": %lld, \n"
                  "  \"supplementary\": %lld, \n"
                  "  \"duplicates\": %lld, \n"
+                 "  \"primary duplicates\": %lld, \n"
                  "  \"mapped\": %lld, \n"
                  "  \"mapped %%\": %s, \n"
+                 "  \"primary mapped\": %lld, \n"
+                 "  \"primary mapped %%\": %s, \n"
                  "  \"paired in sequencing\": %lld, \n"
                  "  \"read1\": %lld, \n"
                  "  \"read2\": %lld, \n"
@@ -152,11 +170,15 @@ static void out_fmt_json(bam_flagstat_t *s) {
                  " },"
             "\n \"QC-failed reads\": { \n"
                  "  \"total\": %lld, \n"
+                 "  \"primary\": %lld, \n"
                  "  \"secondary\": %lld, \n"
                  "  \"supplementary\": %lld, \n"
                  "  \"duplicates\": %lld, \n"
+                 "  \"primary duplicates\": %lld, \n"
                  "  \"mapped\": %lld, \n"
                  "  \"mapped %%\": %s, \n"
+                 "  \"primary mapped\": %lld, \n"
+                 "  \"primary mapped %%\": %s, \n"
                  "  \"paired in sequencing\": %lld, \n"
                  "  \"read1\": %lld, \n"
                  "  \"read2\": %lld, \n"
@@ -170,35 +192,43 @@ static void out_fmt_json(bam_flagstat_t *s) {
                  " }\n"
             "}\n",
         s->n_reads[0],
+        s->n_primary[0],
         s->n_secondary[0],
         s->n_supp[0],
         s->n_dup[0],
+        s->n_pdup[0],
         s->n_mapped[0],
         percent_json(b0, s->n_mapped[0], s->n_reads[0]),
+        s->n_pmapped[0],
+        percent_json(p0, s->n_pmapped[0], s->n_primary[0]),
         s->n_pair_all[0],
         s->n_read1[0],
         s->n_read2[0],
         s->n_pair_good[0],
-        percent_json(b0, s->n_pair_good[0], s->n_pair_all[0]),
+        percent_json(pp0, s->n_pair_good[0], s->n_pair_all[0]),
         s->n_pair_map[0],
         s->n_sgltn[0],
-        percent_json(b0, s->n_sgltn[0], s->n_pair_all[0]),
+        percent_json(s0, s->n_sgltn[0], s->n_pair_all[0]),
         s->n_diffchr[0],
         s->n_diffhigh[0],
         s->n_reads[1],
+        s->n_primary[1],
         s->n_secondary[1],
         s->n_supp[1],
         s->n_dup[1],
+        s->n_pdup[1],
         s->n_mapped[1],
         percent_json(b1, s->n_mapped[1], s->n_reads[1]),
+        s->n_pmapped[1],
+        percent_json(p1, s->n_pmapped[1], s->n_primary[1]),
         s->n_pair_all[1],
         s->n_read1[1],
         s->n_read2[1],
         s->n_pair_good[1],
-        percent_json(b1, s->n_pair_good[1], s->n_pair_all[1]),
+        percent_json(pp1, s->n_pair_good[1], s->n_pair_all[1]),
         s->n_pair_map[1],
         s->n_sgltn[1],
-        percent_json(b1, s->n_sgltn[1], s->n_pair_all[1]),
+        percent_json(s1, s->n_sgltn[1], s->n_pair_all[1]),
         s->n_diffchr[1],
         s->n_diffhigh[1]
     );
@@ -207,11 +237,15 @@ static void out_fmt_json(bam_flagstat_t *s) {
 static void out_fmt_tsv(bam_flagstat_t *s) {
     char b0[16], b1[16];
     fprintf(samtools_stdout, "%lld\t%lld\ttotal (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
+    fprintf(samtools_stdout, "%lld\t%lld\tprimary\n", s->n_primary[0], s->n_primary[1]);
     fprintf(samtools_stdout, "%lld\t%lld\tsecondary\n", s->n_secondary[0], s->n_secondary[1]);
     fprintf(samtools_stdout, "%lld\t%lld\tsupplementary\n", s->n_supp[0], s->n_supp[1]);
     fprintf(samtools_stdout, "%lld\t%lld\tduplicates\n", s->n_dup[0], s->n_dup[1]);
+    fprintf(samtools_stdout, "%lld\t%lld\tprimary duplicates\n", s->n_pdup[0], s->n_pdup[1]);
     fprintf(samtools_stdout, "%lld\t%lld\tmapped\n", s->n_mapped[0], s->n_mapped[1]);
     fprintf(samtools_stdout, "%s\t%s\tmapped %%\n", percent(b0, s->n_mapped[0], s->n_reads[0]), percent(b1, s->n_mapped[1], s->n_reads[1]));
+    fprintf(samtools_stdout, "%lld\t%lld\tprimary mapped\n", s->n_pmapped[0], s->n_pmapped[1]);
+    fprintf(samtools_stdout, "%s\t%s\tprimary mapped %%\n", percent(b0, s->n_pmapped[0], s->n_primary[0]), percent(b1, s->n_pmapped[1], s->n_primary[1]));
     fprintf(samtools_stdout, "%lld\t%lld\tpaired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
     fprintf(samtools_stdout, "%lld\t%lld\tread1\n", s->n_read1[0], s->n_read1[1]);
     fprintf(samtools_stdout, "%lld\t%lld\tread2\n", s->n_read2[0], s->n_read2[1]);
@@ -244,7 +278,7 @@ int bam_flagstat(int argc, char *argv[])
     sam_hdr_t *header;
     bam_flagstat_t *s;
     const char *out_fmt = "default";
-    int c;
+    int c, status = EXIT_SUCCESS;
 
     enum {
         INPUT_FMT_OPTION = CHAR_MAX+1,
@@ -298,10 +332,17 @@ int bam_flagstat(int argc, char *argv[])
     }
 
     s = bam_flagstat_core(fp, header);
-    output_fmt(s, out_fmt);
-    free(s);
+    if (s) {
+        output_fmt(s, out_fmt);
+        free(s);
+    }
+    else {
+        print_error("flagstat", "error reading from \"%s\"", argv[optind]);
+        status = EXIT_FAILURE;
+    }
+
     sam_hdr_destroy(header);
     sam_close(fp);
     sam_global_args_free(&ga);
-    return 0;
+    return status;
 }
diff --git a/samtools/bamtk.c b/samtools/bamtk.c
index a6959f9..93e6468 100644
--- a/samtools/bamtk.c
+++ b/samtools/bamtk.c
@@ -1,6 +1,6 @@
 /*  bamtk.c -- main samtools command front-end.
 
-    Copyright (C) 2008-2019 Genome Research Ltd.
+    Copyright (C) 2008-2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -30,6 +30,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <string.h>
 
 #include "htslib/hts.h"
+#include "htslib/hfile.h"
 #include "samtools.h"
 #include "version.h"
 
@@ -46,7 +47,6 @@ int bam_fillmd(int argc, char *argv[]);
 int bam_idxstats(int argc, char *argv[]);
 int bam_markdup(int argc, char *argv[]);
 int main_samview(int argc, char *argv[]);
-int main_import(int argc, char *argv[]);
 int main_reheader(int argc, char *argv[]);
 int main_cut_target(int argc, char *argv[]);
 int main_phase(int argc, char *argv[]);
@@ -65,12 +65,78 @@ int main_addreplacerg(int argc, char *argv[]);
 int faidx_main(int argc, char *argv[]);
 int dict_main(int argc, char *argv[]);
 int fqidx_main(int argc, char *argv[]);
+int amplicon_clip_main(int argc, char *argv[]);
+int main_ampliconstats(int argc, char *argv[]);
+int main_import(int argc, char *argv[]);
 
 const char *samtools_version()
 {
     return SAMTOOLS_VERSION;
 }
 
+// Returns a string literal of "key=value " build features; the macros tested come out of the config.h file built by autoconf or Makefile
+const char *samtools_feature_string(void) {
+    const char *fmt =
+    // The adjacent string literals selected below are concatenated by the compiler into one string.
+#ifdef PACKAGE_URL
+    "build=configure "
+#else
+    "build=Makefile "
+#endif
+
+#ifdef HAVE_CURSES
+    "curses=yes "
+#else
+    "curses=no "
+#endif
+    ;
+
+    return fmt;
+}
+
+static void long_version(void) {  // Print detailed samtools/HTSlib version and build information to stdout.
+    printf("samtools %s\n"
+           "Using htslib %s\n"
+           "Copyright (C) 2021 Genome Research Ltd.\n",
+           samtools_version(), hts_version());
+
+    printf("\nSamtools compilation details:\n");
+    printf("    Features:       %s\n", samtools_feature_string());
+    printf("    CC:             %s\n", SAMTOOLS_CC);
+    printf("    CPPFLAGS:       %s\n", SAMTOOLS_CPPFLAGS);
+    printf("    CFLAGS:         %s\n", SAMTOOLS_CFLAGS);
+    printf("    LDFLAGS:        %s\n", SAMTOOLS_LDFLAGS);
+    printf("    HTSDIR:         %s\n", SAMTOOLS_HTSDIR);
+    printf("    LIBS:           %s\n", SAMTOOLS_LIBS);
+    printf("    CURSES_LIB:     %s\n", SAMTOOLS_CURSES_LIB);
+
+    printf("\nHTSlib compilation details:\n");
+    printf("    Features:       %s\n", hts_feature_string());
+    printf("    CC:             %s\n", hts_test_feature(HTS_FEATURE_CC));
+    printf("    CPPFLAGS:       %s\n", hts_test_feature(HTS_FEATURE_CPPFLAGS));
+    printf("    CFLAGS:         %s\n", hts_test_feature(HTS_FEATURE_CFLAGS));
+    printf("    LDFLAGS:        %s\n", hts_test_feature(HTS_FEATURE_LDFLAGS));
+
+    // Plugins and schemes
+    printf("\nHTSlib URL scheme handlers present:\n");
+    const char *plugins[100];
+    int np = 100, i, j;  // np starts as the capacity of plugins[] and is then used as the number of entries to print
+
+    if (hfile_list_plugins(plugins, &np) < 0)
+        return;  // listing failed: stop silently, everything above has already been printed
+
+    for (i = 0; i < np; i++) {
+        const char *sc_list[100];
+        int nschemes = 100;  // same in/out usage as np, for sc_list[]
+        if (hfile_list_schemes(plugins[i], sc_list, &nschemes) < 0)
+            return;
+        // One output line per plugin: its name, a tab, then its schemes separated by commas.
+        printf("    %s:\t", plugins[i]);
+        for (j = 0; j < nschemes; j++)
+            printf(" %s%c", sc_list[j], ",\n"[j+1==nschemes]);  // ',' after each scheme, '\n' after the last; NOTE(review): no newline is printed if nschemes is 0
+    }
+}
+
 static void usage(FILE *fp)
 {
     /* Please improve the grouping */
@@ -96,6 +162,7 @@ static void usage(FILE *fp)
 "     targetcut      cut fosmid regions (for fosmid pool only)\n"
 "     addreplacerg   adds or replaces RG tags\n"
 "     markdup        mark duplicates\n"
+"     ampliconclip   clip oligos from the end of reads\n"
 "\n"
 "  -- File operations\n"
 "     collate        shuffle and group alignments by name\n"
@@ -107,6 +174,7 @@ static void usage(FILE *fp)
 "     quickcheck     quickly check if SAM/BAM/CRAM file appears intact\n"
 "     fastq          converts a BAM to a FASTQ\n"
 "     fasta          converts a BAM to a FASTA\n"
+"     import         Converts FASTA or FASTQ files to SAM/BAM/CRAM\n"
 "\n"
 "  -- Statistics\n"
 "     bedcov         read depth per BED region\n"
@@ -116,19 +184,18 @@ static void usage(FILE *fp)
 "     idxstats       BAM index stats\n"
 "     phase          phase heterozygotes\n"
 "     stats          generate stats (former bamcheck)\n"
+"     ampliconstats  generate amplicon specific stats\n"
 "\n"
 "  -- Viewing\n"
 "     flags          explain BAM flags\n"
 "     tview          text alignment viewer\n"
 "     view           SAM<->BAM<->CRAM conversion\n"
 "     depad          convert padded BAM to unpadded BAM\n"
+"\n"
+"  -- Misc\n"
+"     help [cmd]     display this help message or help for [cmd]\n"
+"     version        detailed version information\n"
 "\n");
-#ifdef _WIN32
-    fprintf(fp,
-"Note: The Windows version of SAMtools is mainly designed for read-only\n"
-"      operations, such as viewing the alignments and generating the pileup.\n"
-"      Binary files generated by the Windows version may be buggy.\n\n");
-#endif
 }
 
 // This is a tricky one, but on Windows the filename wildcard expansion is done by
@@ -176,6 +243,7 @@ int main(int argc, char *argv[])
     else if (strcmp(argv[1], "fixmate") == 0)   ret = bam_mating(argc-1, argv+1);
     else if (strcmp(argv[1], "rmdup") == 0)     ret = bam_rmdup(argc-1, argv+1);
     else if (strcmp(argv[1], "markdup") == 0)   ret = bam_markdup(argc-1, argv+1);
+    else if (strcmp(argv[1], "ampliconclip") == 0) ret = amplicon_clip_main(argc-1, argv+1);
     else if (strcmp(argv[1], "flagstat") == 0 ||
              strcmp(argv[1], "flagstats") == 0) ret = bam_flagstat(argc-1, argv+1);
     else if (strcmp(argv[1], "calmd") == 0)     ret = bam_fillmd(argc-1, argv+1);
@@ -206,12 +274,10 @@ int main(int argc, char *argv[])
         return 1;
     }
     else if (strcmp(argv[1], "tview") == 0)   ret = bam_tview_main(argc-1, argv+1);
-    else if (strcmp(argv[1], "--version") == 0) {
-        printf(
-"samtools %s\n"
-"Using htslib %s\n"
-"Copyright (C) 2019 Genome Research Ltd.\n",
-               samtools_version(), hts_version());
+    else if (strcmp(argv[1], "ampliconstats") == 0)     ret = main_ampliconstats(argc-1, argv+1);
+    else if (strcmp(argv[1], "version") == 0 || \
+             strcmp(argv[1], "--version") == 0) {
+        long_version();
     }
     else if (strcmp(argv[1], "--version-only") == 0) {
         printf("%s+htslib-%s\n", samtools_version(), hts_version());
diff --git a/samtools/bamtk.c.pysam.c b/samtools/bamtk.c.pysam.c
index 91c29b8..dfb2cdd 100644
--- a/samtools/bamtk.c.pysam.c
+++ b/samtools/bamtk.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  bamtk.c -- main samtools command front-end.
 
-    Copyright (C) 2008-2019 Genome Research Ltd.
+    Copyright (C) 2008-2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -32,8 +32,10 @@ DEALINGS IN THE SOFTWARE.  */
 #include <string.h>
 
 #include "htslib/hts.h"
+#include "htslib/hfile.h"
 #include "samtools.h"
 #include "version.h"
+#include "samtools_config_vars.h"
 
 int bam_taf2baf(int argc, char *argv[]);
 int bam_mpileup(int argc, char *argv[]);
@@ -48,8 +50,7 @@ int bam_fillmd(int argc, char *argv[]);
 int bam_idxstats(int argc, char *argv[]);
 int bam_markdup(int argc, char *argv[]);
 int main_samview(int argc, char *argv[]);
-int main_import(int argc, char *argv[]);
-int main_reheader(int argc, char *argv[]);
+int samtools_main_reheader(int argc, char *argv[]);
 int main_cut_target(int argc, char *argv[]);
 int main_phase(int argc, char *argv[]);
 int main_cat(int argc, char *argv[]);
@@ -67,12 +68,78 @@ int main_addreplacerg(int argc, char *argv[]);
 int faidx_main(int argc, char *argv[]);
 int dict_main(int argc, char *argv[]);
 int fqidx_main(int argc, char *argv[]);
+int amplicon_clip_main(int argc, char *argv[]);
+int main_ampliconstats(int argc, char *argv[]);
+int main_import(int argc, char *argv[]);
 
 const char *samtools_version()
 {
     return SAMTOOLS_VERSION;
 }
 
+// These come out of the config.h file built by autoconf or Makefile
+const char *samtools_feature_string(void) {
+    const char *fmt =
+
+#ifdef PACKAGE_URL
+    "build=configure "
+#else
+    "build=Makefile "
+#endif
+
+#ifdef HAVE_CURSES
+    "curses=yes "
+#else
+    "curses=no "
+#endif
+    ;
+
+    return fmt;
+}
+
+static void long_version(void) {
+    fprintf(samtools_stdout, "samtools %s\n"
+           "Using htslib %s\n"
+           "Copyright (C) 2021 Genome Research Ltd.\n",
+           samtools_version(), hts_version());
+
+    fprintf(samtools_stdout, "\nSamtools compilation details:\n");
+    fprintf(samtools_stdout, "    Features:       %s\n", samtools_feature_string());
+    fprintf(samtools_stdout, "    CC:             %s\n", SAMTOOLS_CC);
+    fprintf(samtools_stdout, "    CPPFLAGS:       %s\n", SAMTOOLS_CPPFLAGS);
+    fprintf(samtools_stdout, "    CFLAGS:         %s\n", SAMTOOLS_CFLAGS);
+    fprintf(samtools_stdout, "    LDFLAGS:        %s\n", SAMTOOLS_LDFLAGS);
+    fprintf(samtools_stdout, "    HTSDIR:         %s\n", SAMTOOLS_HTSDIR);
+    fprintf(samtools_stdout, "    LIBS:           %s\n", SAMTOOLS_LIBS);
+    fprintf(samtools_stdout, "    CURSES_LIB:     %s\n", SAMTOOLS_CURSES_LIB);
+
+    fprintf(samtools_stdout, "\nHTSlib compilation details:\n");
+    fprintf(samtools_stdout, "    Features:       %s\n", hts_feature_string());
+    fprintf(samtools_stdout, "    CC:             %s\n", hts_test_feature(HTS_FEATURE_CC));
+    fprintf(samtools_stdout, "    CPPFLAGS:       %s\n", hts_test_feature(HTS_FEATURE_CPPFLAGS));
+    fprintf(samtools_stdout, "    CFLAGS:         %s\n", hts_test_feature(HTS_FEATURE_CFLAGS));
+    fprintf(samtools_stdout, "    LDFLAGS:        %s\n", hts_test_feature(HTS_FEATURE_LDFLAGS));
+
+    // Plugins and schemes
+    fprintf(samtools_stdout, "\nHTSlib URL scheme handlers present:\n");
+    const char *plugins[100];
+    int np = 100, i, j;
+
+    if (hfile_list_plugins(plugins, &np) < 0)
+        return;
+
+    for (i = 0; i < np; i++) {
+        const char *sc_list[100];
+        int nschemes = 100;
+        if (hfile_list_schemes(plugins[i], sc_list, &nschemes) < 0)
+            return;
+
+        fprintf(samtools_stdout, "    %s:\t", plugins[i]);
+        for (j = 0; j < nschemes; j++)
+            fprintf(samtools_stdout, " %s%c", sc_list[j], ",\n"[j+1==nschemes]);
+    }
+}
+
 static void usage(FILE *fp)
 {
     /* Please improve the grouping */
@@ -98,6 +165,7 @@ static void usage(FILE *fp)
 "     targetcut      cut fosmid regions (for fosmid pool only)\n"
 "     addreplacerg   adds or replaces RG tags\n"
 "     markdup        mark duplicates\n"
+"     ampliconclip   clip oligos from the end of reads\n"
 "\n"
 "  -- File operations\n"
 "     collate        shuffle and group alignments by name\n"
@@ -109,6 +177,7 @@ static void usage(FILE *fp)
 "     quickcheck     quickly check if SAM/BAM/CRAM file appears intact\n"
 "     fastq          converts a BAM to a FASTQ\n"
 "     fasta          converts a BAM to a FASTA\n"
+"     import         Converts FASTA or FASTQ files to SAM/BAM/CRAM\n"
 "\n"
 "  -- Statistics\n"
 "     bedcov         read depth per BED region\n"
@@ -118,19 +187,18 @@ static void usage(FILE *fp)
 "     idxstats       BAM index stats\n"
 "     phase          phase heterozygotes\n"
 "     stats          generate stats (former bamcheck)\n"
+"     ampliconstats  generate amplicon specific stats\n"
 "\n"
 "  -- Viewing\n"
 "     flags          explain BAM flags\n"
 "     tview          text alignment viewer\n"
 "     view           SAM<->BAM<->CRAM conversion\n"
 "     depad          convert padded BAM to unpadded BAM\n"
+"\n"
+"  -- Misc\n"
+"     help [cmd]     display this help message or help for [cmd]\n"
+"     version        detailed version information\n"
 "\n");
-#ifdef _WIN32
-    fprintf(fp,
-"Note: The Windows version of SAMtools is mainly designed for read-only\n"
-"      operations, such as viewing the alignments and generating the pileup.\n"
-"      Binary files generated by the Windows version may be buggy.\n\n");
-#endif
 }
 
 // This is a tricky one, but on Windows the filename wildcard expansion is done by
@@ -178,11 +246,12 @@ int samtools_main(int argc, char *argv[])
     else if (strcmp(argv[1], "fixmate") == 0)   ret = bam_mating(argc-1, argv+1);
     else if (strcmp(argv[1], "rmdup") == 0)     ret = bam_rmdup(argc-1, argv+1);
     else if (strcmp(argv[1], "markdup") == 0)   ret = bam_markdup(argc-1, argv+1);
+    else if (strcmp(argv[1], "ampliconclip") == 0) ret = amplicon_clip_main(argc-1, argv+1);
     else if (strcmp(argv[1], "flagstat") == 0 ||
              strcmp(argv[1], "flagstats") == 0) ret = bam_flagstat(argc-1, argv+1);
     else if (strcmp(argv[1], "calmd") == 0)     ret = bam_fillmd(argc-1, argv+1);
     else if (strcmp(argv[1], "fillmd") == 0)    ret = bam_fillmd(argc-1, argv+1);
-    else if (strcmp(argv[1], "reheader") == 0)  ret = main_reheader(argc-1, argv+1);
+    else if (strcmp(argv[1], "reheader") == 0)  ret = samtools_main_reheader(argc-1, argv+1);
     else if (strcmp(argv[1], "cat") == 0)       ret = main_cat(argc-1, argv+1);
     else if (strcmp(argv[1], "targetcut") == 0) ret = main_cut_target(argc-1, argv+1);
     else if (strcmp(argv[1], "phase") == 0)     ret = main_phase(argc-1, argv+1);
@@ -208,12 +277,10 @@ int samtools_main(int argc, char *argv[])
         return 1;
     }
     //else if (strcmp(argv[1], "tview") == 0)   ret = bam_tview_main(argc-1, argv+1);
-    else if (strcmp(argv[1], "--version") == 0) {
-        fprintf(samtools_stdout, 
-"samtools %s\n"
-"Using htslib %s\n"
-"Copyright (C) 2019 Genome Research Ltd.\n",
-               samtools_version(), hts_version());
+    else if (strcmp(argv[1], "ampliconstats") == 0)     ret = main_ampliconstats(argc-1, argv+1);
+    else if (strcmp(argv[1], "version") == 0 || \
+             strcmp(argv[1], "--version") == 0) {
+        long_version();
     }
     else if (strcmp(argv[1], "--version-only") == 0) {
         fprintf(samtools_stdout, "%s+htslib-%s\n", samtools_version(), hts_version());
diff --git a/samtools/bedcov.c b/samtools/bedcov.c
index a36d672..bccc09b 100644
--- a/samtools/bedcov.c
+++ b/samtools/bedcov.c
@@ -1,7 +1,7 @@
 /*  bedcov.c -- bedcov subcommand.
 
     Copyright (C) 2012 Broad Institute.
-    Copyright (C) 2013-2014, 2018, 2019 Genome Research Ltd.
+    Copyright (C) 2013-2014, 2018-2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -40,11 +40,14 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/kseq.h"
 KSTREAM_INIT(gzFile, gzread, 16384)
 
+#define DEFAULT_DEPTH 64000
+
 typedef struct {
     htsFile *fp;
     sam_hdr_t *header;
     hts_itr_t *iter;
     int min_mapQ;
+    uint32_t flags;  // read filtering flags
 } aux_t;
 
 static int read_bam(void *data, bam1_t *b)
@@ -55,7 +58,7 @@ static int read_bam(void *data, bam1_t *b)
     {
         ret = aux->iter? sam_itr_next(aux->fp, aux->iter, b) : sam_read1(aux->fp, aux->header, b);
         if ( ret<0 ) break;
-        if ( b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) ) continue;
+        if ( b->core.flag & aux->flags ) continue;
         if ( (int)b->core.qual < aux->min_mapQ ) continue;
         break;
     }
@@ -69,10 +72,12 @@ int main_bedcov(int argc, char *argv[])
     kstream_t *ks;
     hts_idx_t **idx;
     aux_t **aux;
-    int *n_plp, dret, i, j, m, n, c, min_mapQ = 0, skip_DN = 0;
-    int64_t *cnt;
+    int *n_plp, dret, i, j, m, n, c, ret, status = 0, min_mapQ = 0, skip_DN = 0;
+    int64_t *cnt, *pcov = NULL;
     const bam_pileup1_t **plp;
     int usage = 0, has_index_file = 0;
+    uint32_t flags = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP);
+    int tflags = 0, min_depth = -1;
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
@@ -80,11 +85,28 @@ int main_bedcov(int argc, char *argv[])
         { NULL, 0, NULL, 0 }
     };
 
-    while ((c = getopt_long(argc, argv, "Q:Xj", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "Q:Xg:G:jd:", lopts, NULL)) >= 0) {
         switch (c) {
         case 'Q': min_mapQ = atoi(optarg); break;
         case 'X': has_index_file = 1; break;
+        case 'g':
+            tflags = bam_str2flag(optarg);
+            if (tflags < 0 || tflags > ((BAM_FSUPPLEMENTARY << 1) - 1)) {
+                print_error("bedcov", "Flag value \"%s\" is not supported", optarg);
+                return 1;
+            }
+            flags &= ~tflags;
+            break;
+        case 'G':
+            tflags = bam_str2flag(optarg);
+            if (tflags < 0 || tflags > ((BAM_FSUPPLEMENTARY << 1) - 1)) {
+                print_error("bedcov", "Flag value \"%s\" is not supported", optarg);
+                return 1;
+            }
+            flags |= tflags;
+            break;
         case 'j': skip_DN = 1; break;
+        case 'd': min_depth = atoi(optarg); break;
         default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                   /* else fall-through */
         case '?': usage = 1; break;
@@ -96,7 +118,12 @@ int main_bedcov(int argc, char *argv[])
         fprintf(stderr, "Options:\n");
         fprintf(stderr, "      -Q <int>            mapping quality threshold [0]\n");
         fprintf(stderr, "      -X                  use customized index files\n");
+        fprintf(stderr, "      -g <flags>          remove the specified flags from the set used to filter out reads\n");
+        fprintf(stderr, "      -G <flags>          add the specified flags to the set used to filter out reads\n"
+                        "                          The default set is UNMAP,SECONDARY,QCFAIL,DUP or 0x704\n");
         fprintf(stderr, "      -j                  do not include deletions (D) and ref skips (N) in bedcov computation\n");
+        fprintf(stderr, "      -d <int>            depth threshold. Number of reference bases with coverage above and\n"
+                        "                          including this value will be displayed in a separate column\n");
         sam_global_opt_help(stderr, "-.--.--.");
         return 1;
     }
@@ -136,8 +163,11 @@ int main_bedcov(int argc, char *argv[])
                     argv[i+optind+1]);
             return 2;
         }
+        aux[i]->flags = flags;
     }
-    cnt = calloc(n, 8);
+    cnt = calloc(n, sizeof(*cnt));
+    if (min_depth >= 0) pcov = calloc(n, sizeof(*pcov));
+    if (!cnt || (min_depth >= 0 && !pcov)) return 2;
 
     fp = gzopen(argv[optind], "rb");
     if (fp == NULL) {
@@ -149,7 +179,8 @@ int main_bedcov(int argc, char *argv[])
     plp = calloc(n, sizeof(bam_pileup1_t*));
     while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
         char *p, *q;
-        int tid, beg, end, pos;
+        int tid, pos, num = 0;
+        int64_t beg = 0, end = 0;
         bam_mplp_t mplp;
 
         if (str.l == 0 || *str.s == '#') continue; /* empty or comment line */
@@ -158,53 +189,75 @@ int main_bedcov(int argc, char *argv[])
            be followed by a tab in that case). */
         if (strncmp(str.s, "track ", 6) == 0) continue;
         if (strncmp(str.s, "browser ", 8) == 0) continue;
-        for (p = q = str.s; *p && *p != '\t'; ++p);
-        if (*p != '\t') goto bed_error;
-        *p = 0; tid = bam_name2id(aux[0]->header, q); *p = '\t';
+        for (p = q = str.s; *p && !isspace((unsigned char)*p); ++p);
+        if (*p == 0) goto bed_error;
+        char c = *p;
+        *p = 0; tid = bam_name2id(aux[0]->header, q); *p = c;
         if (tid < 0) goto bed_error;
-        for (q = p = p + 1; isdigit(*p); ++p);
-        if (*p != '\t') goto bed_error;
-        *p = 0; beg = atoi(q); *p = '\t';
-        for (q = p = p + 1; isdigit(*p); ++p);
-        if (*p == '\t' || *p == 0) {
-            int c = *p;
-            *p = 0; end = atoi(q); *p = c;
-        } else goto bed_error;
+        num = sscanf(p + 1, "%"SCNd64" %"SCNd64, &beg, &end);
+        if (num < 2 || end < beg) goto bed_error;
 
         for (i = 0; i < n; ++i) {
             if (aux[i]->iter) hts_itr_destroy(aux[i]->iter);
             aux[i]->iter = sam_itr_queryi(idx[i], tid, beg, end);
         }
+
         mplp = bam_mplp_init(n, read_bam, (void**)aux);
-        bam_mplp_set_maxcnt(mplp, 64000);
-        memset(cnt, 0, 8 * n);
-        while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0)
+        if (min_depth > DEFAULT_DEPTH)
+            bam_mplp_set_maxcnt(mplp, min_depth);
+        else
+            bam_mplp_set_maxcnt(mplp, DEFAULT_DEPTH);
+
+        memset(cnt, 0, sizeof(*cnt) * n);
+        if (min_depth >= 0) memset(pcov, 0, sizeof(*pcov) * n);
+
+        while ((ret = bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0)
             if (pos >= beg && pos < end) {
-                for (i = 0, m = 0; i < n; ++i) {
-                    if (skip_DN)
+                for (i = 0; i < n; ++i) {
+                    m = 0;
+                    if (skip_DN || min_depth >= 0) {
                         for (j = 0; j < n_plp[i]; ++j) {
                             const bam_pileup1_t *pi = plp[i] + j;
                             if (pi->is_del || pi->is_refskip) ++m;
                         }
-                    cnt[i] += n_plp[i] - m;
+                    }
+                    int pd = n_plp[i] - m;
+                    cnt[i] += pd;
+                    if (min_depth >= 0 && pd >= min_depth) pcov[i]++;
                 }
             }
+
+        if (ret < 0) {
+            print_error("bedcov", "error reading from input file");
+            status = 2;
+            bam_mplp_destroy(mplp);
+            break;
+        }
+
         for (i = 0; i < n; ++i) {
             kputc('\t', &str);
             kputl(cnt[i], &str);
         }
+        if (min_depth >= 0) {
+            for (i = 0; i < n; ++i) {
+                kputc('\t', &str);
+                kputl(pcov[i], &str);
+            }
+        }
         puts(str.s);
         bam_mplp_destroy(mplp);
         continue;
 
 bed_error:
         fprintf(stderr, "Errors in BED line '%s'\n", str.s);
+        status = 2;
     }
     free(n_plp); free(plp);
     ks_destroy(ks);
     gzclose(fp);
 
     free(cnt);
+    free(pcov);
     for (i = 0; i < n; ++i) {
         if (aux[i]->iter) hts_itr_destroy(aux[i]->iter);
         hts_idx_destroy(idx[i]);
@@ -215,5 +268,5 @@ bed_error:
     free(aux); free(idx);
     free(str.s);
     sam_global_args_free(&ga);
-    return 0;
+    return status;
 }
diff --git a/samtools/bedcov.c.pysam.c b/samtools/bedcov.c.pysam.c
index 82b63aa..b72cbf1 100644
--- a/samtools/bedcov.c.pysam.c
+++ b/samtools/bedcov.c.pysam.c
@@ -3,7 +3,7 @@
 /*  bedcov.c -- bedcov subcommand.
 
     Copyright (C) 2012 Broad Institute.
-    Copyright (C) 2013-2014, 2018, 2019 Genome Research Ltd.
+    Copyright (C) 2013-2014, 2018-2021 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -42,11 +42,14 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/kseq.h"
 KSTREAM_INIT(gzFile, gzread, 16384)
 
+#define DEFAULT_DEPTH 64000
+
 typedef struct {
     htsFile *fp;
     sam_hdr_t *header;
     hts_itr_t *iter;
     int min_mapQ;
+    uint32_t flags;  // read filtering flags
 } aux_t;
 
 static int read_bam(void *data, bam1_t *b)
@@ -57,7 +60,7 @@ static int read_bam(void *data, bam1_t *b)
     {
         ret = aux->iter? sam_itr_next(aux->fp, aux->iter, b) : sam_read1(aux->fp, aux->header, b);
         if ( ret<0 ) break;
-        if ( b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) ) continue;
+        if ( b->core.flag & aux->flags ) continue;
         if ( (int)b->core.qual < aux->min_mapQ ) continue;
         break;
     }
@@ -71,10 +74,12 @@ int main_bedcov(int argc, char *argv[])
     kstream_t *ks;
     hts_idx_t **idx;
     aux_t **aux;
-    int *n_plp, dret, i, j, m, n, c, min_mapQ = 0, skip_DN = 0;
-    int64_t *cnt;
+    int *n_plp, dret, i, j, m, n, c, ret, status = 0, min_mapQ = 0, skip_DN = 0;
+    int64_t *cnt, *pcov = NULL;
     const bam_pileup1_t **plp;
     int usage = 0, has_index_file = 0;
+    uint32_t flags = (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP);
+    int tflags = 0, min_depth = -1;
 
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     static const struct option lopts[] = {
@@ -82,11 +87,28 @@ int main_bedcov(int argc, char *argv[])
         { NULL, 0, NULL, 0 }
     };
 
-    while ((c = getopt_long(argc, argv, "Q:Xj", lopts, NULL)) >= 0) {
+    while ((c = getopt_long(argc, argv, "Q:Xg:G:jd:", lopts, NULL)) >= 0) {
         switch (c) {
         case 'Q': min_mapQ = atoi(optarg); break;
         case 'X': has_index_file = 1; break;
+        case 'g':
+            tflags = bam_str2flag(optarg);
+            if (tflags < 0 || tflags > ((BAM_FSUPPLEMENTARY << 1) - 1)) {
+                print_error("bedcov", "Flag value \"%s\" is not supported", optarg);
+                return 1;
+            }
+            flags &= ~tflags;
+            break;
+        case 'G':
+            tflags = bam_str2flag(optarg);
+            if (tflags < 0 || tflags > ((BAM_FSUPPLEMENTARY << 1) - 1)) {
+                print_error("bedcov", "Flag value \"%s\" is not supported", optarg);
+                return 1;
+            }
+            flags |= tflags;
+            break;
         case 'j': skip_DN = 1; break;
+        case 'd': min_depth = atoi(optarg); break;
         default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
                   /* else fall-through */
         case '?': usage = 1; break;
@@ -98,7 +120,12 @@ int main_bedcov(int argc, char *argv[])
         fprintf(samtools_stderr, "Options:\n");
         fprintf(samtools_stderr, "      -Q <int>            mapping quality threshold [0]\n");
         fprintf(samtools_stderr, "      -X                  use customized index files\n");
+        fprintf(samtools_stderr, "      -g <flags>          remove the specified flags from the set used to filter out reads\n");
+        fprintf(samtools_stderr, "      -G <flags>          add the specified flags to the set used to filter out reads\n"
+                        "                          The default set is UNMAP,SECONDARY,QCFAIL,DUP or 0x704\n");
         fprintf(samtools_stderr, "      -j                  do not include deletions (D) and ref skips (N) in bedcov computation\n");
+        fprintf(samtools_stderr, "      -d <int>            depth threshold. Number of reference bases with coverage above and\n"
+                        "                          including this value will be displayed in a separate column\n");
         sam_global_opt_help(samtools_stderr, "-.--.--.");
         return 1;
     }
@@ -138,8 +165,11 @@ int main_bedcov(int argc, char *argv[])
                     argv[i+optind+1]);
             return 2;
         }
+        aux[i]->flags = flags;
     }
-    cnt = calloc(n, 8);
+    cnt = calloc(n, sizeof(*cnt));
+    if (min_depth >= 0) pcov = calloc(n, sizeof(*pcov));
+    if (!cnt || (min_depth >= 0 && !pcov)) return 2;
 
     fp = gzopen(argv[optind], "rb");
     if (fp == NULL) {
@@ -151,7 +181,8 @@ int main_bedcov(int argc, char *argv[])
     plp = calloc(n, sizeof(bam_pileup1_t*));
     while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
         char *p, *q;
-        int tid, beg, end, pos;
+        int tid, pos, num = 0;
+        int64_t beg = 0, end = 0;
         bam_mplp_t mplp;
 
         if (str.l == 0 || *str.s == '#') continue; /* empty or comment line */
@@ -160,53 +191,75 @@ int main_bedcov(int argc, char *argv[])
            be followed by a tab in that case). */
         if (strncmp(str.s, "track ", 6) == 0) continue;
         if (strncmp(str.s, "browser ", 8) == 0) continue;
-        for (p = q = str.s; *p && *p != '\t'; ++p);
-        if (*p != '\t') goto bed_error;
-        *p = 0; tid = bam_name2id(aux[0]->header, q); *p = '\t';
+        for (p = q = str.s; *p && !isspace((unsigned char)*p); ++p);
+        if (*p == 0) goto bed_error;
+        char c = *p;
+        *p = 0; tid = bam_name2id(aux[0]->header, q); *p = c;
         if (tid < 0) goto bed_error;
-        for (q = p = p + 1; isdigit(*p); ++p);
-        if (*p != '\t') goto bed_error;
-        *p = 0; beg = atoi(q); *p = '\t';
-        for (q = p = p + 1; isdigit(*p); ++p);
-        if (*p == '\t' || *p == 0) {
-            int c = *p;
-            *p = 0; end = atoi(q); *p = c;
-        } else goto bed_error;
+        num = sscanf(p + 1, "%"SCNd64" %"SCNd64, &beg, &end);
+        if (num < 2 || end < beg) goto bed_error;
 
         for (i = 0; i < n; ++i) {
             if (aux[i]->iter) hts_itr_destroy(aux[i]->iter);
             aux[i]->iter = sam_itr_queryi(idx[i], tid, beg, end);
         }
+
         mplp = bam_mplp_init(n, read_bam, (void**)aux);
-        bam_mplp_set_maxcnt(mplp, 64000);
-        memset(cnt, 0, 8 * n);
-        while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0)
+        if (min_depth > DEFAULT_DEPTH)
+            bam_mplp_set_maxcnt(mplp, min_depth);
+        else
+            bam_mplp_set_maxcnt(mplp, DEFAULT_DEPTH);
+
+        memset(cnt, 0, sizeof(*cnt) * n);
+        if (min_depth >= 0) memset(pcov, 0, sizeof(*pcov) * n);
+
+        while ((ret = bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0)
             if (pos >= beg && pos < end) {
-                for (i = 0, m = 0; i < n; ++i) {
-                    if (skip_DN)
+                for (i = 0; i < n; ++i) {
+                    m = 0;
+                    if (skip_DN || min_depth >= 0) {
                         for (j = 0; j < n_plp[i]; ++j) {
                             const bam_pileup1_t *pi = plp[i] + j;
                             if (pi->is_del || pi->is_refskip) ++m;
                         }
-                    cnt[i] += n_plp[i] - m;
+                    }
+                    int pd = n_plp[i] - m;
+                    cnt[i] += pd;
+                    if (min_depth >= 0 && pd >= min_depth) pcov[i]++;
                 }
             }
+
+        if (ret < 0) {
+            print_error("bedcov", "error reading from input file");
+            status = 2;
+            bam_mplp_destroy(mplp);
+            break;
+        }
+
         for (i = 0; i < n; ++i) {
             kputc('\t', &str);
             kputl(cnt[i], &str);
         }
+        if (min_depth >= 0) {
+            for (i = 0; i < n; ++i) {
+                kputc('\t', &str);
+                kputl(pcov[i], &str);
+            }
+        }
         samtools_puts(str.s);
         bam_mplp_destroy(mplp);
         continue;
 
 bed_error:
         fprintf(samtools_stderr, "Errors in BED line '%s'\n", str.s);
+        status = 2;
     }
     free(n_plp); free(plp);
     ks_destroy(ks);
     gzclose(fp);
 
     free(cnt);
+    free(pcov);
     for (i = 0; i < n; ++i) {
         if (aux[i]->iter) hts_itr_destroy(aux[i]->iter);
         hts_idx_destroy(idx[i]);
@@ -217,5 +270,5 @@ bed_error:
     free(aux); free(idx);
     free(str.s);
     sam_global_args_free(&ga);
-    return 0;
+    return status;
 }
diff --git a/samtools/bedidx.c b/samtools/bedidx.c
index ded2314..6b22d4e 100644
--- a/samtools/bedidx.c
+++ b/samtools/bedidx.c
@@ -573,6 +573,14 @@ const char* bed_get(void *reg_hash, int i, int filter) {
     return kh_key(h, i);
 }
 
+/**
+ * Create a region list from the region hash table
+ * @param  reg_hash  The region hash table
+ * @param  filter    0 - allow all regions, 1 - allow only selected regions
+ * @param  n_reg     Pointer to the returned region number
+ * @return           The regions list as a hts_reglist_t
+ */
+
 hts_reglist_t *bed_reglist(void *reg_hash, int filter, int *n_reg) {
 
     reghash_t *h;
diff --git a/samtools/bedidx.c.pysam.c b/samtools/bedidx.c.pysam.c
index 027e08e..533b42a 100644
--- a/samtools/bedidx.c.pysam.c
+++ b/samtools/bedidx.c.pysam.c
@@ -575,6 +575,14 @@ const char* bed_get(void *reg_hash, int i, int filter) {
     return kh_key(h, i);
 }
 
+/**
+ * Create a region list from the region hash table
+ * @param  reg_hash  The region hash table
+ * @param  filter    0 - allow all regions, 1 - allow only selected regions
+ * @param  n_reg     Pointer to the returned region number
+ * @return           The regions list as a hts_reglist_t
+ */
+
 hts_reglist_t *bed_reglist(void *reg_hash, int filter, int *n_reg) {
 
     reghash_t *h;
diff --git a/samtools/coverage.c b/samtools/coverage.c
index c4f38de..cab1f8b 100644
--- a/samtools/coverage.c
+++ b/samtools/coverage.c
@@ -1,7 +1,7 @@
 /* coverage.c -- samtools coverage subcommand
 
     Copyright (C) 2018,2019 Florian Breitwieser
-    Portions copyright (C) 2019 Genome Research Ltd.
+    Portions copyright (C) 2019-2021 Genome Research Ltd.
 
     Author: Florian P Breitwieser <florian.bw@gmail.com>
 
@@ -24,7 +24,7 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.  */
 
 /* This program calculates coverage from multiple BAMs
- * simutaneously, to achieve random access and to use the BED interface.
+ * simultaneously, to achieve random access and to use the BED interface.
  * To compile this program separately, you may:
  *
  *   gcc -g -O2 -Wall -o bamcov -D_MAIN_BAMCOV coverage.c -lhts -lz
@@ -57,19 +57,6 @@ DEALINGS IN THE SOFTWARE.  */
 
 const char *VERSION = "0.1";
 
-typedef struct {  // auxiliary data structure to hold a BAM file
-    samFile *fp;     // file handle
-    sam_hdr_t *hdr;  // file header
-    hts_itr_t *iter; // iterator to a region - NULL for us by default
-    int min_mapQ;    // mapQ filter
-    int min_len;     // length filter
-    unsigned int n_reads;  // records the number of reads seen in file
-    unsigned int n_selected_reads; // records the number of reads passing filter
-    unsigned long summed_mapQ; // summed mapQ of all reads passing filter
-    int fail_flags;
-    int required_flags;
-} bam_aux_t;
-
 typedef struct {  // auxiliary data structure to hold stats on coverage
     unsigned long long n_covered_bases;
     unsigned long long summed_coverage;
@@ -77,12 +64,23 @@ typedef struct {  // auxiliary data structure to hold stats on coverage
     unsigned long long summed_mapQ;
     unsigned int n_reads;
     unsigned int n_selected_reads;
-    int32_t tid;    // chromosome ID, defined by header
+    bool covered;
     hts_pos_t beg;
     hts_pos_t end;
     int64_t bin_width;
 } stats_aux_t;
 
+typedef struct {  // auxiliary data structure to hold a BAM file
+    samFile *fp;     // file handle
+    sam_hdr_t *hdr;  // file header
+    hts_itr_t *iter; // iterator to a region - NULL for us by default
+    int min_mapQ;    // mapQ filter
+    int min_len;     // length filter
+    int fail_flags;
+    int required_flags;
+    stats_aux_t *stats;
+} bam_aux_t;
+
 #if __STDC_VERSION__ >= 199901L
 #define VERTICAL_LINE "\u2502" // BOX DRAWINGS LIGHT VERTICAL
 
@@ -91,7 +89,7 @@ typedef struct {  // auxiliary data structure to hold stats on coverage
 // LOWER ONE EIGHTH BLOCK … FULL BLOCK
 static const char *const BLOCK_CHARS8[8] = {"\u2581", "\u2582", "\u2583", "\u2584", "\u2585", "\u2586", "\u2587", "\u2588"};
 // In some terminals / with some fonts not all UTF8 block characters are supported (e.g. Putty). Use only half and full block for those
-static const char *const BLOCK_CHARS2[2] = {"\u2584", "\u2588"};
+static const char *const BLOCK_CHARS2[2] = {".", ":"};
 
 #else
 
@@ -102,7 +100,7 @@ static const char *const BLOCK_CHARS8[8] = {
     "\xE2\x96\x81", "\xE2\x96\x82", "\xE2\x96\x83", "\xE2\x96\x84",
     "\xE2\x96\x85", "\xE2\x96\x86", "\xE2\x96\x87", "\xE2\x96\x88" };
 
-static const char *const BLOCK_CHARS2[2] = {"\xE2\x96\x84", "\xE2\x96\x88"};
+static const char *const BLOCK_CHARS2[2] = {".", ":"};
 
 #endif
 
@@ -114,11 +112,14 @@ static int usage() {
             "Input options:\n"
             "  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
             "  -l, --min-read-len INT  ignore reads shorter than INT bp [0]\n"
-            "  -q, --min-MQ INT        base quality threshold [0]\n"
-            "  -Q, --min-BQ INT        mapping quality threshold [0]\n"
+            "  -q, --min-MQ INT        mapping quality threshold [0]\n"
+            "  -Q, --min-BQ INT        base quality threshold [0]\n"
             "  --rf <int|str>          required flags: skip reads with mask bits unset []\n"
             "  --ff <int|str>          filter flags: skip reads with mask bits set \n"
             "                                      [UNMAP,SECONDARY,QCFAIL,DUP]\n"
+            "  -d, --depth INT         maximum allowed coverage depth [1000000].\n"
+            "                          If 0, depth is set to the maximum integer value,\n"
+            "                          effectively removing any depth limit.\n"
             "Output options:\n"
             "  -m, --histogram         show histogram instead of tabular output\n"
             "  -A, --ascii             show only ASCII characters in histogram\n"
@@ -171,79 +172,63 @@ static char* readable_bps(double base_pairs, char *buf) {
     return buf;
 }
 
-static void set_read_counts(bam_aux_t **data, stats_aux_t *stats, int n_bam_files) {
-    int i;
-    stats->n_reads = 0;
-    stats->n_selected_reads = 0;
-    stats->summed_mapQ = 0;
-    for (i = 0; i < n_bam_files && data[i]; ++i) {
-        stats->n_reads += data[i]->n_reads;
-        stats->n_selected_reads += data[i]->n_selected_reads;
-        stats->summed_mapQ += data[i]->summed_mapQ;
-        data[i]->n_reads = 0;
-        data[i]->n_selected_reads = 0;
-        data[i]->summed_mapQ = 0;
-    }
-}
-
 // read one alignment from one BAM file
 static int read_bam(void *data, bam1_t *b) {
     bam_aux_t *aux = (bam_aux_t*)data; // data in fact is a pointer to an auxiliary structure
+    int nref = sam_hdr_nref(aux->hdr);
     int ret;
     while (1) {
         if((ret = aux->iter? sam_itr_next(aux->fp, aux->iter, b) : sam_read1(aux->fp, aux->hdr, b)) < 0) break;
-        ++aux->n_reads;
+        if (b->core.tid >= 0 && b->core.tid < nref)
+            aux->stats[b->core.tid].n_reads++;
 
         if ( aux->fail_flags && (b->core.flag & aux->fail_flags) ) continue;
         if ( aux->required_flags && !(b->core.flag & aux->required_flags) ) continue;
         if ( b->core.qual < aux->min_mapQ ) continue;
         if ( aux->min_len && bam_cigar2qlen(b->core.n_cigar, bam_get_cigar(b)) < aux->min_len ) continue;
-        ++aux->n_selected_reads;
-        aux->summed_mapQ += b->core.qual;
+        if (b->core.tid >= 0 && b->core.tid < nref) {
+            aux->stats[b->core.tid].n_selected_reads++;
+            aux->stats[b->core.tid].summed_mapQ += b->core.qual;
+        }
         break;
     }
     return ret;
 }
 
-void print_tabular_line(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats) {
-    fputs(sam_hdr_tid2name(h, stats->tid), file_out);
-    double region_len = (double) stats->end - stats->beg;
+void print_tabular_line(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, int tid) {
+    fputs(sam_hdr_tid2name(h, tid), file_out);
+    double region_len = (double) stats[tid].end - stats[tid].beg;
     fprintf(file_out, "\t%"PRId64"\t%"PRId64"\t%u\t%llu\t%g\t%g\t%.3g\t%.3g\n",
-            stats->beg+1,
-            stats->end,
-            stats->n_selected_reads,
-            stats->n_covered_bases,
-            100.0 * stats->n_covered_bases / region_len,
-            stats->summed_coverage / region_len,
-            stats->summed_coverage > 0? stats->summed_baseQ/(double) stats->summed_coverage : 0,
-            stats->n_selected_reads > 0? stats->summed_mapQ/(double) stats->n_selected_reads : 0
+            stats[tid].beg+1,
+            stats[tid].end,
+            stats[tid].n_selected_reads,
+            stats[tid].n_covered_bases,
+            100.0 * stats[tid].n_covered_bases / region_len,
+            stats[tid].summed_coverage / region_len,
+            stats[tid].summed_coverage > 0? stats[tid].summed_baseQ/(double) stats[tid].summed_coverage : 0,
+            stats[tid].n_selected_reads > 0? stats[tid].summed_mapQ/(double) stats[tid].n_selected_reads : 0
            );
 }
 
-void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, const uint32_t *hist,
+void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, int tid, const uint32_t *hist,
         const int hist_size, const bool full_utf) {
     int i, col;
     bool show_percentiles = false;
     const int n_rows = 10;
     const char * const * BLOCK_CHARS = full_utf? BLOCK_CHARS8 : BLOCK_CHARS2;
     const int blockchar_len = full_utf? 8 : 2;
-    /*
-       if (stats->beg == 0) {
-       stats->end = h->target_len[stats->tid];
-       }
-       */
-    double region_len = stats->end - stats->beg;
+    double region_len = stats[tid].end - stats[tid].beg;
 
     // Calculate histogram that contains percent covered
     double hist_data[hist_size];
     double max_val = 0.0;
     for (i = 0; i < hist_size; ++i) {
-        hist_data[i] = 100 * hist[i] / (double) stats->bin_width;
+        hist_data[i] = 100 * hist[i] / (double) stats[tid].bin_width;
         if (hist_data[i] > max_val) max_val = hist_data[i];
     }
 
     char buf[30];
-    fprintf(file_out, "%s (%sbp)\n", sam_hdr_tid2name(h, stats->tid), readable_bps(sam_hdr_tid2len(h, stats->tid), buf));
+    fprintf(file_out, "%s (%sbp)\n", sam_hdr_tid2name(h, tid), readable_bps(sam_hdr_tid2len(h, tid), buf));
 
     double row_bin_size = max_val / (double) n_rows;
     for (i = n_rows-1; i >= 0; --i) {
@@ -253,7 +238,7 @@ void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, co
         } else {
             fprintf(file_out, ">%7.2f%% ", current_bin);
         }
-        fprintf(file_out, VERTICAL_LINE);
+        fprintf(file_out, full_utf ? VERTICAL_LINE : "|");
         for (col = 0; col < hist_size; ++col) {
             // get the difference in eights, or halfs when full UTF8 is not supported
             int cur_val_diff = round(blockchar_len * (hist_data[col] - current_bin) / row_bin_size) - 1;
@@ -266,22 +251,22 @@ void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, co
                 fprintf(file_out, "%s", BLOCK_CHARS[cur_val_diff]);
             }
         }
-        fprintf(file_out, VERTICAL_LINE);
+        fprintf(file_out, full_utf ? VERTICAL_LINE : "|");
         fputc(' ', file_out);
         switch (i) {
-            case 9: fprintf(file_out, "Number of reads: %i", stats->n_selected_reads); break;
-            case 8: if (stats->n_reads - stats->n_selected_reads > 0) fprintf(file_out, "    (%i filtered)", stats->n_reads - stats->n_selected_reads); break;
-            case 7: fprintf(file_out, "Covered bases:   %sbp", readable_bps(stats->n_covered_bases, buf)); break;
+            case 9: fprintf(file_out, "Number of reads: %i", stats[tid].n_selected_reads); break;
+            case 8: if (stats[tid].n_reads - stats[tid].n_selected_reads > 0) fprintf(file_out, "    (%i filtered)", stats[tid].n_reads - stats[tid].n_selected_reads); break;
+            case 7: fprintf(file_out, "Covered bases:   %sbp", readable_bps(stats[tid].n_covered_bases, buf)); break;
             case 6: fprintf(file_out, "Percent covered: %.4g%%",
-                            100.0 * stats->n_covered_bases / region_len); break;
+                            100.0 * stats[tid].n_covered_bases / region_len); break;
             case 5: fprintf(file_out, "Mean coverage:   %.3gx",
-                            stats->summed_coverage / region_len); break;
+                            stats[tid].summed_coverage / region_len); break;
             case 4: fprintf(file_out, "Mean baseQ:      %.3g",
-                            stats->summed_baseQ/(double) stats->summed_coverage); break;
+                            stats[tid].summed_baseQ/(double) stats[tid].summed_coverage); break;
             case 3: fprintf(file_out, "Mean mapQ:       %.3g",
-                            stats->summed_mapQ/(double) stats->n_selected_reads); break;
+                            stats[tid].summed_mapQ/(double) stats[tid].n_selected_reads); break;
             case 1: fprintf(file_out, "Histo bin width: %sbp",
-                            readable_bps(stats->bin_width, buf)); break;
+                            readable_bps(stats[tid].bin_width, buf)); break;
             case 0: fprintf(file_out, "Histo max bin:   %.5g%%", max_val); break;
         };
         fputc('\n', file_out);
@@ -290,22 +275,22 @@ void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, co
     // print x axis. Could be made pretty for widths that are not divisible
     // by 10 by variable spacing of the labels, instead of placing a label every 10 characters
     char buf2[50];
-    fprintf(file_out, "     %s", center_text(readable_bps(stats->beg + 1, buf), buf2, 10));
+    fprintf(file_out, "     %s", center_text(readable_bps(stats[tid].beg + 1, buf), buf2, 10));
     int rest;
     for (rest = 10; rest < 10*(hist_size/10); rest += 10) {
-        fprintf(file_out, "%s", center_text(readable_bps(stats->beg + stats->bin_width*rest, buf), buf2, 10));
+        fprintf(file_out, "%s", center_text(readable_bps(stats[tid].beg + stats[tid].bin_width*rest, buf), buf2, 10));
     }
     int last_padding = hist_size%10;
-    fprintf(file_out, "%*s%s", last_padding, " ", center_text(readable_bps(stats->end, buf), buf2, 10));
+    fprintf(file_out, "%*s%s", last_padding, " ", center_text(readable_bps(stats[tid].end, buf), buf2, 10));
     fprintf(file_out, "\n");
 }
 
 int main_coverage(int argc, char *argv[]) {
     int status = EXIT_SUCCESS;
 
-    int ret, tid, pos, i, j;
+    int ret, tid = -1, old_tid = -1, pos, i, j;
 
-    int max_depth = 0;
+    int max_depth = 1000000;
     int opt_min_baseQ = 0;
     int opt_min_mapQ = 0;
     int opt_min_len = 0;
@@ -330,7 +315,6 @@ int main_coverage(int argc, char *argv[]) {
     bool opt_print_header = true;
     bool opt_print_tabular = true;
     bool opt_print_histogram = false;
-    bool *covered_tids = NULL;
     bool opt_full_utf = true;
 
     FILE *file_out = stdout;
@@ -343,7 +327,7 @@ int main_coverage(int argc, char *argv[]) {
         {"incl-flags", required_argument, NULL, 1}, // require flag
         {"excl-flags", required_argument, NULL, 2}, // filter flag
         {"bam-list", required_argument, NULL, 'b'},
-        {"min-read-len", required_argument, NULL, 'L'},
+        {"min-read-len", required_argument, NULL, 'l'},
         {"min-MQ", required_argument, NULL, 'q'},
         {"min-mq", required_argument, NULL, 'q'},
         {"min-BQ", required_argument, NULL, 'Q'},
@@ -355,13 +339,14 @@ int main_coverage(int argc, char *argv[]) {
         {"n-bins", required_argument, NULL, 'w'},
         {"region", required_argument, NULL, 'r'},
         {"help", no_argument, NULL, 'h'},
+        {"depth", required_argument, NULL, 'd'},
         { NULL, 0, NULL, 0 }
     };
 
     // parse the command line
     int c;
     opterr = 0;
-    while ((c = getopt_long(argc, argv, "Ao:L:q:Q:hHw:r:b:m", lopts, NULL)) != -1) {
+    while ((c = getopt_long(argc, argv, "Ao:l:q:Q:hHw:r:b:md:", lopts, NULL)) != -1) {
         switch (c) {
             case 1:
                 if ((required_flags = bam_str2flag(optarg)) < 0) {
@@ -372,9 +357,10 @@ int main_coverage(int argc, char *argv[]) {
                     fprintf(stderr,"Could not parse --ff %s\n", optarg); return EXIT_FAILURE;
                 }; break;
             case 'o': opt_output_file = optarg; opt_full_width = false; break;
-            case 'L': opt_min_len = atoi(optarg); break;
-            case 'q': opt_min_baseQ = atoi(optarg); break;
-            case 'Q': opt_min_mapQ = atoi(optarg); break;
+            case 'l': opt_min_len = atoi(optarg); break;
+            case 'q': opt_min_mapQ = atoi(optarg); break;
+            case 'Q': opt_min_baseQ = atoi(optarg); break;
+            case 'd': max_depth = atoi(optarg); break; // maximum coverage depth
             case 'w': opt_n_bins = atoi(optarg); opt_full_width = false;
                       opt_print_histogram = true; opt_print_tabular = false;
                       break;
@@ -427,7 +413,7 @@ int main_coverage(int argc, char *argv[]) {
             if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) {
                 columns = csbi.srWindow.Right - csbi.srWindow.Left + 1;
             }
-#else
+#elif defined TIOCGWINSZ
             struct winsize w;
             if (ioctl(2, TIOCGWINSZ, &w) == 0)
                 columns = w.ws_col;
@@ -460,7 +446,7 @@ int main_coverage(int argc, char *argv[]) {
 
     data = (bam_aux_t **)calloc(n_bam_files, sizeof(bam_aux_t*)); // data[i] for the i-th BAM file
     if (!data) {
-        print_error("coverage", "Failed to allocate memory");
+        print_error_errno("coverage", "Failed to allocate memory");
         status = EXIT_FAILURE;
         goto coverage_end;
     }
@@ -469,7 +455,7 @@ int main_coverage(int argc, char *argv[]) {
         int rf;
         data[i] = (bam_aux_t *) calloc(1, sizeof(bam_aux_t));
         if (!data[i]) {
-            print_error("coverage", "Failed to allocate memory");
+            print_error_errno("coverage", "Failed to allocate memory");
             status = EXIT_FAILURE;
             goto coverage_end;
         }
@@ -485,12 +471,12 @@ int main_coverage(int argc, char *argv[]) {
 
         // Set CRAM options on file handle - returns 0 on success
         if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
-            print_error_errno("coverage", "Failed to set CRAM_OPT_REQUIRED_FIELDS value");
+            print_error("coverage", "Failed to set CRAM_OPT_REQUIRED_FIELDS value");
             status = EXIT_FAILURE;
             goto coverage_end;
         }
         if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
-            print_error_errno("coverage", "Failed to set CRAM_OPT_DECODE_MD value");
+            print_error("coverage", "Failed to set CRAM_OPT_DECODE_MD value");
             status = EXIT_FAILURE;
             goto coverage_end;
         }
@@ -516,7 +502,7 @@ int main_coverage(int argc, char *argv[]) {
             data[i]->iter = sam_itr_querys(idx, data[i]->hdr, opt_reg); // set the iterator
             hts_idx_destroy(idx); // the index is not needed any more; free the memory
             if (data[i]->iter == NULL) {
-                print_error_errno("coverage", "Failed to parse region \"%s\"", opt_reg);
+                print_error("coverage", "Failed to parse region \"%s\". Check the region format or region name presence in the file \"%s\"", opt_reg, argv[optind+i]);
                 status = EXIT_FAILURE;
                 goto coverage_end;
             }
@@ -528,30 +514,30 @@ int main_coverage(int argc, char *argv[]) {
 
     h = data[0]->hdr; // easy access to the header of the 1st BAM
     int n_targets = sam_hdr_nref(h);
-    covered_tids = calloc(n_targets, sizeof(bool));
-    stats = calloc(1, sizeof(stats_aux_t));
-    if (!covered_tids || !stats) {
-        print_error("coverage", "Failed to allocate memory");
+    stats = calloc(n_targets, sizeof(stats_aux_t));
+    if (!stats) {
+        print_error_errno("coverage", "Failed to allocate memory");
         status = EXIT_FAILURE;
         goto coverage_end;
     }
 
     int64_t n_bins = opt_n_bins;
     if (opt_reg) {
-        stats->tid = data[0]->iter->tid;
-        stats->beg = data[0]->iter->beg; // and to the parsed region coordinates
-        stats->end = data[0]->iter->end;
-        if (stats->end == HTS_POS_MAX) {
-            stats->end = sam_hdr_tid2len(h, stats->tid);
+        stats_aux_t *s = stats + data[0]->iter->tid;
+        s->beg = data[0]->iter->beg; // and to the parsed region coordinates
+        s->end = data[0]->iter->end;
+        if (s->end == HTS_POS_MAX) {
+            s->end = sam_hdr_tid2len(h, data[0]->iter->tid);
         }
-        if (opt_n_bins > stats->end - stats->beg) {
-            n_bins = stats->end - stats->beg;
+        if (opt_n_bins > s->end - s->beg) {
+            n_bins = s->end - s->beg;
         }
-        stats->bin_width = (stats->end-stats->beg) / n_bins;
-    } else {
-        stats->tid = -1;
+        s->bin_width = (s->end-s->beg) / (n_bins > 0 ? n_bins : 1);
     }
 
+    for (i=0; i<n_bam_files; i++)
+        data[i]->stats = stats;
+
     int64_t current_bin = 0;
 
     // the core multi-pileup loop
@@ -567,43 +553,41 @@ int main_coverage(int argc, char *argv[]) {
     n_plp = (int*) calloc(n_bam_files, sizeof(int*)); // n_plp[i] is the number of covering reads from the i-th BAM
     plp = (const bam_pileup1_t**) calloc(n_bam_files, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads (internal in mplp)
     if (!hist || !n_plp || !plp) {
-        print_error("coverage", "Failed to allocate memory");
+        print_error_errno("coverage", "Failed to allocate memory");
         status = EXIT_FAILURE;
         goto coverage_end;
     }
     while ((ret=bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // come to the next covered position
 
-        if (tid != stats->tid) { // Next target sequence
-            if (stats->tid >= 0) { // It's not the first sequence, print results
-                set_read_counts(data, stats, n_bam_files);
+        if (tid != old_tid) { // Next target sequence
+            if (old_tid >= 0) {
                 if (opt_print_histogram) {
-                    print_hist(file_out, h, stats, hist, n_bins, opt_full_utf);
+                    print_hist(file_out, h, stats, old_tid, hist, n_bins, opt_full_utf);
                     fputc('\n', file_out);
                 } else if (opt_print_tabular) {
-                    print_tabular_line(file_out, h, stats);
+                    print_tabular_line(file_out, h, stats, old_tid);
                 }
 
-                // reset data
-                memset(stats, 0, sizeof(stats_aux_t));
                 if (opt_print_histogram)
                     memset(hist, 0, n_bins*sizeof(uint32_t));
             }
 
-            stats->tid = tid;
-            covered_tids[tid] = true;
+            stats[tid].covered = true;
             if (!opt_reg)
-                stats->end = sam_hdr_tid2len(h, tid);
+                stats[tid].end = sam_hdr_tid2len(h, tid);
 
             if (opt_print_histogram) {
-                n_bins = opt_n_bins > stats->end-stats->beg? stats->end-stats->beg : opt_n_bins;
-                stats->bin_width = (stats->end-stats->beg) / n_bins;
+                n_bins = opt_n_bins > stats[tid].end-stats[tid].beg? stats[tid].end-stats[tid].beg : opt_n_bins;
+                stats[tid].bin_width = (stats[tid].end-stats[tid].beg) / n_bins;
             }
+
+            old_tid = tid;
         }
-        if (pos < stats->beg || pos >= stats->end) continue; // out of range; skip
+        if (pos < stats[tid].beg || pos >= stats[tid].end) continue; // out of range; skip
         if (tid >= n_targets) continue;     // diff number of @SQ lines per file?
 
         if (opt_print_histogram) {
-            current_bin = (pos - stats->beg) / stats->bin_width;
+            current_bin = (pos - stats[tid].beg) / stats[tid].bin_width;
         }
 
         bool count_base = false;
@@ -616,39 +600,40 @@ int main_coverage(int argc, char *argv[]) {
                 else if (p->qpos < p->b->core.l_qseq &&
                         bam_get_qual(p->b)[p->qpos] < opt_min_baseQ) --depth_at_pos; // low base quality
                 else
-                    stats->summed_baseQ += bam_get_qual(p->b)[p->qpos];
+                    stats[tid].summed_baseQ += bam_get_qual(p->b)[p->qpos];
             }
             if (depth_at_pos > 0) {
                 count_base = true;
-                stats->summed_coverage += depth_at_pos;
+                stats[tid].summed_coverage += depth_at_pos;
             }
             // hist[current_bin] += depth_at_pos;  // Add counts to the histogram here to have one based on coverage
             //fprintf(file_out, "\t%d", n_plp[i] - m); // this the depth to output
         }
         if (count_base) {
-            ++(stats->n_covered_bases);
+            stats[tid].n_covered_bases++;
             if (opt_print_histogram && current_bin < n_bins)
                 ++(hist[current_bin]); // Histogram based on breadth of coverage
         }
     }
 
-    if (stats->tid != -1) {
-        set_read_counts(data, stats, n_bam_files);
+    if (tid == -1 && opt_reg && *opt_reg != '*')
+        // Region specified but no data covering it.
+        tid = data[0]->iter->tid;
+
+    if (tid < n_targets && tid >=0) {
         if (opt_print_histogram) {
-            print_hist(file_out, h, stats, hist, n_bins, opt_full_utf);
+            print_hist(file_out, h, stats, tid, hist, n_bins, opt_full_utf);
         } else if (opt_print_tabular) {
-            print_tabular_line(file_out, h, stats);
+            print_tabular_line(file_out, h, stats, tid);
         }
     }
 
 
     if (!opt_reg && opt_print_tabular) {
-        memset(stats, 0, sizeof(stats_aux_t));
         for (i = 0; i < n_targets; ++i) {
-            if (!covered_tids[i]) {
-                stats->tid = i;
-                stats->end = sam_hdr_tid2len(h, i);
-                print_tabular_line(file_out, h, stats);
+            if (!stats[i].covered) {
+                stats[i].end = sam_hdr_tid2len(h, i);
+                print_tabular_line(file_out, h, stats, i);
             }
         }
     }
@@ -658,13 +643,11 @@ int main_coverage(int argc, char *argv[]) {
 coverage_end:
     if (n_plp) free(n_plp);
     if (plp) free(plp);
-    bam_mplp_destroy(mplp);
+    if (mplp) bam_mplp_destroy(mplp);
 
-    if (covered_tids) free(covered_tids);
     if (hist) free(hist);
     if (stats) free(stats);
 
-
     // Close files and free data structures
     if (!(file_out == stdout || fclose(file_out) == 0)) {
         if (status == EXIT_SUCCESS) {
diff --git a/samtools/coverage.c.pysam.c b/samtools/coverage.c.pysam.c
index 127a528..662deb5 100644
--- a/samtools/coverage.c.pysam.c
+++ b/samtools/coverage.c.pysam.c
@@ -3,7 +3,7 @@
 /* coverage.c -- samtools coverage subcommand
 
     Copyright (C) 2018,2019 Florian Breitwieser
-    Portions copyright (C) 2019 Genome Research Ltd.
+    Portions copyright (C) 2019-2021 Genome Research Ltd.
 
     Author: Florian P Breitwieser <florian.bw@gmail.com>
 
@@ -26,7 +26,7 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.  */
 
 /* This program calculates coverage from multiple BAMs
- * simutaneously, to achieve random access and to use the BED interface.
+ * simultaneously, to achieve random access and to use the BED interface.
  * To compile this program separately, you may:
  *
  *   gcc -g -O2 -Wall -o bamcov -D_MAIN_BAMCOV coverage.c -lhts -lz
@@ -59,19 +59,6 @@ DEALINGS IN THE SOFTWARE.  */
 
 const char *VERSION = "0.1";
 
-typedef struct {  // auxiliary data structure to hold a BAM file
-    samFile *fp;     // file handle
-    sam_hdr_t *hdr;  // file header
-    hts_itr_t *iter; // iterator to a region - NULL for us by default
-    int min_mapQ;    // mapQ filter
-    int min_len;     // length filter
-    unsigned int n_reads;  // records the number of reads seen in file
-    unsigned int n_selected_reads; // records the number of reads passing filter
-    unsigned long summed_mapQ; // summed mapQ of all reads passing filter
-    int fail_flags;
-    int required_flags;
-} bam_aux_t;
-
 typedef struct {  // auxiliary data structure to hold stats on coverage
     unsigned long long n_covered_bases;
     unsigned long long summed_coverage;
@@ -79,12 +66,23 @@ typedef struct {  // auxiliary data structure to hold stats on coverage
     unsigned long long summed_mapQ;
     unsigned int n_reads;
     unsigned int n_selected_reads;
-    int32_t tid;    // chromosome ID, defined by header
+    bool covered;
     hts_pos_t beg;
     hts_pos_t end;
     int64_t bin_width;
 } stats_aux_t;
 
+typedef struct {  // auxiliary data structure to hold a BAM file
+    samFile *fp;     // file handle
+    sam_hdr_t *hdr;  // file header
+    hts_itr_t *iter; // iterator to a region - NULL for us by default
+    int min_mapQ;    // mapQ filter
+    int min_len;     // length filter
+    int fail_flags;
+    int required_flags;
+    stats_aux_t *stats;
+} bam_aux_t;
+
 #if __STDC_VERSION__ >= 199901L
 #define VERTICAL_LINE "\u2502" // BOX DRAWINGS LIGHT VERTICAL
 
@@ -93,7 +91,7 @@ typedef struct {  // auxiliary data structure to hold stats on coverage
 // LOWER ONE EIGHTH BLOCK … FULL BLOCK
 static const char *const BLOCK_CHARS8[8] = {"\u2581", "\u2582", "\u2583", "\u2584", "\u2585", "\u2586", "\u2587", "\u2588"};
 // In some terminals / with some fonts not all UTF8 block characters are supported (e.g. Putty). Use only half and full block for those
-static const char *const BLOCK_CHARS2[2] = {"\u2584", "\u2588"};
+static const char *const BLOCK_CHARS2[2] = {".", ":"};
 
 #else
 
@@ -104,7 +102,7 @@ static const char *const BLOCK_CHARS8[8] = {
     "\xE2\x96\x81", "\xE2\x96\x82", "\xE2\x96\x83", "\xE2\x96\x84",
     "\xE2\x96\x85", "\xE2\x96\x86", "\xE2\x96\x87", "\xE2\x96\x88" };
 
-static const char *const BLOCK_CHARS2[2] = {"\xE2\x96\x84", "\xE2\x96\x88"};
+static const char *const BLOCK_CHARS2[2] = {".", ":"};
 
 #endif
 
@@ -116,11 +114,14 @@ static int usage() {
             "Input options:\n"
             "  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
             "  -l, --min-read-len INT  ignore reads shorter than INT bp [0]\n"
-            "  -q, --min-MQ INT        base quality threshold [0]\n"
-            "  -Q, --min-BQ INT        mapping quality threshold [0]\n"
+            "  -q, --min-MQ INT        mapping quality threshold [0]\n"
+            "  -Q, --min-BQ INT        base quality threshold [0]\n"
             "  --rf <int|str>          required flags: skip reads with mask bits unset []\n"
             "  --ff <int|str>          filter flags: skip reads with mask bits set \n"
             "                                      [UNMAP,SECONDARY,QCFAIL,DUP]\n"
+            "  -d, --depth INT         maximum allowed coverage depth [1000000].\n"
+            "                          If 0, depth is set to the maximum integer value,\n"
+            "                          effectively removing any depth limit.\n"
             "Output options:\n"
             "  -m, --histogram         show histogram instead of tabular output\n"
             "  -A, --ascii             show only ASCII characters in histogram\n"
@@ -173,79 +174,63 @@ static char* readable_bps(double base_pairs, char *buf) {
     return buf;
 }
 
-static void set_read_counts(bam_aux_t **data, stats_aux_t *stats, int n_bam_files) {
-    int i;
-    stats->n_reads = 0;
-    stats->n_selected_reads = 0;
-    stats->summed_mapQ = 0;
-    for (i = 0; i < n_bam_files && data[i]; ++i) {
-        stats->n_reads += data[i]->n_reads;
-        stats->n_selected_reads += data[i]->n_selected_reads;
-        stats->summed_mapQ += data[i]->summed_mapQ;
-        data[i]->n_reads = 0;
-        data[i]->n_selected_reads = 0;
-        data[i]->summed_mapQ = 0;
-    }
-}
-
 // read one alignment from one BAM file
 static int read_bam(void *data, bam1_t *b) {
     bam_aux_t *aux = (bam_aux_t*)data; // data in fact is a pointer to an auxiliary structure
+    int nref = sam_hdr_nref(aux->hdr);
     int ret;
     while (1) {
         if((ret = aux->iter? sam_itr_next(aux->fp, aux->iter, b) : sam_read1(aux->fp, aux->hdr, b)) < 0) break;
-        ++aux->n_reads;
+        if (b->core.tid >= 0 && b->core.tid < nref)
+            aux->stats[b->core.tid].n_reads++;
 
         if ( aux->fail_flags && (b->core.flag & aux->fail_flags) ) continue;
         if ( aux->required_flags && !(b->core.flag & aux->required_flags) ) continue;
         if ( b->core.qual < aux->min_mapQ ) continue;
         if ( aux->min_len && bam_cigar2qlen(b->core.n_cigar, bam_get_cigar(b)) < aux->min_len ) continue;
-        ++aux->n_selected_reads;
-        aux->summed_mapQ += b->core.qual;
+        if (b->core.tid >= 0 && b->core.tid < nref) {
+            aux->stats[b->core.tid].n_selected_reads++;
+            aux->stats[b->core.tid].summed_mapQ += b->core.qual;
+        }
         break;
     }
     return ret;
 }
 
-void print_tabular_line(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats) {
-    fputs(sam_hdr_tid2name(h, stats->tid), file_out);
-    double region_len = (double) stats->end - stats->beg;
+void print_tabular_line(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, int tid) {
+    fputs(sam_hdr_tid2name(h, tid), file_out);
+    double region_len = (double) stats[tid].end - stats[tid].beg;
     fprintf(file_out, "\t%"PRId64"\t%"PRId64"\t%u\t%llu\t%g\t%g\t%.3g\t%.3g\n",
-            stats->beg+1,
-            stats->end,
-            stats->n_selected_reads,
-            stats->n_covered_bases,
-            100.0 * stats->n_covered_bases / region_len,
-            stats->summed_coverage / region_len,
-            stats->summed_coverage > 0? stats->summed_baseQ/(double) stats->summed_coverage : 0,
-            stats->n_selected_reads > 0? stats->summed_mapQ/(double) stats->n_selected_reads : 0
+            stats[tid].beg+1,
+            stats[tid].end,
+            stats[tid].n_selected_reads,
+            stats[tid].n_covered_bases,
+            100.0 * stats[tid].n_covered_bases / region_len,
+            stats[tid].summed_coverage / region_len,
+            stats[tid].summed_coverage > 0? stats[tid].summed_baseQ/(double) stats[tid].summed_coverage : 0,
+            stats[tid].n_selected_reads > 0? stats[tid].summed_mapQ/(double) stats[tid].n_selected_reads : 0
            );
 }
 
-void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, const uint32_t *hist,
+void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, int tid, const uint32_t *hist,
         const int hist_size, const bool full_utf) {
     int i, col;
     bool show_percentiles = false;
     const int n_rows = 10;
     const char * const * BLOCK_CHARS = full_utf? BLOCK_CHARS8 : BLOCK_CHARS2;
     const int blockchar_len = full_utf? 8 : 2;
-    /*
-       if (stats->beg == 0) {
-       stats->end = h->target_len[stats->tid];
-       }
-       */
-    double region_len = stats->end - stats->beg;
+    double region_len = stats[tid].end - stats[tid].beg;
 
     // Calculate histogram that contains percent covered
     double hist_data[hist_size];
     double max_val = 0.0;
     for (i = 0; i < hist_size; ++i) {
-        hist_data[i] = 100 * hist[i] / (double) stats->bin_width;
+        hist_data[i] = 100 * hist[i] / (double) stats[tid].bin_width;
         if (hist_data[i] > max_val) max_val = hist_data[i];
     }
 
     char buf[30];
-    fprintf(file_out, "%s (%sbp)\n", sam_hdr_tid2name(h, stats->tid), readable_bps(sam_hdr_tid2len(h, stats->tid), buf));
+    fprintf(file_out, "%s (%sbp)\n", sam_hdr_tid2name(h, tid), readable_bps(sam_hdr_tid2len(h, tid), buf));
 
     double row_bin_size = max_val / (double) n_rows;
     for (i = n_rows-1; i >= 0; --i) {
@@ -255,7 +240,7 @@ void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, co
         } else {
             fprintf(file_out, ">%7.2f%% ", current_bin);
         }
-        fprintf(file_out, VERTICAL_LINE);
+        fprintf(file_out, full_utf ? VERTICAL_LINE : "|");
         for (col = 0; col < hist_size; ++col) {
             // get the difference in eights, or halfs when full UTF8 is not supported
             int cur_val_diff = round(blockchar_len * (hist_data[col] - current_bin) / row_bin_size) - 1;
@@ -268,22 +253,22 @@ void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, co
                 fprintf(file_out, "%s", BLOCK_CHARS[cur_val_diff]);
             }
         }
-        fprintf(file_out, VERTICAL_LINE);
+        fprintf(file_out, full_utf ? VERTICAL_LINE : "|");
         fputc(' ', file_out);
         switch (i) {
-            case 9: fprintf(file_out, "Number of reads: %i", stats->n_selected_reads); break;
-            case 8: if (stats->n_reads - stats->n_selected_reads > 0) fprintf(file_out, "    (%i filtered)", stats->n_reads - stats->n_selected_reads); break;
-            case 7: fprintf(file_out, "Covered bases:   %sbp", readable_bps(stats->n_covered_bases, buf)); break;
+            case 9: fprintf(file_out, "Number of reads: %i", stats[tid].n_selected_reads); break;
+            case 8: if (stats[tid].n_reads - stats[tid].n_selected_reads > 0) fprintf(file_out, "    (%i filtered)", stats[tid].n_reads - stats[tid].n_selected_reads); break;
+            case 7: fprintf(file_out, "Covered bases:   %sbp", readable_bps(stats[tid].n_covered_bases, buf)); break;
             case 6: fprintf(file_out, "Percent covered: %.4g%%",
-                            100.0 * stats->n_covered_bases / region_len); break;
+                            100.0 * stats[tid].n_covered_bases / region_len); break;
             case 5: fprintf(file_out, "Mean coverage:   %.3gx",
-                            stats->summed_coverage / region_len); break;
+                            stats[tid].summed_coverage / region_len); break;
             case 4: fprintf(file_out, "Mean baseQ:      %.3g",
-                            stats->summed_baseQ/(double) stats->summed_coverage); break;
+                            stats[tid].summed_baseQ/(double) stats[tid].summed_coverage); break;
             case 3: fprintf(file_out, "Mean mapQ:       %.3g",
-                            stats->summed_mapQ/(double) stats->n_selected_reads); break;
+                            stats[tid].summed_mapQ/(double) stats[tid].n_selected_reads); break;
             case 1: fprintf(file_out, "Histo bin width: %sbp",
-                            readable_bps(stats->bin_width, buf)); break;
+                            readable_bps(stats[tid].bin_width, buf)); break;
             case 0: fprintf(file_out, "Histo max bin:   %.5g%%", max_val); break;
         };
         fputc('\n', file_out);
@@ -292,22 +277,22 @@ void print_hist(FILE *file_out, const sam_hdr_t *h, const stats_aux_t *stats, co
     // print x axis. Could be made pretty for widths that are not divisible
     // by 10 by variable spacing of the labels, instead of placing a label every 10 characters
     char buf2[50];
-    fprintf(file_out, "     %s", center_text(readable_bps(stats->beg + 1, buf), buf2, 10));
+    fprintf(file_out, "     %s", center_text(readable_bps(stats[tid].beg + 1, buf), buf2, 10));
     int rest;
     for (rest = 10; rest < 10*(hist_size/10); rest += 10) {
-        fprintf(file_out, "%s", center_text(readable_bps(stats->beg + stats->bin_width*rest, buf), buf2, 10));
+        fprintf(file_out, "%s", center_text(readable_bps(stats[tid].beg + stats[tid].bin_width*rest, buf), buf2, 10));
     }
     int last_padding = hist_size%10;
-    fprintf(file_out, "%*s%s", last_padding, " ", center_text(readable_bps(stats->end, buf), buf2, 10));
+    fprintf(file_out, "%*s%s", last_padding, " ", center_text(readable_bps(stats[tid].end, buf), buf2, 10));
     fprintf(file_out, "\n");
 }
 
 int main_coverage(int argc, char *argv[]) {
     int status = EXIT_SUCCESS;
 
-    int ret, tid, pos, i, j;
+    int ret, tid = -1, old_tid = -1, pos, i, j;
 
-    int max_depth = 0;
+    int max_depth = 1000000;
     int opt_min_baseQ = 0;
     int opt_min_mapQ = 0;
     int opt_min_len = 0;
@@ -332,7 +317,6 @@ int main_coverage(int argc, char *argv[]) {
     bool opt_print_header = true;
     bool opt_print_tabular = true;
     bool opt_print_histogram = false;
-    bool *covered_tids = NULL;
     bool opt_full_utf = true;
 
     FILE *file_out = samtools_stdout;
@@ -345,7 +329,7 @@ int main_coverage(int argc, char *argv[]) {
         {"incl-flags", required_argument, NULL, 1}, // require flag
         {"excl-flags", required_argument, NULL, 2}, // filter flag
         {"bam-list", required_argument, NULL, 'b'},
-        {"min-read-len", required_argument, NULL, 'L'},
+        {"min-read-len", required_argument, NULL, 'l'},
         {"min-MQ", required_argument, NULL, 'q'},
         {"min-mq", required_argument, NULL, 'q'},
         {"min-BQ", required_argument, NULL, 'Q'},
@@ -357,13 +341,14 @@ int main_coverage(int argc, char *argv[]) {
         {"n-bins", required_argument, NULL, 'w'},
         {"region", required_argument, NULL, 'r'},
         {"help", no_argument, NULL, 'h'},
+        {"depth", required_argument, NULL, 'd'},
         { NULL, 0, NULL, 0 }
     };
 
     // parse the command line
     int c;
     opterr = 0;
-    while ((c = getopt_long(argc, argv, "Ao:L:q:Q:hHw:r:b:m", lopts, NULL)) != -1) {
+    while ((c = getopt_long(argc, argv, "Ao:l:q:Q:hHw:r:b:md:", lopts, NULL)) != -1) {
         switch (c) {
             case 1:
                 if ((required_flags = bam_str2flag(optarg)) < 0) {
@@ -374,9 +359,10 @@ int main_coverage(int argc, char *argv[]) {
                     fprintf(samtools_stderr,"Could not parse --ff %s\n", optarg); return EXIT_FAILURE;
                 }; break;
             case 'o': opt_output_file = optarg; opt_full_width = false; break;
-            case 'L': opt_min_len = atoi(optarg); break;
-            case 'q': opt_min_baseQ = atoi(optarg); break;
-            case 'Q': opt_min_mapQ = atoi(optarg); break;
+            case 'l': opt_min_len = atoi(optarg); break;
+            case 'q': opt_min_mapQ = atoi(optarg); break;
+            case 'Q': opt_min_baseQ = atoi(optarg); break;
+            case 'd': max_depth = atoi(optarg); break; // maximum coverage depth
             case 'w': opt_n_bins = atoi(optarg); opt_full_width = false;
                       opt_print_histogram = true; opt_print_tabular = false;
                       break;
@@ -429,7 +415,7 @@ int main_coverage(int argc, char *argv[]) {
             if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) {
                 columns = csbi.srWindow.Right - csbi.srWindow.Left + 1;
             }
-#else
+#elif defined TIOCGWINSZ
             struct winsize w;
             if (ioctl(2, TIOCGWINSZ, &w) == 0)
                 columns = w.ws_col;
@@ -462,7 +448,7 @@ int main_coverage(int argc, char *argv[]) {
 
     data = (bam_aux_t **)calloc(n_bam_files, sizeof(bam_aux_t*)); // data[i] for the i-th BAM file
     if (!data) {
-        print_error("coverage", "Failed to allocate memory");
+        print_error_errno("coverage", "Failed to allocate memory");
         status = EXIT_FAILURE;
         goto coverage_end;
     }
@@ -471,7 +457,7 @@ int main_coverage(int argc, char *argv[]) {
         int rf;
         data[i] = (bam_aux_t *) calloc(1, sizeof(bam_aux_t));
         if (!data[i]) {
-            print_error("coverage", "Failed to allocate memory");
+            print_error_errno("coverage", "Failed to allocate memory");
             status = EXIT_FAILURE;
             goto coverage_end;
         }
@@ -487,12 +473,12 @@ int main_coverage(int argc, char *argv[]) {
 
         // Set CRAM options on file handle - returns 0 on success
         if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
-            print_error_errno("coverage", "Failed to set CRAM_OPT_REQUIRED_FIELDS value");
+            print_error("coverage", "Failed to set CRAM_OPT_REQUIRED_FIELDS value");
             status = EXIT_FAILURE;
             goto coverage_end;
         }
         if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
-            print_error_errno("coverage", "Failed to set CRAM_OPT_DECODE_MD value");
+            print_error("coverage", "Failed to set CRAM_OPT_DECODE_MD value");
             status = EXIT_FAILURE;
             goto coverage_end;
         }
@@ -518,7 +504,7 @@ int main_coverage(int argc, char *argv[]) {
             data[i]->iter = sam_itr_querys(idx, data[i]->hdr, opt_reg); // set the iterator
             hts_idx_destroy(idx); // the index is not needed any more; free the memory
             if (data[i]->iter == NULL) {
-                print_error_errno("coverage", "Failed to parse region \"%s\"", opt_reg);
+                print_error("coverage", "Failed to parse region \"%s\". Check the region format or region name presence in the file \"%s\"", opt_reg, argv[optind+i]);
                 status = EXIT_FAILURE;
                 goto coverage_end;
             }
@@ -530,30 +516,30 @@ int main_coverage(int argc, char *argv[]) {
 
     h = data[0]->hdr; // easy access to the header of the 1st BAM
     int n_targets = sam_hdr_nref(h);
-    covered_tids = calloc(n_targets, sizeof(bool));
-    stats = calloc(1, sizeof(stats_aux_t));
-    if (!covered_tids || !stats) {
-        print_error("coverage", "Failed to allocate memory");
+    stats = calloc(n_targets, sizeof(stats_aux_t));
+    if (!stats) {
+        print_error_errno("coverage", "Failed to allocate memory");
         status = EXIT_FAILURE;
         goto coverage_end;
     }
 
     int64_t n_bins = opt_n_bins;
     if (opt_reg) {
-        stats->tid = data[0]->iter->tid;
-        stats->beg = data[0]->iter->beg; // and to the parsed region coordinates
-        stats->end = data[0]->iter->end;
-        if (stats->end == HTS_POS_MAX) {
-            stats->end = sam_hdr_tid2len(h, stats->tid);
+        stats_aux_t *s = stats + data[0]->iter->tid;
+        s->beg = data[0]->iter->beg; // and to the parsed region coordinates
+        s->end = data[0]->iter->end;
+        if (s->end == HTS_POS_MAX) {
+            s->end = sam_hdr_tid2len(h, data[0]->iter->tid);
         }
-        if (opt_n_bins > stats->end - stats->beg) {
-            n_bins = stats->end - stats->beg;
+        if (opt_n_bins > s->end - s->beg) {
+            n_bins = s->end - s->beg;
         }
-        stats->bin_width = (stats->end-stats->beg) / n_bins;
-    } else {
-        stats->tid = -1;
+        s->bin_width = (s->end-s->beg) / (n_bins > 0 ? n_bins : 1);
     }
 
+    for (i=0; i<n_bam_files; i++)
+        data[i]->stats = stats;
+
     int64_t current_bin = 0;
 
     // the core multi-pileup loop
@@ -569,43 +555,41 @@ int main_coverage(int argc, char *argv[]) {
     n_plp = (int*) calloc(n_bam_files, sizeof(int*)); // n_plp[i] is the number of covering reads from the i-th BAM
     plp = (const bam_pileup1_t**) calloc(n_bam_files, sizeof(bam_pileup1_t*)); // plp[i] points to the array of covering reads (internal in mplp)
     if (!hist || !n_plp || !plp) {
-        print_error("coverage", "Failed to allocate memory");
+        print_error_errno("coverage", "Failed to allocate memory");
         status = EXIT_FAILURE;
         goto coverage_end;
     }
     while ((ret=bam_mplp_auto(mplp, &tid, &pos, n_plp, plp)) > 0) { // come to the next covered position
 
-        if (tid != stats->tid) { // Next target sequence
-            if (stats->tid >= 0) { // It's not the first sequence, print results
-                set_read_counts(data, stats, n_bam_files);
+        if (tid != old_tid) { // Next target sequence
+            if (old_tid >= 0) {
                 if (opt_print_histogram) {
-                    print_hist(file_out, h, stats, hist, n_bins, opt_full_utf);
+                    print_hist(file_out, h, stats, old_tid, hist, n_bins, opt_full_utf);
                     fputc('\n', file_out);
                 } else if (opt_print_tabular) {
-                    print_tabular_line(file_out, h, stats);
+                    print_tabular_line(file_out, h, stats, old_tid);
                 }
 
-                // reset data
-                memset(stats, 0, sizeof(stats_aux_t));
                 if (opt_print_histogram)
                     memset(hist, 0, n_bins*sizeof(uint32_t));
             }
 
-            stats->tid = tid;
-            covered_tids[tid] = true;
+            stats[tid].covered = true;
             if (!opt_reg)
-                stats->end = sam_hdr_tid2len(h, tid);
+                stats[tid].end = sam_hdr_tid2len(h, tid);
 
             if (opt_print_histogram) {
-                n_bins = opt_n_bins > stats->end-stats->beg? stats->end-stats->beg : opt_n_bins;
-                stats->bin_width = (stats->end-stats->beg) / n_bins;
+                n_bins = opt_n_bins > stats[tid].end-stats[tid].beg? stats[tid].end-stats[tid].beg : opt_n_bins;
+                stats[tid].bin_width = (stats[tid].end-stats[tid].beg) / n_bins;
             }
+
+            old_tid = tid;
         }
-        if (pos < stats->beg || pos >= stats->end) continue; // out of range; skip
+        if (pos < stats[tid].beg || pos >= stats[tid].end) continue; // out of range; skip
         if (tid >= n_targets) continue;     // diff number of @SQ lines per file?
 
         if (opt_print_histogram) {
-            current_bin = (pos - stats->beg) / stats->bin_width;
+            current_bin = (pos - stats[tid].beg) / stats[tid].bin_width;
         }
 
         bool count_base = false;
@@ -618,39 +602,40 @@ int main_coverage(int argc, char *argv[]) {
                 else if (p->qpos < p->b->core.l_qseq &&
                         bam_get_qual(p->b)[p->qpos] < opt_min_baseQ) --depth_at_pos; // low base quality
                 else
-                    stats->summed_baseQ += bam_get_qual(p->b)[p->qpos];
+                    stats[tid].summed_baseQ += bam_get_qual(p->b)[p->qpos];
             }
             if (depth_at_pos > 0) {
                 count_base = true;
-                stats->summed_coverage += depth_at_pos;
+                stats[tid].summed_coverage += depth_at_pos;
             }
             // hist[current_bin] += depth_at_pos;  // Add counts to the histogram here to have one based on coverage
             //fprintf(file_out, "\t%d", n_plp[i] - m); // this the depth to output
         }
         if (count_base) {
-            ++(stats->n_covered_bases);
+            stats[tid].n_covered_bases++;
             if (opt_print_histogram && current_bin < n_bins)
                 ++(hist[current_bin]); // Histogram based on breadth of coverage
         }
     }
 
-    if (stats->tid != -1) {
-        set_read_counts(data, stats, n_bam_files);
+    if (tid == -1 && opt_reg && *opt_reg != '*')
+        // Region specified but no data covering it.
+        tid = data[0]->iter->tid;
+
+    if (tid < n_targets && tid >=0) {
         if (opt_print_histogram) {
-            print_hist(file_out, h, stats, hist, n_bins, opt_full_utf);
+            print_hist(file_out, h, stats, tid, hist, n_bins, opt_full_utf);
         } else if (opt_print_tabular) {
-            print_tabular_line(file_out, h, stats);
+            print_tabular_line(file_out, h, stats, tid);
         }
     }
 
 
     if (!opt_reg && opt_print_tabular) {
-        memset(stats, 0, sizeof(stats_aux_t));
         for (i = 0; i < n_targets; ++i) {
-            if (!covered_tids[i]) {
-                stats->tid = i;
-                stats->end = sam_hdr_tid2len(h, i);
-                print_tabular_line(file_out, h, stats);
+            if (!stats[i].covered) {
+                stats[i].end = sam_hdr_tid2len(h, i);
+                print_tabular_line(file_out, h, stats, i);
             }
         }
     }
@@ -660,13 +645,11 @@ int main_coverage(int argc, char *argv[]) {
 coverage_end:
     if (n_plp) free(n_plp);
     if (plp) free(plp);
-    bam_mplp_destroy(mplp);
+    if (mplp) bam_mplp_destroy(mplp);
 
-    if (covered_tids) free(covered_tids);
     if (hist) free(hist);
     if (stats) free(stats);
 
-
     // Close files and free data structures
     if (!(file_out == samtools_stdout || fclose(file_out) == 0)) {
         if (status == EXIT_SUCCESS) {
diff --git a/samtools/cut_target.c b/samtools/cut_target.c
index e59f51b..7c8387c 100644
--- a/samtools/cut_target.c
+++ b/samtools/cut_target.c
@@ -63,7 +63,7 @@ static uint16_t gencns(ct_t *g, int n, const bam_pileup1_t *plp)
     if (n > g->max_bases) { // enlarge g->bases
         g->max_bases = n;
         kroundup32(g->max_bases);
-        g->bases = realloc(g->bases, g->max_bases * 2);
+        g->bases = realloc(g->bases, (size_t) g->max_bases * 2);
     }
     for (i = k = 0; i < n; ++i) {
         const bam_pileup1_t *p = plp + i;
@@ -170,7 +170,7 @@ static int read_aln(void *data, bam1_t *b)
 
 int main_cut_target(int argc, char *argv[])
 {
-    int c, tid, pos, n, lasttid = -1, usage = 0;
+    int c, tid, pos, n, lasttid = -1, usage = 0, status = EXIT_SUCCESS;
     hts_pos_t l, max_l;
     const bam_pileup1_t *p;
     bam_plp_t plp;
@@ -237,6 +237,12 @@ int main_cut_target(int argc, char *argv[])
         cns[pos] = gencns(&g, n, p);
     }
     process_cns(g.h, lasttid, l, cns);
+
+    if (n < 0) {
+        print_error("targetcut", "error reading from \"%s\"", argv[optind]);
+        status = EXIT_FAILURE;
+    }
+
     free(cns);
     sam_hdr_destroy(g.h);
     bam_plp_destroy(plp);
@@ -247,5 +253,5 @@ int main_cut_target(int argc, char *argv[])
     errmod_destroy(g.em);
     free(g.bases);
     sam_global_args_free(&ga);
-    return 0;
+    return status;
 }
diff --git a/samtools/cut_target.c.pysam.c b/samtools/cut_target.c.pysam.c
index bbc2d29..babe42b 100644
--- a/samtools/cut_target.c.pysam.c
+++ b/samtools/cut_target.c.pysam.c
@@ -65,7 +65,7 @@ static uint16_t gencns(ct_t *g, int n, const bam_pileup1_t *plp)
     if (n > g->max_bases) { // enlarge g->bases
         g->max_bases = n;
         kroundup32(g->max_bases);
-        g->bases = realloc(g->bases, g->max_bases * 2);
+        g->bases = realloc(g->bases, (size_t) g->max_bases * 2);
     }
     for (i = k = 0; i < n; ++i) {
         const bam_pileup1_t *p = plp + i;
@@ -172,7 +172,7 @@ static int read_aln(void *data, bam1_t *b)
 
 int main_cut_target(int argc, char *argv[])
 {
-    int c, tid, pos, n, lasttid = -1, usage = 0;
+    int c, tid, pos, n, lasttid = -1, usage = 0, status = EXIT_SUCCESS;
     hts_pos_t l, max_l;
     const bam_pileup1_t *p;
     bam_plp_t plp;
@@ -239,6 +239,12 @@ int main_cut_target(int argc, char *argv[])
         cns[pos] = gencns(&g, n, p);
     }
     process_cns(g.h, lasttid, l, cns);
+
+    if (n < 0) {
+        print_error("targetcut", "error reading from \"%s\"", argv[optind]);
+        status = EXIT_FAILURE;
+    }
+
     free(cns);
     sam_hdr_destroy(g.h);
     bam_plp_destroy(plp);
@@ -249,5 +255,5 @@ int main_cut_target(int argc, char *argv[])
     errmod_destroy(g.em);
     free(g.bases);
     sam_global_args_free(&ga);
-    return 0;
+    return status;
 }
diff --git a/samtools/dict.c b/samtools/dict.c
index c159c24..029d548 100644
--- a/samtools/dict.c
+++ b/samtools/dict.c
@@ -1,6 +1,6 @@
 /*  dict.c -- create a sequence dictionary file.
 
-    Copyright (C) 2015 Genome Research Ltd.
+    Copyright (C) 2015, 2020 Genome Research Ltd.
 
     Author: Shane McCarthy <sm15@sanger.ac.uk>
 
@@ -25,6 +25,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <config.h>
 
 #include <stdio.h>
+#include <string.h>
 #include <unistd.h>
 #include <zlib.h>
 #include <getopt.h>
@@ -37,7 +38,7 @@ typedef struct _args_t
 {
     char *output_fname, *fname;
     char *assembly, *species, *uri;
-    int  header;
+    int  alias, header;
 }
 args_t;
 
@@ -79,6 +80,20 @@ static void write_dict(const char *fn, args_t *args)
         hts_md5_final(digest, md5);
         hts_md5_hex(hex, digest);
         fprintf(out, "@SQ\tSN:%s\tLN:%d\tM5:%s", seq->name.s, k, hex);
+        if (args->alias) {
+            const char *name = seq->name.s;
+            if (strncmp(name, "chr", 3) == 0) {
+                name += 3;
+                fprintf(out, "\tAN:%s", name);
+            }
+            else
+                fprintf(out, "\tAN:chr%s", name);
+
+            if (strcmp(name, "M") == 0)
+                fprintf(out, ",chrMT,MT");
+            else if (strcmp(name, "MT") == 0)
+                fprintf(out, ",chrM,M");
+        }
         if (args->uri)
             fprintf(out, "\tUR:%s", args->uri);
         else if (strcmp(fn, "-") != 0) {
@@ -107,8 +122,10 @@ static int dict_usage(void)
     fprintf(stderr, "About:   Create a sequence dictionary file from a fasta file\n");
     fprintf(stderr, "Usage:   samtools dict [options] <file.fa|file.fa.gz>\n\n");
     fprintf(stderr, "Options: -a, --assembly STR    assembly\n");
+    fprintf(stderr, "         -A, --alias, --alternative-name\n");
+    fprintf(stderr, "                               add AN tag by adding/removing 'chr'\n");
     fprintf(stderr, "         -H, --no-header       do not print @HD line\n");
-    fprintf(stderr, "         -o, --output STR      file to write out dict file [stdout]\n");
+    fprintf(stderr, "         -o, --output FILE     file to write out dict file [stdout]\n");
     fprintf(stderr, "         -s, --species STR     species\n");
     fprintf(stderr, "         -u, --uri STR         URI [file:///abs/path/to/file.fa]\n");
     fprintf(stderr, "\n");
@@ -124,6 +141,8 @@ int dict_main(int argc, char *argv[])
     {
         {"help", no_argument, NULL, 'h'},
         {"no-header", no_argument, NULL, 'H'},
+        {"alias", no_argument, NULL, 'A'},
+        {"alternative-name", no_argument, NULL, 'A'},
         {"assembly", required_argument, NULL, 'a'},
         {"species", required_argument, NULL, 's'},
         {"uri", required_argument, NULL, 'u'},
@@ -131,10 +150,11 @@ int dict_main(int argc, char *argv[])
         {NULL, 0, NULL, 0}
     };
     int c;
-    while ( (c=getopt_long(argc,argv,"?hHa:s:u:o:",loptions,NULL))>0 )
+    while ( (c=getopt_long(argc,argv,"?AhHa:s:u:o:",loptions,NULL))>0 )
     {
         switch (c)
         {
+            case 'A': args->alias = 1; break;
             case 'a': args->assembly = optarg; break;
             case 's': args->species = optarg; break;
             case 'u': args->uri = optarg; break;
diff --git a/samtools/dict.c.pysam.c b/samtools/dict.c.pysam.c
index 87ec1ac..ca54c48 100644
--- a/samtools/dict.c.pysam.c
+++ b/samtools/dict.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  dict.c -- create a sequence dictionary file.
 
-    Copyright (C) 2015 Genome Research Ltd.
+    Copyright (C) 2015, 2020 Genome Research Ltd.
 
     Author: Shane McCarthy <sm15@sanger.ac.uk>
 
@@ -27,6 +27,7 @@ DEALINGS IN THE SOFTWARE.  */
 #include <config.h>
 
 #include <stdio.h>
+#include <string.h>
 #include <unistd.h>
 #include <zlib.h>
 #include <getopt.h>
@@ -39,7 +40,7 @@ typedef struct _args_t
 {
     char *output_fname, *fname;
     char *assembly, *species, *uri;
-    int  header;
+    int  alias, header;
 }
 args_t;
 
@@ -55,19 +56,19 @@ static void write_dict(const char *fn, args_t *args)
     fp = strcmp(fn, "-") ? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
     if (fp == 0) {
         fprintf(samtools_stderr, "dict: %s: No such file or directory\n", fn);
-        exit(1);
+        samtools_exit(1);
     }
     FILE *out = samtools_stdout;
     if (args->output_fname) {
         out = fopen(args->output_fname, "w");
         if (out == NULL) {
           fprintf(samtools_stderr, "dict: %s: Cannot open file for writing\n", args->output_fname);
-          exit(1);
+          samtools_exit(1);
         }
     }
 
     if (!(md5 = hts_md5_init()))
-        exit(1);
+        samtools_exit(1);
 
     seq = kseq_init(fp);
     if (args->header) fprintf(out, "@HD\tVN:1.0\tSO:unsorted\n");
@@ -81,6 +82,20 @@ static void write_dict(const char *fn, args_t *args)
         hts_md5_final(digest, md5);
         hts_md5_hex(hex, digest);
         fprintf(out, "@SQ\tSN:%s\tLN:%d\tM5:%s", seq->name.s, k, hex);
+        if (args->alias) {
+            const char *name = seq->name.s;
+            if (strncmp(name, "chr", 3) == 0) {
+                name += 3;
+                fprintf(out, "\tAN:%s", name);
+            }
+            else
+                fprintf(out, "\tAN:chr%s", name);
+
+            if (strcmp(name, "M") == 0)
+                fprintf(out, ",chrMT,MT");
+            else if (strcmp(name, "MT") == 0)
+                fprintf(out, ",chrM,M");
+        }
         if (args->uri)
             fprintf(out, "\tUR:%s", args->uri);
         else if (strcmp(fn, "-") != 0) {
@@ -109,8 +124,10 @@ static int dict_usage(void)
     fprintf(samtools_stderr, "About:   Create a sequence dictionary file from a fasta file\n");
     fprintf(samtools_stderr, "Usage:   samtools dict [options] <file.fa|file.fa.gz>\n\n");
     fprintf(samtools_stderr, "Options: -a, --assembly STR    assembly\n");
+    fprintf(samtools_stderr, "         -A, --alias, --alternative-name\n");
+    fprintf(samtools_stderr, "                               add AN tag by adding/removing 'chr'\n");
     fprintf(samtools_stderr, "         -H, --no-header       do not print @HD line\n");
-    fprintf(samtools_stderr, "         -o, --output STR      file to write out dict file [samtools_stdout]\n");
+    fprintf(samtools_stderr, "         -o, --output FILE     file to write out dict file [samtools_stdout]\n");
     fprintf(samtools_stderr, "         -s, --species STR     species\n");
     fprintf(samtools_stderr, "         -u, --uri STR         URI [file:///abs/path/to/file.fa]\n");
     fprintf(samtools_stderr, "\n");
@@ -126,6 +143,8 @@ int dict_main(int argc, char *argv[])
     {
         {"help", no_argument, NULL, 'h'},
         {"no-header", no_argument, NULL, 'H'},
+        {"alias", no_argument, NULL, 'A'},
+        {"alternative-name", no_argument, NULL, 'A'},
         {"assembly", required_argument, NULL, 'a'},
         {"species", required_argument, NULL, 's'},
         {"uri", required_argument, NULL, 'u'},
@@ -133,10 +152,11 @@ int dict_main(int argc, char *argv[])
         {NULL, 0, NULL, 0}
     };
     int c;
-    while ( (c=getopt_long(argc,argv,"?hHa:s:u:o:",loptions,NULL))>0 )
+    while ( (c=getopt_long(argc,argv,"?AhHa:s:u:o:",loptions,NULL))>0 )
     {
         switch (c)
         {
+            case 'A': args->alias = 1; break;
             case 'a': args->assembly = optarg; break;
             case 's': args->species = optarg; break;
             case 'u': args->uri = optarg; break;
diff --git a/samtools/faidx.c b/samtools/faidx.c
index 162233f..03b5d65 100644
--- a/samtools/faidx.c
+++ b/samtools/faidx.c
@@ -1,6 +1,6 @@
 /*  faidx.c -- faidx subcommand.
 
-    Copyright (C) 2008, 2009, 2013, 2016, 2018-2019 Genome Research Ltd.
+    Copyright (C) 2008, 2009, 2013, 2016, 2018-2020 Genome Research Ltd.
     Portions copyright (C) 2011 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -198,14 +198,16 @@ static int read_regions_from_file(faidx_t *faid, hFILE *in_file, FILE *file, con
 
 static int usage(FILE *fp, enum fai_format_options format, int exit_status)
 {
-    char *tool, *file_type;
+    char *tool, *file_type, *index_name;
 
     if (format == FAI_FASTA) {
         tool = "faidx <file.fa|file.fa.gz>";
         file_type = "FASTA";
+        index_name = "file.fa";
     } else {
         tool = "fqidx <file.fq|file.fq.gz>";
         file_type = "FASTQ";
+        index_name = "file.fq";
     }
 
     fprintf(fp, "Usage: samtools %s [<reg> [...]]\n", tool);
@@ -219,8 +221,10 @@ static int usage(FILE *fp, enum fai_format_options format, int exit_status)
                 "                          TYPE = rc   for /rc on negative strand (default)\n"
                 "                                 no   for no strand indicator\n"
                 "                                 sign for (+) / (-)\n"
-                "                                 custom,<pos>,<neg> for custom indicator\n",
-                file_type, file_type);
+                "                                 custom,<pos>,<neg> for custom indicator\n"
+                "     --fai-idx      FILE  name of the index file (default %s.fai).\n"
+                "     --gzi-idx      FILE  name of compressed file index (default %s.gz.gzi).\n",
+                file_type, file_type, index_name, index_name);
 
 
     if (format == FAI_FASTA) {
@@ -241,6 +245,8 @@ int faidx_core(int argc, char *argv[], enum fai_format_options format)
     char *pos_strand_name = ""; // Extension to add to name for +ve strand
     char *neg_strand_name = "/rc"; // Extension to add to name for -ve strand
     char *strand_names = NULL; // Used for custom strand annotation
+    char *fai_name = NULL; // specified index name
+    char *gzi_name = NULL; // specified compressed index name
     FILE* file_out = stdout;/* output stream */
 
     static const struct option lopts[] = {
@@ -252,6 +258,8 @@ int faidx_core(int argc, char *argv[], enum fai_format_options format)
         { "fastq", no_argument,              NULL, 'f' },
         { "reverse-complement", no_argument, NULL, 'i' },
         { "mark-strand", required_argument, NULL, 1000 },
+        { "fai-idx", required_argument,     NULL, 1001 },
+        { "gzi-idx", required_argument,     NULL, 1002 },
         { NULL, 0, NULL, 0 }
     };
 
@@ -300,6 +308,8 @@ int faidx_core(int argc, char *argv[], enum fai_format_options format)
                     return usage(stderr, format, EXIT_FAILURE);
                 }
                 break;
+            case 1001: fai_name = optarg; break;
+            case 1002: gzi_name = optarg; break;
             default:  break;
         }
     }
@@ -307,19 +317,40 @@ int faidx_core(int argc, char *argv[], enum fai_format_options format)
     if ( argc==optind )
         return usage(stdout, format, EXIT_SUCCESS);
 
-    if ( optind+1 == argc && !region_file)
-    {
-        if (fai_build(argv[optind]) != 0) {
-            fprintf(stderr, "[faidx] Could not build fai index %s.fai\n", argv[optind]);
+    if (optind+1 == argc && !region_file) {
+        if (output_file && !fai_name)
+            fai_name = output_file;
+
+        if (fai_build3(argv[optind], fai_name, gzi_name) != 0) {
+            if (fai_name)
+                fprintf(stderr, "[faidx] Could not build fai index %s", fai_name);
+            else
+                fprintf(stderr, "[faidx] Could not build fai index %s.fai", argv[optind]);
+
+            if (gzi_name)
+                fprintf(stderr, " or compressed index %s\n", gzi_name);
+            else
+                fprintf(stderr, "\n");
+
             return EXIT_FAILURE;
         }
+
         return 0;
     }
 
-    faidx_t *fai = fai_load_format(argv[optind], format);
+    faidx_t *fai = fai_load3_format(argv[optind], fai_name, gzi_name, FAI_CREATE, format);
+
+    if (!fai) {
+        if (fai_name)
+            fprintf(stderr, "[faidx] Could not load fai index %s", fai_name);
+        else
+            fprintf(stderr, "[faidx] Could not build fai index %s.fai", argv[optind]);
+
+        if (gzi_name)
+            fprintf(stderr, " or compressed index %s\n", gzi_name);
+        else
+            fprintf(stderr, "\n");
 
-    if ( !fai ) {
-        fprintf(stderr, "[faidx] Could not load fai index of %s\n", argv[optind]);
         return EXIT_FAILURE;
     }
 
diff --git a/samtools/faidx.c.pysam.c b/samtools/faidx.c.pysam.c
index e73e63b..0bc515b 100644
--- a/samtools/faidx.c.pysam.c
+++ b/samtools/faidx.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  faidx.c -- faidx subcommand.
 
-    Copyright (C) 2008, 2009, 2013, 2016, 2018-2019 Genome Research Ltd.
+    Copyright (C) 2008, 2009, 2013, 2016, 2018-2020 Genome Research Ltd.
     Portions copyright (C) 2011 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -200,14 +200,16 @@ static int read_regions_from_file(faidx_t *faid, hFILE *in_file, FILE *file, con
 
 static int usage(FILE *fp, enum fai_format_options format, int exit_status)
 {
-    char *tool, *file_type;
+    char *tool, *file_type, *index_name;
 
     if (format == FAI_FASTA) {
         tool = "faidx <file.fa|file.fa.gz>";
         file_type = "FASTA";
+        index_name = "file.fa";
     } else {
         tool = "fqidx <file.fq|file.fq.gz>";
         file_type = "FASTQ";
+        index_name = "file.fq";
     }
 
     fprintf(fp, "Usage: samtools %s [<reg> [...]]\n", tool);
@@ -221,8 +223,10 @@ static int usage(FILE *fp, enum fai_format_options format, int exit_status)
                 "                          TYPE = rc   for /rc on negative strand (default)\n"
                 "                                 no   for no strand indicator\n"
                 "                                 sign for (+) / (-)\n"
-                "                                 custom,<pos>,<neg> for custom indicator\n",
-                file_type, file_type);
+                "                                 custom,<pos>,<neg> for custom indicator\n"
+                "     --fai-idx      FILE  name of the index file (default %s.fai).\n"
+                "     --gzi-idx      FILE  name of compressed file index (default %s.gz.gzi).\n",
+                file_type, file_type, index_name, index_name);
 
 
     if (format == FAI_FASTA) {
@@ -243,6 +247,8 @@ int faidx_core(int argc, char *argv[], enum fai_format_options format)
     char *pos_strand_name = ""; // Extension to add to name for +ve strand
     char *neg_strand_name = "/rc"; // Extension to add to name for -ve strand
     char *strand_names = NULL; // Used for custom strand annotation
+    char *fai_name = NULL; // specified index name
+    char *gzi_name = NULL; // specified compressed index name
     FILE* file_out = samtools_stdout;/* output stream */
 
     static const struct option lopts[] = {
@@ -254,6 +260,8 @@ int faidx_core(int argc, char *argv[], enum fai_format_options format)
         { "fastq", no_argument,              NULL, 'f' },
         { "reverse-complement", no_argument, NULL, 'i' },
         { "mark-strand", required_argument, NULL, 1000 },
+        { "fai-idx", required_argument,     NULL, 1001 },
+        { "gzi-idx", required_argument,     NULL, 1002 },
         { NULL, 0, NULL, 0 }
     };
 
@@ -302,6 +310,8 @@ int faidx_core(int argc, char *argv[], enum fai_format_options format)
                     return usage(samtools_stderr, format, EXIT_FAILURE);
                 }
                 break;
+            case 1001: fai_name = optarg; break;
+            case 1002: gzi_name = optarg; break;
             default:  break;
         }
     }
@@ -309,19 +319,40 @@ int faidx_core(int argc, char *argv[], enum fai_format_options format)
     if ( argc==optind )
         return usage(samtools_stdout, format, EXIT_SUCCESS);
 
-    if ( optind+1 == argc && !region_file)
-    {
-        if (fai_build(argv[optind]) != 0) {
-            fprintf(samtools_stderr, "[faidx] Could not build fai index %s.fai\n", argv[optind]);
+    if (optind+1 == argc && !region_file) {
+        if (output_file && !fai_name)
+            fai_name = output_file;
+
+        if (fai_build3(argv[optind], fai_name, gzi_name) != 0) {
+            if (fai_name)
+                fprintf(samtools_stderr, "[faidx] Could not build fai index %s", fai_name);
+            else
+                fprintf(samtools_stderr, "[faidx] Could not build fai index %s.fai", argv[optind]);
+
+            if (gzi_name)
+                fprintf(samtools_stderr, " or compressed index %s\n", gzi_name);
+            else
+                fprintf(samtools_stderr, "\n");
+
             return EXIT_FAILURE;
         }
+
         return 0;
     }
 
-    faidx_t *fai = fai_load_format(argv[optind], format);
+    faidx_t *fai = fai_load3_format(argv[optind], fai_name, gzi_name, FAI_CREATE, format);
+
+    if (!fai) {
+        if (fai_name)
+            fprintf(samtools_stderr, "[faidx] Could not load fai index %s", fai_name);
+        else
+            fprintf(samtools_stderr, "[faidx] Could not build fai index %s.fai", argv[optind]);
+
+        if (gzi_name)
+            fprintf(samtools_stderr, " or compressed index %s\n", gzi_name);
+        else
+            fprintf(samtools_stderr, "\n");
 
-    if ( !fai ) {
-        fprintf(samtools_stderr, "[faidx] Could not load fai index of %s\n", argv[optind]);
         return EXIT_FAILURE;
     }
 
diff --git a/samtools/htslib-1.10/LICENSE b/samtools/htslib-1.10/LICENSE
deleted file mode 100644
index f70e757..0000000
--- a/samtools/htslib-1.10/LICENSE
+++ /dev/null
@@ -1,69 +0,0 @@
-[Files in this distribution outwith the cram/ subdirectory are distributed
-according to the terms of the following MIT/Expat license.]
-
-The MIT/Expat License
-
-Copyright (C) 2012-2019 Genome Research Ltd.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-
-
-[Files within the cram/ subdirectory in this distribution are distributed
-according to the terms of the following Modified 3-Clause BSD license.]
-
-The Modified-BSD License
-
-Copyright (C) 2012-2019 Genome Research Ltd.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-   this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
-3. Neither the names Genome Research Ltd and Wellcome Trust Sanger Institute
-   nor the names of its contributors may be used to endorse or promote products
-   derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR ITS CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-[The use of a range of years within a copyright notice in this distribution
-should be interpreted as being equivalent to a list of years including the
-first and last year specified and all consecutive years between them.
-
-For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009,
-2011-2012" should be interpreted as being identical to a notice that reads
-"Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice
-that reads "Copyright (C) 2005-2012" should be interpreted as being identical
-to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010,
-2011, 2012".]
diff --git a/samtools/htslib-1.10/README b/samtools/htslib-1.10/README
deleted file mode 100644
index 4225bec..0000000
--- a/samtools/htslib-1.10/README
+++ /dev/null
@@ -1,5 +0,0 @@
-HTSlib is an implementation of a unified C library for accessing common file
-formats, such as SAM, CRAM, VCF, and BCF, used for high-throughput sequencing
-data.  It is the core library used by samtools and bcftools.
-
-See INSTALL for building and installation instructions.
diff --git a/samtools/padding.c b/samtools/padding.c
index a769efe..11b098e 100644
--- a/samtools/padding.c
+++ b/samtools/padding.c
@@ -1,7 +1,7 @@
 /*  padding.c -- depad subcommand.
 
     Copyright (C) 2011, 2012 Broad Institute.
-    Copyright (C) 2014-2016, 2019 Genome Research Ltd.
+    Copyright (C) 2014-2016, 2019-2020 Genome Research Ltd.
     Portions copyright (C) 2012, 2013 Peter Cock, The James Hutton Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -38,24 +38,38 @@ DEALINGS IN THE SOFTWARE.  */
 
 #define bam_reg2bin(b,e) hts_reg2bin((b),(e), 14, 5)
 
-// The one and only function needed from sam.c.
-// Explicitly here to avoid including bam.h translation layer.
-extern char *samfaipath(const char *fn_ref);
-
-static void replace_cigar(bam1_t *b, int n, uint32_t *cigar)
+static int replace_cigar(bam1_t *b, uint32_t n, uint32_t *cigar)
 {
+    int diff = 0;
     if (n != b->core.n_cigar) {
         int o = b->core.l_qname + b->core.n_cigar * 4;
-        if (b->l_data + (n - b->core.n_cigar) * 4 > b->m_data) {
-            b->m_data = b->l_data + (n - b->core.n_cigar) * 4;
-            kroundup32(b->m_data);
-            b->data = (uint8_t*)realloc(b->data, b->m_data);
+        if (n > b->core.n_cigar) {
+            diff = (n - b->core.n_cigar) * 4;
+            if ((INT_MAX - b->l_data)/4 < (n - b->core.n_cigar)) {
+                fprintf(stderr, "[depad] ERROR: BAM record too big\n");
+                return -1;
+            }
+            if (b->l_data + diff > b->m_data) {
+                b->m_data = b->l_data + diff;
+                kroundup32(b->m_data);
+                uint8_t *tmp = (uint8_t*)realloc(b->data, b->m_data);
+                if (!tmp) {
+                    fprintf(stderr, "[depad] ERROR: Memory allocation failure.\n");
+                    return -1;
+                }
+                b->data = tmp;
+            }
+        } else {
+            diff = -(int)((b->core.n_cigar - n) * 4);
         }
         memmove(b->data + b->core.l_qname + n * 4, b->data + o, b->l_data - o);
-        memcpy(b->data + b->core.l_qname, cigar, n * 4);
-        b->l_data += (n - b->core.n_cigar) * 4;
         b->core.n_cigar = n;
-    } else memcpy(b->data + b->core.l_qname, cigar, n * 4);
+    }
+
+    memcpy(b->data + b->core.l_qname, cigar, n * 4);
+    b->l_data += diff;
+
+    return 0;
 }
 
 #define write_cigar(_c, _n, _m, _v) do { \
@@ -195,7 +209,8 @@ int bam_pad2unpad(samFile *in, samFile *out,  sam_hdr_t *h, faidx_t *fai)
     kstring_t r, q;
     int r_tid = -1;
     uint32_t *cigar2 = 0;
-    int ret = 0, n2 = 0, m2 = 0, *posmap = 0;
+    int ret = 0, *posmap = 0;
+    uint32_t n2 = 0, m2 = 0;
 
     b = bam_init1();
     if (!b) {
@@ -242,7 +257,8 @@ int bam_pad2unpad(samFile *in, samFile *out,  sam_hdr_t *h, faidx_t *fai)
                 }
             }
             write_cigar(cigar2, n2, m2, bam_cigar_gen(b->core.l_qseq, BAM_CMATCH));
-            replace_cigar(b, n2, cigar2);
+            if (replace_cigar(b, n2, cigar2) < 0)
+                return -1;
             posmap = update_posmap(posmap, r);
         } else if (b->core.n_cigar > 0) {
             int i, k, op;
@@ -328,7 +344,8 @@ int bam_pad2unpad(samFile *in, samFile *out,  sam_hdr_t *h, faidx_t *fai)
             for (i = k = 0; i < n2; ++i)
                 if (cigar2[i]) cigar2[k++] = cigar2[i];
             n2 = k;
-            replace_cigar(b, n2, cigar2);
+            if (replace_cigar(b, n2, cigar2) < 0)
+                return -1;
         }
         /* Even unmapped reads can have a POS value, e.g. if their mate was mapped */
         if (b->core.pos != -1) b->core.pos = posmap[b->core.pos];
@@ -430,7 +447,7 @@ int main_pad2unpad(int argc, char *argv[])
     sam_hdr_t *h = 0, *h_fix = 0;
     faidx_t *fai = 0;
     int c, compress_level = -1, is_long_help = 0, no_pg = 0;
-    char in_mode[5], out_mode[6], *fn_out = 0, *fn_list = 0, *fn_out_idx = NULL;
+    char in_mode[5], out_mode[6], *fn_out = 0, *fn_fai = 0, *fn_out_idx = NULL;
     int ret=0;
     char *arg_list = NULL;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
@@ -477,8 +494,8 @@ int main_pad2unpad(int argc, char *argv[])
 
     // Load FASTA reference (also needed for SAM -> BAM if missing header)
     if (ga.reference) {
-        fn_list = samfaipath(ga.reference);
-        fai = fai_load(ga.reference);
+        fn_fai = fai_path(ga.reference);
+        fai = fai_load3(ga.reference, fn_fai, NULL, FAI_CREATE);
     }
     // open file handlers
     if ((in = sam_open_format(argv[optind], in_mode, &ga.in)) == 0) {
@@ -486,8 +503,8 @@ int main_pad2unpad(int argc, char *argv[])
         ret = 1;
         goto depad_end;
     }
-    if (fn_list && hts_set_fai_filename(in, fn_list) != 0) {
-        fprintf(stderr, "[depad] failed to load reference file \"%s\".\n", fn_list);
+    if (fn_fai && hts_set_fai_filename(in, fn_fai) != 0) {
+        fprintf(stderr, "[depad] failed to load reference file \"%s\".\n", fn_fai);
         ret = 1;
         goto depad_end;
     }
@@ -570,7 +587,7 @@ depad_end:
         fprintf(stderr, "[depad] error on closing output file.\n");
         ret = 1;
     }
-    free(fn_list); free(fn_out);
+    free(fn_fai); free(fn_out);
     if (fn_out_idx)
         free(fn_out_idx);
     sam_global_args_free(&ga);
diff --git a/samtools/padding.c.pysam.c b/samtools/padding.c.pysam.c
index ecc3691..e90255f 100644
--- a/samtools/padding.c.pysam.c
+++ b/samtools/padding.c.pysam.c
@@ -3,7 +3,7 @@
 /*  padding.c -- depad subcommand.
 
     Copyright (C) 2011, 2012 Broad Institute.
-    Copyright (C) 2014-2016, 2019 Genome Research Ltd.
+    Copyright (C) 2014-2016, 2019-2020 Genome Research Ltd.
     Portions copyright (C) 2012, 2013 Peter Cock, The James Hutton Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -40,24 +40,38 @@ DEALINGS IN THE SOFTWARE.  */
 
 #define bam_reg2bin(b,e) hts_reg2bin((b),(e), 14, 5)
 
-// The one and only function needed from sam.c.
-// Explicitly here to avoid including bam.h translation layer.
-extern char *samfaipath(const char *fn_ref);
-
-static void replace_cigar(bam1_t *b, int n, uint32_t *cigar)
+static int replace_cigar(bam1_t *b, uint32_t n, uint32_t *cigar)
 {
+    int diff = 0;
     if (n != b->core.n_cigar) {
         int o = b->core.l_qname + b->core.n_cigar * 4;
-        if (b->l_data + (n - b->core.n_cigar) * 4 > b->m_data) {
-            b->m_data = b->l_data + (n - b->core.n_cigar) * 4;
-            kroundup32(b->m_data);
-            b->data = (uint8_t*)realloc(b->data, b->m_data);
+        if (n > b->core.n_cigar) {
+            diff = (n - b->core.n_cigar) * 4;
+            if ((INT_MAX - b->l_data)/4 < (n - b->core.n_cigar)) {
+                fprintf(samtools_stderr, "[depad] ERROR: BAM record too big\n");
+                return -1;
+            }
+            if (b->l_data + diff > b->m_data) {
+                b->m_data = b->l_data + diff;
+                kroundup32(b->m_data);
+                uint8_t *tmp = (uint8_t*)realloc(b->data, b->m_data);
+                if (!tmp) {
+                    fprintf(samtools_stderr, "[depad] ERROR: Memory allocation failure.\n");
+                    return -1;
+                }
+                b->data = tmp;
+            }
+        } else {
+            diff = -(int)((b->core.n_cigar - n) * 4);
         }
         memmove(b->data + b->core.l_qname + n * 4, b->data + o, b->l_data - o);
-        memcpy(b->data + b->core.l_qname, cigar, n * 4);
-        b->l_data += (n - b->core.n_cigar) * 4;
         b->core.n_cigar = n;
-    } else memcpy(b->data + b->core.l_qname, cigar, n * 4);
+    }
+
+    memcpy(b->data + b->core.l_qname, cigar, n * 4);
+    b->l_data += diff;
+
+    return 0;
 }
 
 #define write_cigar(_c, _n, _m, _v) do { \
@@ -197,7 +211,8 @@ int bam_pad2unpad(samFile *in, samFile *out,  sam_hdr_t *h, faidx_t *fai)
     kstring_t r, q;
     int r_tid = -1;
     uint32_t *cigar2 = 0;
-    int ret = 0, n2 = 0, m2 = 0, *posmap = 0;
+    int ret = 0, *posmap = 0;
+    uint32_t n2 = 0, m2 = 0;
 
     b = bam_init1();
     if (!b) {
@@ -244,7 +259,8 @@ int bam_pad2unpad(samFile *in, samFile *out,  sam_hdr_t *h, faidx_t *fai)
                 }
             }
             write_cigar(cigar2, n2, m2, bam_cigar_gen(b->core.l_qseq, BAM_CMATCH));
-            replace_cigar(b, n2, cigar2);
+            if (replace_cigar(b, n2, cigar2) < 0)
+                return -1;
             posmap = update_posmap(posmap, r);
         } else if (b->core.n_cigar > 0) {
             int i, k, op;
@@ -330,7 +346,8 @@ int bam_pad2unpad(samFile *in, samFile *out,  sam_hdr_t *h, faidx_t *fai)
             for (i = k = 0; i < n2; ++i)
                 if (cigar2[i]) cigar2[k++] = cigar2[i];
             n2 = k;
-            replace_cigar(b, n2, cigar2);
+            if (replace_cigar(b, n2, cigar2) < 0)
+                return -1;
         }
         /* Even unmapped reads can have a POS value, e.g. if their mate was mapped */
         if (b->core.pos != -1) b->core.pos = posmap[b->core.pos];
@@ -432,7 +449,7 @@ int main_pad2unpad(int argc, char *argv[])
     sam_hdr_t *h = 0, *h_fix = 0;
     faidx_t *fai = 0;
     int c, compress_level = -1, is_long_help = 0, no_pg = 0;
-    char in_mode[5], out_mode[6], *fn_out = 0, *fn_list = 0, *fn_out_idx = NULL;
+    char in_mode[5], out_mode[6], *fn_out = 0, *fn_fai = 0, *fn_out_idx = NULL;
     int ret=0;
     char *arg_list = NULL;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
@@ -479,8 +496,8 @@ int main_pad2unpad(int argc, char *argv[])
 
     // Load FASTA reference (also needed for SAM -> BAM if missing header)
     if (ga.reference) {
-        fn_list = samfaipath(ga.reference);
-        fai = fai_load(ga.reference);
+        fn_fai = fai_path(ga.reference);
+        fai = fai_load3(ga.reference, fn_fai, NULL, FAI_CREATE);
     }
     // open file handlers
     if ((in = sam_open_format(argv[optind], in_mode, &ga.in)) == 0) {
@@ -488,8 +505,8 @@ int main_pad2unpad(int argc, char *argv[])
         ret = 1;
         goto depad_end;
     }
-    if (fn_list && hts_set_fai_filename(in, fn_list) != 0) {
-        fprintf(samtools_stderr, "[depad] failed to load reference file \"%s\".\n", fn_list);
+    if (fn_fai && hts_set_fai_filename(in, fn_fai) != 0) {
+        fprintf(samtools_stderr, "[depad] failed to load reference file \"%s\".\n", fn_fai);
         ret = 1;
         goto depad_end;
     }
@@ -572,7 +589,7 @@ depad_end:
         fprintf(samtools_stderr, "[depad] error on closing output file.\n");
         ret = 1;
     }
-    free(fn_list); free(fn_out);
+    free(fn_fai); free(fn_out);
     if (fn_out_idx)
         free(fn_out_idx);
     sam_global_args_free(&ga);
diff --git a/samtools/phase.c b/samtools/phase.c
index 871e7c3..50f7a8f 100644
--- a/samtools/phase.c
+++ b/samtools/phase.c
@@ -583,6 +583,7 @@ static int start_output(phaseg_t *g, int c, const char *middle, const htsFormat
 int main_phase(int argc, char *argv[])
 {
     int c, tid, pos, vpos = 0, n, lasttid = -1, max_vpos = 0, usage = 0;
+    int status = EXIT_SUCCESS;
     const bam_pileup1_t *plp;
     bam_plp_t iter;
     nseq_t *seqs;
@@ -785,6 +786,12 @@ int main_phase(int argc, char *argv[])
             return 1;
         }
     }
+
+    if (n < 0) {
+        print_error("phase", "error reading from '%s'", argv[optind]);
+        status = EXIT_FAILURE;
+    }
+
     sam_hdr_destroy(g.fp_hdr);
     bam_plp_destroy(iter);
     sam_close(g.fp);
@@ -809,5 +816,5 @@ int main_phase(int argc, char *argv[])
     }
     free(g.arg_list);
     sam_global_args_free(&ga);
-    return 0;
+    return status;
 }
diff --git a/samtools/phase.c.pysam.c b/samtools/phase.c.pysam.c
index 6357eab..13ab556 100644
--- a/samtools/phase.c.pysam.c
+++ b/samtools/phase.c.pysam.c
@@ -585,6 +585,7 @@ static int start_output(phaseg_t *g, int c, const char *middle, const htsFormat
 int main_phase(int argc, char *argv[])
 {
     int c, tid, pos, vpos = 0, n, lasttid = -1, max_vpos = 0, usage = 0;
+    int status = EXIT_SUCCESS;
     const bam_pileup1_t *plp;
     bam_plp_t iter;
     nseq_t *seqs;
@@ -787,6 +788,12 @@ int main_phase(int argc, char *argv[])
             return 1;
         }
     }
+
+    if (n < 0) {
+        print_error("phase", "error reading from '%s'", argv[optind]);
+        status = EXIT_FAILURE;
+    }
+
     sam_hdr_destroy(g.fp_hdr);
     bam_plp_destroy(iter);
     sam_close(g.fp);
@@ -811,5 +818,5 @@ int main_phase(int argc, char *argv[])
     }
     free(g.arg_list);
     sam_global_args_free(&ga);
-    return 0;
+    return status;
 }
diff --git a/samtools/sam_view.c b/samtools/sam_view.c
index c13aea8..515eaa5 100644
--- a/samtools/sam_view.c
+++ b/samtools/sam_view.c
@@ -1,6 +1,6 @@
 /*  sam_view.c -- SAM<->BAM<->CRAM conversion.
 
-    Copyright (C) 2009-2019 Genome Research Ltd.
+    Copyright (C) 2009-2021 Genome Research Ltd.
     Portions copyright (C) 2009, 2011, 2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -37,20 +37,20 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/faidx.h"
 #include "htslib/khash.h"
 #include "htslib/thread_pool.h"
+#include "htslib/hts_expr.h"
 #include "samtools.h"
 #include "sam_opts.h"
 #include "bedidx.h"
 
-KHASH_SET_INIT_STR(rg)
-KHASH_SET_INIT_STR(tv)
+KHASH_SET_INIT_STR(str)
 
-typedef khash_t(rg) *rghash_t;
-typedef khash_t(tv) *tvhash_t;
+typedef khash_t(str) *strhash_t;
 
 // This structure contains the settings for a samview run
 typedef struct samview_settings {
-    rghash_t rghash;
-    tvhash_t tvhash;
+    strhash_t rghash;
+    strhash_t rnhash;
+    strhash_t tvhash;
     int min_mapQ;
     int flag_on;
     int flag_off;
@@ -65,13 +65,15 @@ typedef struct samview_settings {
     char** remove_aux;
     int multi_region;
     char* tag;
+    hts_filter_t *filter;
+    int remove_flag;
+    int add_flag;
 } samview_settings_t;
 
 
 // TODO Add declarations of these to a viable htslib or samtools header
 extern const char *bam_get_library(sam_hdr_t *header, const bam1_t *b);
 extern int bam_remove_B(bam1_t *b);
-extern char *samfaipath(const char *fn_ref);
 
 // Returns 0 to indicate read should be output 1 otherwise
 static int process_aln(const sam_hdr_t *h, bam1_t *b, samview_settings_t* settings)
@@ -98,19 +100,39 @@ static int process_aln(const sam_hdr_t *h, bam1_t *b, samview_settings_t* settin
     if (settings->rghash) {
         uint8_t *s = bam_aux_get(b, "RG");
         if (s) {
-            khint_t k = kh_get(rg, settings->rghash, (char*)(s + 1));
+            khint_t k = kh_get(str, settings->rghash, (char*)(s + 1));
             if (k == kh_end(settings->rghash)) return 1;
         }
     }
-    if (settings->tvhash && settings->tag) {
+    if (settings->tag) {
         uint8_t *s = bam_aux_get(b, settings->tag);
         if (s) {
-            khint_t k = kh_get(tv, settings->tvhash, (char*)(s + 1));
-            if (k == kh_end(settings->tvhash)) return 1;
+            if (settings->tvhash) {
+                char t[32], *val;
+                if (*s == 'i' || *s == 'I' || *s == 's' || *s == 'S' || *s == 'c' || *s == 'C') {
+                    int ret = snprintf(t, 32, "%"PRId64, bam_aux2i(s));
+                    if (ret > 0) val = t;
+                    else return 1;
+                } else if (*s == 'A') {
+                    t[0] = *(s+1);
+                    t[1] = 0;
+                    val = t;
+                } else {
+                    val = (char *)(s+1);
+                }
+                khint_t k = kh_get(str, settings->tvhash, val);
+                if (k == kh_end(settings->tvhash)) return 1;
+            }
         } else {
             return 1;
         }
     }
+    if (settings->rnhash) {
+        const char* rn = bam_get_qname(b);
+        if (!rn || kh_get(str, settings->rnhash, rn) == kh_end(settings->rnhash)) {
+            return 1;
+        }
+    }
     if (settings->library) {
         const char *p = bam_get_library((sam_hdr_t*)h, b);
         if (!p || strcmp(p, settings->library) != 0) return 1;
@@ -124,11 +146,43 @@ static int process_aln(const sam_hdr_t *h, bam1_t *b, samview_settings_t* settin
             }
         }
     }
+
+    if (settings->filter && sam_passes_filter(h, b, settings->filter) < 1)
+        return 1;
+
     return 0;
 }
 
 static int usage(FILE *fp, int exit_status, int is_long_help);
 
+static int populate_lookup_from_file(const char *subcmd, strhash_t lookup, char *fn)
+{
+    FILE *fp;
+    char buf[1024];
+    int ret = 0;
+    fp = fopen(fn, "r");
+    if (fp == NULL) {
+        print_error_errno(subcmd, "failed to open \"%s\" for reading", fn);
+        return -1;
+    }
+
+    while (ret != -1 && !feof(fp) && fscanf(fp, "%1023s", buf) > 0) {
+        char *d = strdup(buf);
+        if (d != NULL) {
+            kh_put(str, lookup, d, &ret);
+            if (ret == 0) free(d); /* Duplicate */
+        } else {
+            ret = -1;
+        }
+    }
+    if (ferror(fp)) ret = -1;
+    if (ret == -1) {
+        print_error_errno(subcmd, "failed to read \"%s\"", fn);
+    }
+    fclose(fp);
+    return (ret != -1) ? 0 : -1;
+}
+
 static int add_read_group_single(const char *subcmd, samview_settings_t *settings, char *name)
 {
     char *d = strdup(name);
@@ -137,11 +191,11 @@ static int add_read_group_single(const char *subcmd, samview_settings_t *setting
     if (d == NULL) goto err;
 
     if (settings->rghash == NULL) {
-        settings->rghash = kh_init(rg);
+        settings->rghash = kh_init(str);
         if (settings->rghash == NULL) goto err;
     }
 
-    kh_put(rg, settings->rghash, d, &ret);
+    kh_put(str, settings->rghash, d, &ret);
     if (ret == -1) goto err;
     if (ret ==  0) free(d); /* Duplicate */
     return 0;
@@ -152,40 +206,28 @@ static int add_read_group_single(const char *subcmd, samview_settings_t *setting
     return -1;
 }
 
-static int add_read_groups_file(const char *subcmd, samview_settings_t *settings, char *fn)
+static int add_read_names_file(const char *subcmd, samview_settings_t *settings, char *fn)
 {
-    FILE *fp;
-    char buf[1024];
-    int ret = 0;
-    if (settings->rghash == NULL) {
-        settings->rghash = kh_init(rg);
-        if (settings->rghash == NULL) {
+    if (settings->rnhash == NULL) {
+        settings->rnhash = kh_init(str);
+        if (settings->rnhash == NULL) {
             perror(NULL);
             return -1;
         }
     }
+    return populate_lookup_from_file(subcmd, settings->rnhash, fn);
+}
 
-    fp = fopen(fn, "r");
-    if (fp == NULL) {
-        print_error_errno(subcmd, "failed to open \"%s\" for reading", fn);
-        return -1;
-    }
-
-    while (ret != -1 && !feof(fp) && fscanf(fp, "%1023s", buf) > 0) {
-        char *d = strdup(buf);
-        if (d != NULL) {
-            kh_put(rg, settings->rghash, d, &ret);
-            if (ret == 0) free(d); /* Duplicate */
-        } else {
-            ret = -1;
+static int add_read_groups_file(const char *subcmd, samview_settings_t *settings, char *fn)
+{
+    if (settings->rghash == NULL) {
+        settings->rghash = kh_init(str);
+        if (settings->rghash == NULL) {
+            perror(NULL);
+            return -1;
         }
     }
-    if (ferror(fp)) ret = -1;
-    if (ret == -1) {
-        print_error_errno(subcmd, "failed to read \"%s\"", fn);
-    }
-    fclose(fp);
-    return (ret != -1) ? 0 : -1;
+    return populate_lookup_from_file(subcmd, settings->rghash, fn);
 }
 
 static int add_tag_value_single(const char *subcmd, samview_settings_t *settings, char *name)
@@ -196,11 +238,11 @@ static int add_tag_value_single(const char *subcmd, samview_settings_t *settings
     if (d == NULL) goto err;
 
     if (settings->tvhash == NULL) {
-        settings->tvhash = kh_init(tv);
+        settings->tvhash = kh_init(str);
         if (settings->tvhash == NULL) goto err;
     }
 
-    kh_put(tv, settings->tvhash, d, &ret);
+    kh_put(str, settings->tvhash, d, &ret);
     if (ret == -1) goto err;
     if (ret ==  0) free(d); /* Duplicate */
     return 0;
@@ -213,38 +255,14 @@ static int add_tag_value_single(const char *subcmd, samview_settings_t *settings
 
 static int add_tag_values_file(const char *subcmd, samview_settings_t *settings, char *fn)
 {
-    FILE *fp;
-    char buf[1024];
-    int ret = 0;
     if (settings->tvhash == NULL) {
-        settings->tvhash = kh_init(tv);
+        settings->tvhash = kh_init(str);
         if (settings->tvhash == NULL) {
             perror(NULL);
             return -1;
         }
     }
-
-    fp = fopen(fn, "r");
-    if (fp == NULL) {
-        print_error_errno(subcmd, "failed to open \"%s\" for reading", fn);
-        return -1;
-    }
-
-    while (ret != -1 && !feof(fp) && fscanf(fp, "%1023s", buf) > 0) {
-        char *d = strdup(buf);
-        if (d != NULL) {
-            kh_put(tv, settings->tvhash, d, &ret);
-            if (ret == 0) free(d); /* Duplicate */
-        } else {
-            ret = -1;
-        }
-    }
-    if (ferror(fp)) ret = -1;
-    if (ret == -1) {
-        print_error_errno(subcmd, "failed to read \"%s\"", fn);
-    }
-    fclose(fp);
-    return (ret != -1) ? 0 : -1;
+    return populate_lookup_from_file(subcmd, settings->tvhash, fn);
 }
 
 static inline int check_sam_write1(samFile *fp, const sam_hdr_t *h, const bam1_t *b, const char *fname, int *retp)
@@ -259,6 +277,18 @@ static inline int check_sam_write1(samFile *fp, const sam_hdr_t *h, const bam1_t
     return r;
 }
 
+static inline void change_flag(bam1_t *b, samview_settings_t *settings)
+{
+    if (settings->add_flag)
+        b->core.flag |= settings->add_flag;
+
+    if (settings->remove_flag)
+        b->core.flag &= ~settings->remove_flag;
+}
+
+// Make mnemonic distinct values for longoption-only options
+#define LONGOPT(c)  ((c) + 128)
+
 int main_samview(int argc, char *argv[])
 {
     int c, is_header = 0, is_header_only = 0, ret = 0, compress_level = -1, is_count = 0, has_index_file = 0, no_pg = 0;
@@ -266,8 +296,8 @@ int main_samview(int argc, char *argv[])
     samFile *in = 0, *out = 0, *un_out=0;
     FILE *fp_out = NULL;
     sam_hdr_t *header = NULL;
-    char out_mode[5], out_un_mode[5], *out_format = "";
-    char *fn_in = 0, *fn_idx_in = 0, *fn_out = 0, *fn_list = 0, *q, *fn_un_out = 0;
+    char out_mode[6] = {0}, out_un_mode[6] = {0}, *out_format = "";
+    char *fn_in = 0, *fn_idx_in = 0, *fn_out = 0, *fn_fai = 0, *q, *fn_un_out = 0;
     char *fn_out_idx = NULL, *fn_un_out_idx = NULL, *arg_list = NULL;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     htsThreadPool p = {NULL, 0};
@@ -288,12 +318,59 @@ int main_samview(int argc, char *argv[])
         .library = NULL,
         .bed = NULL,
         .multi_region = 0,
-        .tag = NULL
+        .tag = NULL,
+        .filter = NULL,
+        .remove_flag = 0,
+        .add_flag = 0
     };
 
     static const struct option lopts[] = {
         SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T', '@'),
-        {"no-PG", no_argument, NULL, 1},
+        {"add-flags", required_argument, NULL, LONGOPT('a')},
+        {"bam", no_argument, NULL, 'b'},
+        {"count", no_argument, NULL, 'c'},
+        {"cram", no_argument, NULL, 'C'},
+        {"customised-index", no_argument, NULL, 'X'},
+        {"customized-index", no_argument, NULL, 'X'},
+        {"excl-flags", required_argument, NULL, 'F'},
+        {"exclude-flags", required_argument, NULL, 'F'},
+        {"expr", required_argument, NULL, 'e'},
+        {"expression", required_argument, NULL, 'e'},
+        {"fai-reference", required_argument, NULL, 't'},
+        {"fast", no_argument, NULL, '1'},
+        {"header-only", no_argument, NULL, 'H'},
+        {"help", no_argument, NULL, LONGOPT('?')},
+        {"library", required_argument, NULL, 'l'},
+        {"min-mapq", required_argument, NULL, 'q'},
+        {"min-MQ", required_argument, NULL, 'q'},
+        {"min-mq", required_argument, NULL, 'q'},
+        {"min-qlen", required_argument, NULL, 'm'},
+        {"no-header", no_argument, NULL, LONGOPT('H')},
+        {"no-PG", no_argument, NULL, LONGOPT('P')},
+        {"output", required_argument, NULL, 'o'},
+        {"output-unselected", required_argument, NULL, 'U'},
+        {"QNAME-file", required_argument, NULL, 'N'},
+        {"qname-file", required_argument, NULL, 'N'},
+        {"read-group", required_argument, NULL, 'r'},
+        {"read-group-file", required_argument, NULL, 'R'},
+        {"readgroup", required_argument, NULL, 'r'},
+        {"readgroup-file", required_argument, NULL, 'R'},
+        {"region-file", required_argument, NULL, LONGOPT('L')},
+        {"regions-file", required_argument, NULL, LONGOPT('L')},
+        {"remove-B", no_argument, NULL, 'B'},
+        {"remove-flags", required_argument, NULL, LONGOPT('r')},
+        {"remove-tag", required_argument, NULL, 'x'},
+        {"require-flags", required_argument, NULL, 'f'},
+        {"subsample", required_argument, NULL, LONGOPT('s')},
+        {"subsample-seed", required_argument, NULL, LONGOPT('S')},
+        {"tag", required_argument, NULL, 'd'},
+        {"tag-file", required_argument, NULL, 'D'},
+        {"target-file", required_argument, NULL, 'L'},
+        {"targets-file", required_argument, NULL, 'L'},
+        {"uncompressed", no_argument, NULL, 'u'},
+        {"unoutput", required_argument, NULL, 'U'},
+        {"use-index", no_argument, NULL, 'M'},
+        {"with-header", no_argument, NULL, 'h'},
         { NULL, 0, NULL, 0 }
     };
 
@@ -310,16 +387,11 @@ int main_samview(int argc, char *argv[])
     opterr = 0;
 
     while ((c = getopt_long(argc, argv,
-                            "SbBcCt:h1Ho:O:q:f:F:G:ul:r:T:R:d:D:L:s:@:m:x:U:MX",
+                            "SbBcCt:h1Ho:O:q:f:F:G:ul:r:T:R:N:d:D:L:s:@:m:x:U:MXe:",
                             lopts, NULL)) >= 0) {
         switch (c) {
         case 's':
-            if ((settings.subsam_seed = strtol(optarg, &q, 10)) != 0) {
-                // Convert likely user input 0,1,2,... to pseudo-random
-                // values with more entropy and more bits set
-                srand(settings.subsam_seed);
-                settings.subsam_seed = rand();
-            }
+            settings.subsam_seed = strtol(optarg, &q, 10);
             if (q && *q == '.') {
                 settings.subsam_frac = strtod(q, &q);
                 if (*q) ret = 1;
@@ -332,24 +404,36 @@ int main_samview(int argc, char *argv[])
                 goto view_end;
             }
             break;
+        case LONGOPT('s'):
+            settings.subsam_frac = strtod(optarg, &q);
+            if (*q || settings.subsam_frac < 0.0 || settings.subsam_frac > 1.0) {
+                print_error("view", "Incorrect sampling argument \"%s\"", optarg);
+                goto view_end;
+            }
+            break;
+        case LONGOPT('S'): settings.subsam_seed = atoi(optarg); break;
         case 'm': settings.min_qlen = atoi(optarg); break;
         case 'c': is_count = 1; break;
         case 'S': break;
         case 'b': out_format = "b"; break;
         case 'C': out_format = "c"; break;
-        case 't': fn_list = strdup(optarg); break;
+        case 't': fn_fai = strdup(optarg); break;
         case 'h': is_header = 1; break;
         case 'H': is_header_only = 1; break;
+        case LONGOPT('H'): is_header = is_header_only = 0; break;
         case 'o': fn_out = strdup(optarg); break;
         case 'U': fn_un_out = strdup(optarg); break;
         case 'X': has_index_file = 1; break;
-        case 'f': settings.flag_on |= strtol(optarg, 0, 0); break;
-        case 'F': settings.flag_off |= strtol(optarg, 0, 0); break;
-        case 'G': settings.flag_alloff |= strtol(optarg, 0, 0); break;
+        case 'f': settings.flag_on |= bam_str2flag(optarg); break;
+        case 'F': settings.flag_off |= bam_str2flag(optarg); break;
+        case 'G': settings.flag_alloff |= bam_str2flag(optarg); break;
         case 'q': settings.min_mapQ = atoi(optarg); break;
         case 'u': compress_level = 0; break;
         case '1': compress_level = 1; break;
         case 'l': settings.library = strdup(optarg); break;
+        case LONGOPT('L'):
+            settings.multi_region = 1;
+            // fall through
         case 'L':
             if ((settings.bed = bed_read(optarg)) == NULL) {
                 print_error_errno("view", "Could not read file \"%s\"", optarg);
@@ -369,8 +453,14 @@ int main_samview(int argc, char *argv[])
                 goto view_end;
             }
             break;
+        case 'N':
+            if (add_read_names_file("view", &settings, optarg) != 0) {
+                ret = 1;
+                goto view_end;
+            }
+            break;
         case 'd':
-            if (strlen(optarg) < 4 || optarg[2] != ':') {
+            if (strlen(optarg) < 2 || (strlen(optarg) > 2 && optarg[2] != ':')) {
                 print_error_errno("view", "Invalid \"tag:value\" option: \"%s\"", optarg);
                 ret = 1;
                 goto view_end;
@@ -391,7 +481,8 @@ int main_samview(int argc, char *argv[])
                 memcpy(settings.tag, optarg, 2);
             }
 
-            if (add_tag_value_single("view", &settings, optarg+3) != 0) {
+            if (strlen(optarg) > 3 && add_tag_value_single("view", &settings, optarg+3) != 0) {
+                print_error("view", "Could not add tag:value \"%s\"", optarg);
                 ret = 1;
                 goto view_end;
             }
@@ -399,7 +490,7 @@ int main_samview(int argc, char *argv[])
         case 'D':
             // Allow ";" as delimiter besides ":" to support MinGW CLI POSIX
             // path translation as described at:
-            //   http://www.mingw.org/wiki/Posix_path_conversion
+            // http://www.mingw.org/wiki/Posix_path_conversion
             if (strlen(optarg) < 4 || (optarg[2] != ':' && optarg[2] != ';')) {
                 print_error_errno("view", "Invalid \"tag:file\" option: \"%s\"", optarg);
                 ret = 1;
@@ -430,6 +521,8 @@ int main_samview(int argc, char *argv[])
         //case 'x': out_format = "x"; break;
         //case 'X': out_format = "X"; break;
                  */
+        case LONGOPT('?'):
+            return usage(stdout, EXIT_SUCCESS, 1);
         case '?':
             if (optopt == '?') {  // '-?' appeared on command line
                 return usage(stdout, EXIT_SUCCESS, 1);
@@ -451,7 +544,7 @@ int main_samview(int argc, char *argv[])
         case 'x':
             {
                 if (strlen(optarg) != 2) {
-                    fprintf(stderr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n");
+                    print_error("main_samview", "Error parsing -x auxiliary tags should be exactly two characters long.");
                     return usage(stderr, EXIT_FAILURE, 0);
                 }
                 settings.remove_aux = (char**)realloc(settings.remove_aux, sizeof(char*) * (++settings.remove_aux_len));
@@ -459,13 +552,22 @@ int main_samview(int argc, char *argv[])
             }
             break;
         case 'M': settings.multi_region = 1; break;
-        case 1: no_pg = 1; break;
+        case LONGOPT('P'): no_pg = 1; break;
+        case 'e':
+            if (!(settings.filter = hts_filter_init(optarg))) {
+                print_error("main_samview", "Couldn't initialise filter");
+                return 1;
+            }
+            break;
+        case LONGOPT('r'): settings.remove_flag |= bam_str2flag(optarg); break;
+        case LONGOPT('a'): settings.add_flag |= bam_str2flag(optarg); break;
         default:
             if (parse_sam_global_opt(c, optarg, lopts, &ga) != 0)
                 return usage(stderr, EXIT_FAILURE, 0);
             break;
         }
     }
+    if (fn_fai == 0 && ga.reference) fn_fai = fai_path(ga.reference);
     if (compress_level >= 0 && !*out_format) out_format = "b";
     if (is_header_only) is_header = 1;
     // File format auto-detection first
@@ -474,8 +576,7 @@ int main_samview(int argc, char *argv[])
     // Overridden by manual -b, -C
     if (*out_format)
         out_mode[1] = out_un_mode[1] = *out_format;
-    out_mode[2] = out_un_mode[2] = '\0';
-    // out_(un_)mode now 1 or 2 bytes long, followed by nul.
+    // out_(un_)mode now 1, 2 or 3 bytes long, followed by nul.
     if (compress_level >= 0) {
         char tmp[2];
         tmp[0] = compress_level + '0'; tmp[1] = '\0';
@@ -486,20 +587,23 @@ int main_samview(int argc, char *argv[])
         print_error("view", "No input provided or missing option argument.");
         return usage(stderr, EXIT_FAILURE, 0); // potential memory leak...
     }
+    if (settings.subsam_seed != 0) {
+        // Convert likely user input 1,2,... to pseudo-random
+        // values with more entropy and more bits set
+        srand(settings.subsam_seed);
+        settings.subsam_seed = rand();
+    }
 
     fn_in = (optind < argc)? argv[optind] : "-";
-    // generate the fn_list if necessary
-    if (fn_list == 0 && ga.reference) fn_list = samfaipath(ga.reference);
-    // open file handlers
     if ((in = sam_open_format(fn_in, "r", &ga.in)) == 0) {
         print_error_errno("view", "failed to open \"%s\" for reading", fn_in);
         ret = 1;
         goto view_end;
     }
 
-    if (fn_list) {
-        if (hts_set_fai_filename(in, fn_list) != 0) {
-            fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+    if (fn_fai) {
+        if (hts_set_fai_filename(in, fn_fai) != 0) {
+            fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_fai);
             ret = 1;
             goto view_end;
         }
@@ -518,9 +622,9 @@ int main_samview(int argc, char *argv[])
             ret = 1;
             goto view_end;
         }
-        if (fn_list) {
-            if (hts_set_fai_filename(out, fn_list) != 0) {
-                fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+        if (fn_fai) {
+            if (hts_set_fai_filename(out, fn_fai) != 0) {
+                fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_fai);
                 ret = 1;
                 goto view_end;
             }
@@ -565,9 +669,9 @@ int main_samview(int argc, char *argv[])
                 ret = 1;
                 goto view_end;
             }
-            if (fn_list) {
-                if (hts_set_fai_filename(un_out, fn_list) != 0) {
-                    fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+            if (fn_fai) {
+                if (hts_set_fai_filename(un_out, fn_fai) != 0) {
+                    fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_fai);
                     ret = 1;
                     goto view_end;
                 }
@@ -654,7 +758,10 @@ int main_samview(int argc, char *argv[])
                         // fetch alignments
                         while ((result = sam_itr_multi_next(in, iter, b)) >= 0) {
                             if (!process_aln(header, b, &settings)) {
-                                if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
+                                if (!is_count) {
+                                    change_flag(b, &settings);
+                                    if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
+                                }
                                 count++;
                             } else {
                                 if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
@@ -682,16 +789,20 @@ int main_samview(int argc, char *argv[])
         if ((has_index_file && optind >= argc - 2) || (!has_index_file && optind >= argc - 1)) { // convert/print the entire file
             bam1_t *b = bam_init1();
             int r;
+            errno = 0;
             while ((r = sam_read1(in, header, b)) >= 0) { // read one alignment from `in'
                 if (!process_aln(header, b, &settings)) {
-                    if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
+                    if (!is_count) {
+                        change_flag(b, &settings);
+                        if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
+                    }
                     count++;
                 } else {
                     if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
                 }
             }
             if (r < -1) {
-                fprintf(stderr, "[main_samview] truncated file.\n");
+                print_error_errno("view", "error reading file \"%s\"", fn_in);
                 ret = 1;
             }
             bam_destroy1(b);
@@ -722,7 +833,10 @@ int main_samview(int argc, char *argv[])
                 // fetch alignments
                 while ((result = sam_itr_next(in, iter, b)) >= 0) {
                     if (!process_aln(header, b, &settings)) {
-                        if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
+                        if (!is_count) {
+                            change_flag(b, &settings);
+                            if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
+                        }
                         count++;
                     } else {
                         if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
@@ -766,7 +880,7 @@ view_end:
     if (un_out) check_sam_close("view", un_out, fn_un_out, "file", &ret);
     if (fp_out) fclose(fp_out);
 
-    free(fn_list); free(fn_out); free(settings.library);  free(fn_un_out);
+    free(fn_fai); free(fn_out); free(settings.library);  free(fn_un_out);
     sam_global_args_free(&ga);
     if ( header ) sam_hdr_destroy(header);
     if (settings.bed) bed_destroy(settings.bed);
@@ -774,13 +888,19 @@ view_end:
         khint_t k;
         for (k = 0; k < kh_end(settings.rghash); ++k)
             if (kh_exist(settings.rghash, k)) free((char*)kh_key(settings.rghash, k));
-        kh_destroy(rg, settings.rghash);
+        kh_destroy(str, settings.rghash);
+    }
+    if (settings.rnhash) {
+        khint_t k;
+        for (k = 0; k < kh_end(settings.rnhash); ++k)
+            if (kh_exist(settings.rnhash, k)) free((char*)kh_key(settings.rnhash, k));
+        kh_destroy(str, settings.rnhash);
     }
     if (settings.tvhash) {
         khint_t k;
         for (k = 0; k < kh_end(settings.tvhash); ++k)
             if (kh_exist(settings.tvhash, k)) free((char*)kh_key(settings.tvhash, k));
-        kh_destroy(tv, settings.tvhash);
+        kh_destroy(str, settings.tvhash);
     }
     if (settings.remove_aux_len) {
         free(settings.remove_aux);
@@ -788,6 +908,8 @@ view_end:
     if (settings.tag) {
         free(settings.tag);
     }
+    if (settings.filter)
+        hts_filter_free(settings.filter);
 
     if (p.pool)
         hts_tpool_destroy(p.pool);
@@ -807,47 +929,52 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
 "\n"
 "Usage: samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]\n"
 "\n"
-"Options:\n"
-// output options
-"  -b       output BAM\n"
-"  -C       output CRAM (requires -T)\n"
-"  -1       use fast BAM compression (implies -b)\n"
-"  -u       uncompressed BAM output (implies -b)\n"
-"  -h       include header in SAM output\n"
-"  -H       print SAM header only (no alignments)\n"
-"  -c       print only the count of matching records\n"
-"  -o FILE  output file name [stdout]\n"
-"  -U FILE  output reads not selected by filters to FILE [null]\n"
-// extra input
-"  -t FILE  FILE listing reference names and lengths (see long help) [null]\n"
-"  -X       include customized index file\n"
-// read filters
-"  -L FILE  only include reads overlapping this BED FILE [null]\n"
-"  -r STR   only include reads in read group STR [null]\n"
-"  -R FILE  only include reads with read group listed in FILE [null]\n"
-"  -d STR:STR\n"
-"           only include reads with tag STR and associated value STR [null]\n"
-"  -D STR:FILE\n"
-"           only include reads with tag STR and associated values listed in\n"
-"           FILE [null]\n"
-"  -q INT   only include reads with mapping quality >= INT [0]\n"
-"  -l STR   only include reads in library STR [null]\n"
-"  -m INT   only include reads with number of CIGAR operations consuming\n"
-"           query sequence >= INT [0]\n"
-"  -f INT   only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
-"  -F INT   only include reads with none of the FLAGS in INT present [0]\n"       //   F&x == 0
-"  -G INT   only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
-"  -s FLOAT subsample reads (given INT.FRAC option value, 0.FRAC is the\n"
-"           fraction of templates/read pairs to keep; INT part sets seed)\n"
-"  -M       use the multi-region iterator (increases the speed, removes\n"
-"           duplicates and outputs the reads as they are ordered in the file)\n"
-// read processing
-"  -x STR   read tag to strip (repeatable) [null]\n"
-"  -B       collapse the backward CIGAR operation\n"
-// general options
-"  -?       print long help, including note about region specification\n"
-"  -S       ignored (input format is auto-detected)\n"
-"  --no-PG  do not add a PG line\n");
+"Output options:\n"
+"  -b, --bam                  Output BAM\n"
+"  -C, --cram                 Output CRAM (requires -T)\n"
+"  -1, --fast                 Use fast BAM compression (implies --bam)\n"
+"  -u, --uncompressed         Uncompressed BAM output (implies --bam)\n"
+"  -h, --with-header          Include header in SAM output\n"
+"  -H, --header-only          Print SAM header only (no alignments)\n"
+"      --no-header            Print SAM alignment records only [default]\n"
+"  -c, --count                Print only the count of matching records\n"
+"  -o, --output FILE          Write output to FILE [standard output]\n"
+"  -U, --unoutput FILE, --output-unselected FILE\n"
+"                             Output reads not selected by filters to FILE\n"
+"Input options:\n"
+"  -t, --fai-reference FILE   FILE listing reference names and lengths\n"
+"  -M, --use-index            Use index and multi-region iterator for regions\n"
+"      --region[s]-file FILE  Use index to include only reads overlapping FILE\n"
+"  -X, --customized-index     Expect extra index file argument after <in.bam>\n"
+"\n"
+"Filtering options (Only include in output reads that...):\n"
+"  -L, --target[s]-file FILE  ...overlap (BED) regions in FILE\n"
+"  -r, --read-group STR       ...are in read group STR\n"
+"  -R, --read-group-file FILE ...are in a read group listed in FILE\n"
+"  -N, --qname-file FILE      ...whose read name is listed in FILE\n"
+"  -d, --tag STR1[:STR2]      ...have a tag STR1 (with associated value STR2)\n"
+"  -D, --tag-file STR:FILE    ...have a tag STR whose value is listed in FILE\n"
+"  -q, --min-MQ INT           ...have mapping quality >= INT\n"
+"  -l, --library STR          ...are in library STR\n"
+"  -m, --min-qlen INT         ...cover >= INT query bases (as measured via CIGAR)\n"
+"  -e, --expr STR             ...match the filter expression STR\n"
+"  -f, --require-flags FLAG   ...have all of the FLAGs present\n"             //   F&x == x
+"  -F, --excl[ude]-flags FLAG ...have none of the FLAGs present\n"            //   F&x == 0
+"  -G FLAG                    EXCLUDE reads with all of the FLAGs present\n"  // !(F&x == x)  TODO long option
+"      --subsample FLOAT      Keep only FLOAT fraction of templates/read pairs\n"
+"      --subsample-seed INT   Influence WHICH reads are kept in subsampling [0]\n"
+"  -s INT.FRAC                Same as --subsample 0.FRAC --subsample-seed INT\n"
+"\n"
+"Processing options:\n"
+"      --add-flags FLAG       Add FLAGs to reads\n"
+"      --remove-flags FLAG    Remove FLAGs from reads\n"
+"  -x, --remove-tag STR       Strip tag STR from reads (option may be repeated)\n"
+"  -B, --remove-B             Collapse the backward CIGAR operation\n"
+"\n"
+"General options:\n"
+"  -?, --help   Print long help, including note about region specification\n"
+"  -S           Ignored (input format is auto-detected)\n"
+"      --no-PG  Do not add a PG line\n");
 
     sam_global_opt_help(fp, "-.O.T@..");
     fprintf(fp, "\n");
@@ -887,23 +1014,16 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
 "\n"
 "6. Option `-u' is preferred over `-b' when the output is piped to\n"
 "   another samtools command.\n"
+"\n"
+"7. Option `-M`/`--use-index` causes overlaps with `-L` BED file regions and\n"
+"   command-line region arguments to be computed using the multi-region iterator\n"
+"   and an index. This increases speed, omits duplicates, and outputs the reads\n"
+"   as they are ordered in the input SAM/BAM/CRAM file.\n"
+"\n"
+"8. Options `-L`/`--target[s]-file` and `--region[s]-file` may not be used\n"
+"   together. `--region[s]-file FILE` is simply equivalent to `-M -L FILE`,\n"
+"   so using both causes one of the specified BED files to be ignored.\n"
 "\n");
 
     return exit_status;
 }
-
-int main_import(int argc, char *argv[])
-{
-    int argc2, ret;
-    char **argv2;
-    if (argc != 4) {
-        fprintf(stderr, "Usage: samtools import <in.ref_list> <in.sam> <out.bam>\n");
-        return 1;
-    }
-    argc2 = 6;
-    argv2 = calloc(6, sizeof(char*));
-    argv2[0] = "import", argv2[1] = "-o", argv2[2] = argv[3], argv2[3] = "-bt", argv2[4] = argv[1], argv2[5] = argv[2];
-    ret = main_samview(argc2, argv2);
-    free(argv2);
-    return ret;
-}
diff --git a/samtools/sam_view.c.pysam.c b/samtools/sam_view.c.pysam.c
index 6153ee8..42c42e4 100644
--- a/samtools/sam_view.c.pysam.c
+++ b/samtools/sam_view.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  sam_view.c -- SAM<->BAM<->CRAM conversion.
 
-    Copyright (C) 2009-2019 Genome Research Ltd.
+    Copyright (C) 2009-2021 Genome Research Ltd.
     Portions copyright (C) 2009, 2011, 2012 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -39,20 +39,20 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/faidx.h"
 #include "htslib/khash.h"
 #include "htslib/thread_pool.h"
+#include "htslib/hts_expr.h"
 #include "samtools.h"
 #include "sam_opts.h"
 #include "bedidx.h"
 
-KHASH_SET_INIT_STR(rg)
-KHASH_SET_INIT_STR(tv)
+KHASH_SET_INIT_STR(str)
 
-typedef khash_t(rg) *rghash_t;
-typedef khash_t(tv) *tvhash_t;
+typedef khash_t(str) *strhash_t;
 
 // This structure contains the settings for a samview run
 typedef struct samview_settings {
-    rghash_t rghash;
-    tvhash_t tvhash;
+    strhash_t rghash;
+    strhash_t rnhash;
+    strhash_t tvhash;
     int min_mapQ;
     int flag_on;
     int flag_off;
@@ -67,13 +67,15 @@ typedef struct samview_settings {
     char** remove_aux;
     int multi_region;
     char* tag;
+    hts_filter_t *filter;
+    int remove_flag;
+    int add_flag;
 } samview_settings_t;
 
 
 // TODO Add declarations of these to a viable htslib or samtools header
 extern const char *bam_get_library(sam_hdr_t *header, const bam1_t *b);
 extern int bam_remove_B(bam1_t *b);
-extern char *samfaipath(const char *fn_ref);
 
 // Returns 0 to indicate read should be output 1 otherwise
 static int process_aln(const sam_hdr_t *h, bam1_t *b, samview_settings_t* settings)
@@ -100,19 +102,39 @@ static int process_aln(const sam_hdr_t *h, bam1_t *b, samview_settings_t* settin
     if (settings->rghash) {
         uint8_t *s = bam_aux_get(b, "RG");
         if (s) {
-            khint_t k = kh_get(rg, settings->rghash, (char*)(s + 1));
+            khint_t k = kh_get(str, settings->rghash, (char*)(s + 1));
             if (k == kh_end(settings->rghash)) return 1;
         }
     }
-    if (settings->tvhash && settings->tag) {
+    if (settings->tag) {
         uint8_t *s = bam_aux_get(b, settings->tag);
         if (s) {
-            khint_t k = kh_get(tv, settings->tvhash, (char*)(s + 1));
-            if (k == kh_end(settings->tvhash)) return 1;
+            if (settings->tvhash) {
+                char t[32], *val;
+                if (*s == 'i' || *s == 'I' || *s == 's' || *s == 'S' || *s == 'c' || *s == 'C') {
+                    int ret = snprintf(t, 32, "%"PRId64, bam_aux2i(s));
+                    if (ret > 0) val = t;
+                    else return 1;
+                } else if (*s == 'A') {
+                    t[0] = *(s+1);
+                    t[1] = 0;
+                    val = t;
+                } else {
+                    val = (char *)(s+1);
+                }
+                khint_t k = kh_get(str, settings->tvhash, val);
+                if (k == kh_end(settings->tvhash)) return 1;
+            }
         } else {
             return 1;
         }
     }
+    if (settings->rnhash) {
+        const char* rn = bam_get_qname(b);
+        if (!rn || kh_get(str, settings->rnhash, rn) == kh_end(settings->rnhash)) {
+            return 1;
+        }
+    }
     if (settings->library) {
         const char *p = bam_get_library((sam_hdr_t*)h, b);
         if (!p || strcmp(p, settings->library) != 0) return 1;
@@ -126,11 +148,43 @@ static int process_aln(const sam_hdr_t *h, bam1_t *b, samview_settings_t* settin
             }
         }
     }
+
+    if (settings->filter && sam_passes_filter(h, b, settings->filter) < 1)
+        return 1;
+
     return 0;
 }
 
 static int usage(FILE *fp, int exit_status, int is_long_help);
 
+// Read whitespace-separated tokens from file fn and add each one to the string set lookup.
+// Returns 0 on success; on open, read or allocation failure reports via print_error_errno and returns -1.
+static int populate_lookup_from_file(const char *subcmd, strhash_t lookup, char *fn)
+{
+    char buf[1024];
+    int ret = 0;
+    FILE *fp = fopen(fn, "r");
+    if (fp == NULL) {
+        print_error_errno(subcmd, "failed to open \"%s\" for reading", fn);
+        return -1;
+    }
+
+    // "%1023s" bounds each token to fit buf (1023 characters plus the terminating nul).
+    while (ret != -1 && !feof(fp) && fscanf(fp, "%1023s", buf) > 0) {
+        char *d = strdup(buf);
+        if (d == NULL) { ret = -1; break; }
+        kh_put(str, lookup, d, &ret);
+        // The set keeps d only for a new key (ret > 0); release it for a duplicate
+        // (ret == 0) and for a failed insertion (ret == -1) so it is not leaked.
+        if (ret <= 0) free(d);
+    }
+    if (ferror(fp)) ret = -1;
+    if (ret == -1)
+        print_error_errno(subcmd, "failed to read \"%s\"", fn);
+    fclose(fp);
+    return (ret != -1) ? 0 : -1;
+}
+
 static int add_read_group_single(const char *subcmd, samview_settings_t *settings, char *name)
 {
     char *d = strdup(name);
@@ -139,11 +193,11 @@ static int add_read_group_single(const char *subcmd, samview_settings_t *setting
     if (d == NULL) goto err;
 
     if (settings->rghash == NULL) {
-        settings->rghash = kh_init(rg);
+        settings->rghash = kh_init(str);
         if (settings->rghash == NULL) goto err;
     }
 
-    kh_put(rg, settings->rghash, d, &ret);
+    kh_put(str, settings->rghash, d, &ret);
     if (ret == -1) goto err;
     if (ret ==  0) free(d); /* Duplicate */
     return 0;
@@ -154,40 +208,28 @@ static int add_read_group_single(const char *subcmd, samview_settings_t *setting
     return -1;
 }
 
-static int add_read_groups_file(const char *subcmd, samview_settings_t *settings, char *fn)
+static int add_read_names_file(const char *subcmd, samview_settings_t *settings, char *fn)
 {
-    FILE *fp;
-    char buf[1024];
-    int ret = 0;
-    if (settings->rghash == NULL) {
-        settings->rghash = kh_init(rg);
-        if (settings->rghash == NULL) {
+    if (settings->rnhash == NULL) {
+        settings->rnhash = kh_init(str);
+        if (settings->rnhash == NULL) {
             perror(NULL);
             return -1;
         }
     }
+    return populate_lookup_from_file(subcmd, settings->rnhash, fn);
+}
 
-    fp = fopen(fn, "r");
-    if (fp == NULL) {
-        print_error_errno(subcmd, "failed to open \"%s\" for reading", fn);
-        return -1;
-    }
-
-    while (ret != -1 && !feof(fp) && fscanf(fp, "%1023s", buf) > 0) {
-        char *d = strdup(buf);
-        if (d != NULL) {
-            kh_put(rg, settings->rghash, d, &ret);
-            if (ret == 0) free(d); /* Duplicate */
-        } else {
-            ret = -1;
+static int add_read_groups_file(const char *subcmd, samview_settings_t *settings, char *fn)
+{
+    if (settings->rghash == NULL) {
+        settings->rghash = kh_init(str);
+        if (settings->rghash == NULL) {
+            perror(NULL);
+            return -1;
         }
     }
-    if (ferror(fp)) ret = -1;
-    if (ret == -1) {
-        print_error_errno(subcmd, "failed to read \"%s\"", fn);
-    }
-    fclose(fp);
-    return (ret != -1) ? 0 : -1;
+    return populate_lookup_from_file(subcmd, settings->rghash, fn);
 }
 
 static int add_tag_value_single(const char *subcmd, samview_settings_t *settings, char *name)
@@ -198,11 +240,11 @@ static int add_tag_value_single(const char *subcmd, samview_settings_t *settings
     if (d == NULL) goto err;
 
     if (settings->tvhash == NULL) {
-        settings->tvhash = kh_init(tv);
+        settings->tvhash = kh_init(str);
         if (settings->tvhash == NULL) goto err;
     }
 
-    kh_put(tv, settings->tvhash, d, &ret);
+    kh_put(str, settings->tvhash, d, &ret);
     if (ret == -1) goto err;
     if (ret ==  0) free(d); /* Duplicate */
     return 0;
@@ -215,38 +257,14 @@ static int add_tag_value_single(const char *subcmd, samview_settings_t *settings
 
 static int add_tag_values_file(const char *subcmd, samview_settings_t *settings, char *fn)
 {
-    FILE *fp;
-    char buf[1024];
-    int ret = 0;
     if (settings->tvhash == NULL) {
-        settings->tvhash = kh_init(tv);
+        settings->tvhash = kh_init(str);
         if (settings->tvhash == NULL) {
             perror(NULL);
             return -1;
         }
     }
-
-    fp = fopen(fn, "r");
-    if (fp == NULL) {
-        print_error_errno(subcmd, "failed to open \"%s\" for reading", fn);
-        return -1;
-    }
-
-    while (ret != -1 && !feof(fp) && fscanf(fp, "%1023s", buf) > 0) {
-        char *d = strdup(buf);
-        if (d != NULL) {
-            kh_put(tv, settings->tvhash, d, &ret);
-            if (ret == 0) free(d); /* Duplicate */
-        } else {
-            ret = -1;
-        }
-    }
-    if (ferror(fp)) ret = -1;
-    if (ret == -1) {
-        print_error_errno(subcmd, "failed to read \"%s\"", fn);
-    }
-    fclose(fp);
-    return (ret != -1) ? 0 : -1;
+    return populate_lookup_from_file(subcmd, settings->tvhash, fn);
 }
 
 static inline int check_sam_write1(samFile *fp, const sam_hdr_t *h, const bam1_t *b, const char *fname, int *retp)
@@ -261,6 +279,18 @@ static inline int check_sam_write1(samFile *fp, const sam_hdr_t *h, const bam1_t
     return r;
 }
 
+static inline void change_flag(bam1_t *b, samview_settings_t *settings)
+{   // Rewrite b's FLAG field using the add_flag / remove_flag masks held in settings.
+    if (settings->add_flag)
+        b->core.flag |= settings->add_flag;      // set every bit present in add_flag
+
+    if (settings->remove_flag)
+        b->core.flag &= ~settings->remove_flag;  // applied second, so a bit in both masks ends up cleared
+}
+
+// Map a mnemonic character to a distinct getopt value (character + 128) for long-only options
+#define LONGOPT(c)  ((c) + 128)
+
 int main_samview(int argc, char *argv[])
 {
     int c, is_header = 0, is_header_only = 0, ret = 0, compress_level = -1, is_count = 0, has_index_file = 0, no_pg = 0;
@@ -268,8 +298,8 @@ int main_samview(int argc, char *argv[])
     samFile *in = 0, *out = 0, *un_out=0;
     FILE *fp_out = NULL;
     sam_hdr_t *header = NULL;
-    char out_mode[5], out_un_mode[5], *out_format = "";
-    char *fn_in = 0, *fn_idx_in = 0, *fn_out = 0, *fn_list = 0, *q, *fn_un_out = 0;
+    char out_mode[6] = {0}, out_un_mode[6] = {0}, *out_format = "";
+    char *fn_in = 0, *fn_idx_in = 0, *fn_out = 0, *fn_fai = 0, *q, *fn_un_out = 0;
     char *fn_out_idx = NULL, *fn_un_out_idx = NULL, *arg_list = NULL;
     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
     htsThreadPool p = {NULL, 0};
@@ -290,12 +320,59 @@ int main_samview(int argc, char *argv[])
         .library = NULL,
         .bed = NULL,
         .multi_region = 0,
-        .tag = NULL
+        .tag = NULL,
+        .filter = NULL,
+        .remove_flag = 0,
+        .add_flag = 0
     };
 
     static const struct option lopts[] = {
         SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T', '@'),
-        {"no-PG", no_argument, NULL, 1},
+        {"add-flags", required_argument, NULL, LONGOPT('a')},
+        {"bam", no_argument, NULL, 'b'},
+        {"count", no_argument, NULL, 'c'},
+        {"cram", no_argument, NULL, 'C'},
+        {"customised-index", no_argument, NULL, 'X'},
+        {"customized-index", no_argument, NULL, 'X'},
+        {"excl-flags", required_argument, NULL, 'F'},
+        {"exclude-flags", required_argument, NULL, 'F'},
+        {"expr", required_argument, NULL, 'e'},
+        {"expression", required_argument, NULL, 'e'},
+        {"fai-reference", required_argument, NULL, 't'},
+        {"fast", no_argument, NULL, '1'},
+        {"header-only", no_argument, NULL, 'H'},
+        {"help", no_argument, NULL, LONGOPT('?')},
+        {"library", required_argument, NULL, 'l'},
+        {"min-mapq", required_argument, NULL, 'q'},
+        {"min-MQ", required_argument, NULL, 'q'},
+        {"min-mq", required_argument, NULL, 'q'},
+        {"min-qlen", required_argument, NULL, 'm'},
+        {"no-header", no_argument, NULL, LONGOPT('H')},
+        {"no-PG", no_argument, NULL, LONGOPT('P')},
+        {"output", required_argument, NULL, 'o'},
+        {"output-unselected", required_argument, NULL, 'U'},
+        {"QNAME-file", required_argument, NULL, 'N'},
+        {"qname-file", required_argument, NULL, 'N'},
+        {"read-group", required_argument, NULL, 'r'},
+        {"read-group-file", required_argument, NULL, 'R'},
+        {"readgroup", required_argument, NULL, 'r'},
+        {"readgroup-file", required_argument, NULL, 'R'},
+        {"region-file", required_argument, NULL, LONGOPT('L')},
+        {"regions-file", required_argument, NULL, LONGOPT('L')},
+        {"remove-B", no_argument, NULL, 'B'},
+        {"remove-flags", required_argument, NULL, LONGOPT('r')},
+        {"remove-tag", required_argument, NULL, 'x'},
+        {"require-flags", required_argument, NULL, 'f'},
+        {"subsample", required_argument, NULL, LONGOPT('s')},
+        {"subsample-seed", required_argument, NULL, LONGOPT('S')},
+        {"tag", required_argument, NULL, 'd'},
+        {"tag-file", required_argument, NULL, 'D'},
+        {"target-file", required_argument, NULL, 'L'},
+        {"targets-file", required_argument, NULL, 'L'},
+        {"uncompressed", no_argument, NULL, 'u'},
+        {"unoutput", required_argument, NULL, 'U'},
+        {"use-index", no_argument, NULL, 'M'},
+        {"with-header", no_argument, NULL, 'h'},
         { NULL, 0, NULL, 0 }
     };
 
@@ -312,16 +389,11 @@ int main_samview(int argc, char *argv[])
     opterr = 0;
 
     while ((c = getopt_long(argc, argv,
-                            "SbBcCt:h1Ho:O:q:f:F:G:ul:r:T:R:d:D:L:s:@:m:x:U:MX",
+                            "SbBcCt:h1Ho:O:q:f:F:G:ul:r:T:R:N:d:D:L:s:@:m:x:U:MXe:",
                             lopts, NULL)) >= 0) {
         switch (c) {
         case 's':
-            if ((settings.subsam_seed = strtol(optarg, &q, 10)) != 0) {
-                // Convert likely user input 0,1,2,... to pseudo-random
-                // values with more entropy and more bits set
-                srand(settings.subsam_seed);
-                settings.subsam_seed = rand();
-            }
+            settings.subsam_seed = strtol(optarg, &q, 10);
             if (q && *q == '.') {
                 settings.subsam_frac = strtod(q, &q);
                 if (*q) ret = 1;
@@ -334,24 +406,36 @@ int main_samview(int argc, char *argv[])
                 goto view_end;
             }
             break;
+        case LONGOPT('s'):  // --subsample FLOAT: fraction of templates to keep, in [0.0, 1.0]
+            settings.subsam_frac = strtod(optarg, &q);
+            if (*q || settings.subsam_frac < 0.0 || settings.subsam_frac > 1.0) {  // trailing junk or out of range
+                print_error("view", "Incorrect sampling argument \"%s\"", optarg);
+                ret = 1; goto view_end;  // ret is still 0 here; set it so the bad argument yields a failure exit status
+            }
+            break;
+        case LONGOPT('S'): settings.subsam_seed = atoi(optarg); break;
         case 'm': settings.min_qlen = atoi(optarg); break;
         case 'c': is_count = 1; break;
         case 'S': break;
         case 'b': out_format = "b"; break;
         case 'C': out_format = "c"; break;
-        case 't': fn_list = strdup(optarg); break;
+        case 't': fn_fai = strdup(optarg); break;
         case 'h': is_header = 1; break;
         case 'H': is_header_only = 1; break;
+        case LONGOPT('H'): is_header = is_header_only = 0; break;
         case 'o': fn_out = strdup(optarg); break;
         case 'U': fn_un_out = strdup(optarg); break;
         case 'X': has_index_file = 1; break;
-        case 'f': settings.flag_on |= strtol(optarg, 0, 0); break;
-        case 'F': settings.flag_off |= strtol(optarg, 0, 0); break;
-        case 'G': settings.flag_alloff |= strtol(optarg, 0, 0); break;
+        case 'f': settings.flag_on |= bam_str2flag(optarg); break;
+        case 'F': settings.flag_off |= bam_str2flag(optarg); break;
+        case 'G': settings.flag_alloff |= bam_str2flag(optarg); break;
         case 'q': settings.min_mapQ = atoi(optarg); break;
         case 'u': compress_level = 0; break;
         case '1': compress_level = 1; break;
         case 'l': settings.library = strdup(optarg); break;
+        case LONGOPT('L'):
+            settings.multi_region = 1;
+            // fall through
         case 'L':
             if ((settings.bed = bed_read(optarg)) == NULL) {
                 print_error_errno("view", "Could not read file \"%s\"", optarg);
@@ -371,8 +455,14 @@ int main_samview(int argc, char *argv[])
                 goto view_end;
             }
             break;
+        case 'N':
+            if (add_read_names_file("view", &settings, optarg) != 0) {
+                ret = 1;
+                goto view_end;
+            }
+            break;
         case 'd':
-            if (strlen(optarg) < 4 || optarg[2] != ':') {
+            if (strlen(optarg) < 2 || (strlen(optarg) > 2 && optarg[2] != ':')) {
                 print_error_errno("view", "Invalid \"tag:value\" option: \"%s\"", optarg);
                 ret = 1;
                 goto view_end;
@@ -393,7 +483,8 @@ int main_samview(int argc, char *argv[])
                 memcpy(settings.tag, optarg, 2);
             }
 
-            if (add_tag_value_single("view", &settings, optarg+3) != 0) {
+            if (strlen(optarg) > 3 && add_tag_value_single("view", &settings, optarg+3) != 0) {
+                print_error("view", "Could not add tag:value \"%s\"", optarg);
                 ret = 1;
                 goto view_end;
             }
@@ -401,7 +492,7 @@ int main_samview(int argc, char *argv[])
         case 'D':
             // Allow ";" as delimiter besides ":" to support MinGW CLI POSIX
             // path translation as described at:
-            //   http://www.mingw.org/wiki/Posix_path_conversion
+            // http://www.mingw.org/wiki/Posix_path_conversion
             if (strlen(optarg) < 4 || (optarg[2] != ':' && optarg[2] != ';')) {
                 print_error_errno("view", "Invalid \"tag:file\" option: \"%s\"", optarg);
                 ret = 1;
@@ -432,6 +523,8 @@ int main_samview(int argc, char *argv[])
         //case 'x': out_format = "x"; break;
         //case 'X': out_format = "X"; break;
                  */
+        case LONGOPT('?'):
+            return usage(samtools_stdout, EXIT_SUCCESS, 1);
         case '?':
             if (optopt == '?') {  // '-?' appeared on command line
                 return usage(samtools_stdout, EXIT_SUCCESS, 1);
@@ -453,7 +546,7 @@ int main_samview(int argc, char *argv[])
         case 'x':
             {
                 if (strlen(optarg) != 2) {
-                    fprintf(samtools_stderr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n");
+                    print_error("main_samview", "Error parsing -x auxiliary tags should be exactly two characters long.");
                     return usage(samtools_stderr, EXIT_FAILURE, 0);
                 }
                 settings.remove_aux = (char**)realloc(settings.remove_aux, sizeof(char*) * (++settings.remove_aux_len));
@@ -461,13 +554,22 @@ int main_samview(int argc, char *argv[])
             }
             break;
         case 'M': settings.multi_region = 1; break;
-        case 1: no_pg = 1; break;
+        case LONGOPT('P'): no_pg = 1; break;
+        case 'e':  // -e/--expr STR: compile the filter expression once, up front
+            if (!(settings.filter = hts_filter_init(optarg))) {
+                print_error("main_samview", "Couldn't initialise filter");
+                ret = 1; goto view_end;  // leave via the common cleanup, like the other option errors, rather than returning directly
+            }
+            break;
+        case LONGOPT('r'): settings.remove_flag |= bam_str2flag(optarg); break;
+        case LONGOPT('a'): settings.add_flag |= bam_str2flag(optarg); break;
         default:
             if (parse_sam_global_opt(c, optarg, lopts, &ga) != 0)
                 return usage(samtools_stderr, EXIT_FAILURE, 0);
             break;
         }
     }
+    if (fn_fai == 0 && ga.reference) fn_fai = fai_path(ga.reference);
     if (compress_level >= 0 && !*out_format) out_format = "b";
     if (is_header_only) is_header = 1;
     // File format auto-detection first
@@ -476,8 +578,7 @@ int main_samview(int argc, char *argv[])
     // Overridden by manual -b, -C
     if (*out_format)
         out_mode[1] = out_un_mode[1] = *out_format;
-    out_mode[2] = out_un_mode[2] = '\0';
-    // out_(un_)mode now 1 or 2 bytes long, followed by nul.
+    // out_(un_)mode now 1, 2 or 3 bytes long, followed by nul.
     if (compress_level >= 0) {
         char tmp[2];
         tmp[0] = compress_level + '0'; tmp[1] = '\0';
@@ -488,20 +589,23 @@ int main_samview(int argc, char *argv[])
         print_error("view", "No input provided or missing option argument.");
         return usage(samtools_stderr, EXIT_FAILURE, 0); // potential memory leak...
     }
+    if (settings.subsam_seed != 0) {
+        // Convert likely user input 1,2,... to pseudo-random
+        // values with more entropy and more bits set
+        srand(settings.subsam_seed);
+        settings.subsam_seed = rand();
+    }
 
     fn_in = (optind < argc)? argv[optind] : "-";
-    // generate the fn_list if necessary
-    if (fn_list == 0 && ga.reference) fn_list = samfaipath(ga.reference);
-    // open file handlers
     if ((in = sam_open_format(fn_in, "r", &ga.in)) == 0) {
         print_error_errno("view", "failed to open \"%s\" for reading", fn_in);
         ret = 1;
         goto view_end;
     }
 
-    if (fn_list) {
-        if (hts_set_fai_filename(in, fn_list) != 0) {
-            fprintf(samtools_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+    if (fn_fai) {
+        if (hts_set_fai_filename(in, fn_fai) != 0) {
+            fprintf(samtools_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_fai);
             ret = 1;
             goto view_end;
         }
@@ -520,9 +624,9 @@ int main_samview(int argc, char *argv[])
             ret = 1;
             goto view_end;
         }
-        if (fn_list) {
-            if (hts_set_fai_filename(out, fn_list) != 0) {
-                fprintf(samtools_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+        if (fn_fai) {
+            if (hts_set_fai_filename(out, fn_fai) != 0) {
+                fprintf(samtools_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_fai);
                 ret = 1;
                 goto view_end;
             }
@@ -567,9 +671,9 @@ int main_samview(int argc, char *argv[])
                 ret = 1;
                 goto view_end;
             }
-            if (fn_list) {
-                if (hts_set_fai_filename(un_out, fn_list) != 0) {
-                    fprintf(samtools_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+            if (fn_fai) {
+                if (hts_set_fai_filename(un_out, fn_fai) != 0) {
+                    fprintf(samtools_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_fai);
                     ret = 1;
                     goto view_end;
                 }
@@ -656,7 +760,10 @@ int main_samview(int argc, char *argv[])
                         // fetch alignments
                         while ((result = sam_itr_multi_next(in, iter, b)) >= 0) {
                             if (!process_aln(header, b, &settings)) {
-                                if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
+                                if (!is_count) {
+                                    change_flag(b, &settings);
+                                    if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
+                                }
                                 count++;
                             } else {
                                 if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
@@ -684,16 +791,20 @@ int main_samview(int argc, char *argv[])
         if ((has_index_file && optind >= argc - 2) || (!has_index_file && optind >= argc - 1)) { // convert/print the entire file
             bam1_t *b = bam_init1();
             int r;
+            errno = 0;
             while ((r = sam_read1(in, header, b)) >= 0) { // read one alignment from `in'
                 if (!process_aln(header, b, &settings)) {
-                    if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
+                    if (!is_count) {
+                        change_flag(b, &settings);
+                        if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
+                    }
                     count++;
                 } else {
                     if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
                 }
             }
             if (r < -1) {
-                fprintf(samtools_stderr, "[main_samview] truncated file.\n");
+                print_error_errno("view", "error reading file \"%s\"", fn_in);
                 ret = 1;
             }
             bam_destroy1(b);
@@ -724,7 +835,10 @@ int main_samview(int argc, char *argv[])
                 // fetch alignments
                 while ((result = sam_itr_next(in, iter, b)) >= 0) {
                     if (!process_aln(header, b, &settings)) {
-                        if (!is_count) { if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break; }
+                        if (!is_count) {
+                            change_flag(b, &settings);
+                            if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
+                        }
                         count++;
                     } else {
                         if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
@@ -768,7 +882,7 @@ view_end:
     if (un_out) check_sam_close("view", un_out, fn_un_out, "file", &ret);
     if (fp_out) fclose(fp_out);
 
-    free(fn_list); free(fn_out); free(settings.library);  free(fn_un_out);
+    free(fn_fai); free(fn_out); free(settings.library);  free(fn_un_out);
     sam_global_args_free(&ga);
     if ( header ) sam_hdr_destroy(header);
     if (settings.bed) bed_destroy(settings.bed);
@@ -776,13 +890,19 @@ view_end:
         khint_t k;
         for (k = 0; k < kh_end(settings.rghash); ++k)
             if (kh_exist(settings.rghash, k)) free((char*)kh_key(settings.rghash, k));
-        kh_destroy(rg, settings.rghash);
+        kh_destroy(str, settings.rghash);
+    }
+    if (settings.rnhash) {
+        khint_t k;
+        for (k = 0; k < kh_end(settings.rnhash); ++k)
+            if (kh_exist(settings.rnhash, k)) free((char*)kh_key(settings.rnhash, k));
+        kh_destroy(str, settings.rnhash);
     }
     if (settings.tvhash) {
         khint_t k;
         for (k = 0; k < kh_end(settings.tvhash); ++k)
             if (kh_exist(settings.tvhash, k)) free((char*)kh_key(settings.tvhash, k));
-        kh_destroy(tv, settings.tvhash);
+        kh_destroy(str, settings.tvhash);
     }
     if (settings.remove_aux_len) {
         free(settings.remove_aux);
@@ -790,6 +910,8 @@ view_end:
     if (settings.tag) {
         free(settings.tag);
     }
+    if (settings.filter)
+        hts_filter_free(settings.filter);
 
     if (p.pool)
         hts_tpool_destroy(p.pool);
@@ -809,47 +931,52 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
 "\n"
 "Usage: samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]\n"
 "\n"
-"Options:\n"
-// output options
-"  -b       output BAM\n"
-"  -C       output CRAM (requires -T)\n"
-"  -1       use fast BAM compression (implies -b)\n"
-"  -u       uncompressed BAM output (implies -b)\n"
-"  -h       include header in SAM output\n"
-"  -H       print SAM header only (no alignments)\n"
-"  -c       print only the count of matching records\n"
-"  -o FILE  output file name [samtools_stdout]\n"
-"  -U FILE  output reads not selected by filters to FILE [null]\n"
-// extra input
-"  -t FILE  FILE listing reference names and lengths (see long help) [null]\n"
-"  -X       include customized index file\n"
-// read filters
-"  -L FILE  only include reads overlapping this BED FILE [null]\n"
-"  -r STR   only include reads in read group STR [null]\n"
-"  -R FILE  only include reads with read group listed in FILE [null]\n"
-"  -d STR:STR\n"
-"           only include reads with tag STR and associated value STR [null]\n"
-"  -D STR:FILE\n"
-"           only include reads with tag STR and associated values listed in\n"
-"           FILE [null]\n"
-"  -q INT   only include reads with mapping quality >= INT [0]\n"
-"  -l STR   only include reads in library STR [null]\n"
-"  -m INT   only include reads with number of CIGAR operations consuming\n"
-"           query sequence >= INT [0]\n"
-"  -f INT   only include reads with all  of the FLAGs in INT present [0]\n"       //   F&x == x
-"  -F INT   only include reads with none of the FLAGS in INT present [0]\n"       //   F&x == 0
-"  -G INT   only EXCLUDE reads with all  of the FLAGs in INT present [0]\n"       // !(F&x == x)
-"  -s FLOAT subsample reads (given INT.FRAC option value, 0.FRAC is the\n"
-"           fraction of templates/read pairs to keep; INT part sets seed)\n"
-"  -M       use the multi-region iterator (increases the speed, removes\n"
-"           duplicates and outputs the reads as they are ordered in the file)\n"
-// read processing
-"  -x STR   read tag to strip (repeatable) [null]\n"
-"  -B       collapse the backward CIGAR operation\n"
-// general options
-"  -?       print long help, including note about region specification\n"
-"  -S       ignored (input format is auto-detected)\n"
-"  --no-PG  do not add a PG line\n");
+"Output options:\n"
+"  -b, --bam                  Output BAM\n"
+"  -C, --cram                 Output CRAM (requires -T)\n"
+"  -1, --fast                 Use fast BAM compression (implies --bam)\n"
+"  -u, --uncompressed         Uncompressed BAM output (implies --bam)\n"
+"  -h, --with-header          Include header in SAM output\n"
+"  -H, --header-only          Print SAM header only (no alignments)\n"
+"      --no-header            Print SAM alignment records only [default]\n"
+"  -c, --count                Print only the count of matching records\n"
+"  -o, --output FILE          Write output to FILE [standard output]\n"
+"  -U, --unoutput FILE, --output-unselected FILE\n"
+"                             Output reads not selected by filters to FILE\n"
+"Input options:\n"
+"  -t, --fai-reference FILE   FILE listing reference names and lengths\n"
+"  -M, --use-index            Use index and multi-region iterator for regions\n"
+"      --region[s]-file FILE  Use index to include only reads overlapping FILE\n"
+"  -X, --customized-index     Expect extra index file argument after <in.bam>\n"
+"\n"
+"Filtering options (Only include in output reads that...):\n"
+"  -L, --target[s]-file FILE  ...overlap (BED) regions in FILE\n"
+"  -r, --read-group STR       ...are in read group STR\n"
+"  -R, --read-group-file FILE ...are in a read group listed in FILE\n"
+"  -N, --qname-file FILE      ...whose read name is listed in FILE\n"
+"  -d, --tag STR1[:STR2]      ...have a tag STR1 (with associated value STR2)\n"
+"  -D, --tag-file STR:FILE    ...have a tag STR whose value is listed in FILE\n"
+"  -q, --min-MQ INT           ...have mapping quality >= INT\n"
+"  -l, --library STR          ...are in library STR\n"
+"  -m, --min-qlen INT         ...cover >= INT query bases (as measured via CIGAR)\n"
+"  -e, --expr STR             ...match the filter expression STR\n"
+"  -f, --require-flags FLAG   ...have all of the FLAGs present\n"             //   F&x == x
+"  -F, --excl[ude]-flags FLAG ...have none of the FLAGs present\n"            //   F&x == 0
+"  -G FLAG                    EXCLUDE reads with all of the FLAGs present\n"  // !(F&x == x)  TODO long option
+"      --subsample FLOAT      Keep only FLOAT fraction of templates/read pairs\n"
+"      --subsample-seed INT   Influence WHICH reads are kept in subsampling [0]\n"
+"  -s INT.FRAC                Same as --subsample 0.FRAC --subsample-seed INT\n"
+"\n"
+"Processing options:\n"
+"      --add-flags FLAG       Add FLAGs to reads\n"
+"      --remove-flags FLAG    Remove FLAGs from reads\n"
+"  -x, --remove-tag STR       Strip tag STR from reads (option may be repeated)\n"
+"  -B, --remove-B             Collapse the backward CIGAR operation\n"
+"\n"
+"General options:\n"
+"  -?, --help   Print long help, including note about region specification\n"
+"  -S           Ignored (input format is auto-detected)\n"
+"      --no-PG  Do not add a PG line\n");
 
     sam_global_opt_help(fp, "-.O.T@..");
     fprintf(fp, "\n");
@@ -889,23 +1016,16 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
 "\n"
 "6. Option `-u' is preferred over `-b' when the output is piped to\n"
 "   another samtools command.\n"
+"\n"
+"7. Option `-M`/`--use-index` causes overlaps with `-L` BED file regions and\n"
+"   command-line region arguments to be computed using the multi-region iterator\n"
+"   and an index. This increases speed, omits duplicates, and outputs the reads\n"
+"   as they are ordered in the input SAM/BAM/CRAM file.\n"
+"\n"
+"8. Options `-L`/`--target[s]-file` and `--region[s]-file` may not be used\n"
+"   together. `--region[s]-file FILE` is simply equivalent to `-M -L FILE`,\n"
+"   so using both causes one of the specified BED files to be ignored.\n"
 "\n");
 
     return exit_status;
 }
-
-int main_import(int argc, char *argv[])
-{
-    int argc2, ret;
-    char **argv2;
-    if (argc != 4) {
-        fprintf(samtools_stderr, "Usage: samtools import <in.ref_list> <in.sam> <out.bam>\n");
-        return 1;
-    }
-    argc2 = 6;
-    argv2 = calloc(6, sizeof(char*));
-    argv2[0] = "import", argv2[1] = "-o", argv2[2] = argv[3], argv2[3] = "-bt", argv2[4] = argv[1], argv2[5] = argv[2];
-    ret = main_samview(argc2, argv2);
-    free(argv2);
-    return ret;
-}
diff --git a/samtools/samtools.pysam.c b/samtools/samtools.pysam.c
index b26f892..7044603 100644
--- a/samtools/samtools.pysam.c
+++ b/samtools/samtools.pysam.c
@@ -1,6 +1,7 @@
 #include <ctype.h>
 #include <assert.h>
 #include <unistd.h>
+#include <setjmp.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -55,6 +56,25 @@ int samtools_puts(const char *s)
   return putc('\n', samtools_stdout);
 }
 
+
+static jmp_buf samtools_jmpbuf;  // jump target armed by samtools_dispatch(); one shared buffer, so a nested dispatch would overwrite it
+static int samtools_status = 0;  // status handed from samtools_exit() back to samtools_dispatch()
+// Run samtools_main(); if it calls samtools_exit(), return the status passed there instead.
+int samtools_dispatch(int argc, char *argv[])
+{
+  if (setjmp(samtools_jmpbuf) == 0)
+    return samtools_main(argc, argv);  // direct return from setjmp: normal run
+  else
+    return samtools_status;            // re-entered through longjmp() in samtools_exit()
+}
+// Record status and jump back into samtools_dispatch(); does not return to its caller.
+void samtools_exit(int status)
+{
+  samtools_status = status;
+  longjmp(samtools_jmpbuf, 1);  // nonzero value selects the else branch in samtools_dispatch()
+}
+
+
 void samtools_set_optind(int val)
 {
   // setting this in cython via 
diff --git a/samtools/samtools.pysam.h b/samtools/samtools.pysam.h
index df8fd01..9d20ecb 100644
--- a/samtools/samtools.pysam.h
+++ b/samtools/samtools.pysam.h
@@ -3,6 +3,17 @@
 
 #include <stdio.h>
 
+#ifndef __has_attribute
+#define __has_attribute(attribute) 0  /* fallback so the #if below still parses without __has_attribute */
+#endif
+#ifndef PYSAM_NORETURN
+#if __has_attribute(__noreturn__) || __GNUC__ >= 3
+#define PYSAM_NORETURN __attribute__((__noreturn__))
+#else
+#define PYSAM_NORETURN  /* expands to nothing when the attribute is not available */
+#endif
+#endif  /* PYSAM_NORETURN */
+
 extern FILE * samtools_stderr;
 
 extern FILE * samtools_stdout;
@@ -40,6 +51,8 @@ int samtools_puts(const char *s);
 
 int samtools_dispatch(int argc, char *argv[]);
 
+void PYSAM_NORETURN samtools_exit(int status);
+
 void samtools_set_optind(int);
 
 extern int samtools_main(int argc, char *argv[]);
diff --git a/samtools/stats.c b/samtools/stats.c
index 55ede4c..f030cf5 100644
--- a/samtools/stats.c
+++ b/samtools/stats.c
@@ -1,6 +1,6 @@
 /*  stats.c -- This is the former bamcheck integrated into samtools/htslib.
 
-    Copyright (C) 2012-2019 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
     Author: Sam Nicholls <sam@samnicholls.net>
@@ -175,8 +175,8 @@ typedef struct
     // Arrays for the histogram data
     uint64_t *quals_1st, *quals_2nd;
     uint64_t *gc_1st, *gc_2nd;
-    acgtno_count_t *acgtno_cycles_1st;
-    acgtno_count_t *acgtno_cycles_2nd;
+    acgtno_count_t *acgtno_cycles_1st, *acgtno_cycles_2nd;
+    acgtno_count_t *acgtno_revcomp;
     uint64_t *read_lengths, *read_lengths_1st, *read_lengths_2nd;
     uint64_t *insertions, *deletions;
     uint64_t *ins_cycles_1st, *ins_cycles_2nd, *del_cycles_1st, *del_cycles_2nd;
@@ -210,7 +210,7 @@ typedef struct
     uint64_t nbases_mapped_cigar;
     uint64_t nbases_trimmed;  // bwa trimmed bases
     uint64_t nmismatches;
-    uint64_t nreads_QCfailed, nreads_secondary;
+    uint64_t nreads_QCfailed, nreads_secondary, nreads_supplementary;
     struct {
         uint32_t names, reads, quals;
     } checksum;
@@ -250,7 +250,7 @@ typedef struct
     uint32_t nchunks;
 
     uint32_t pair_count;          // Number of active pairs in the pairing hash table
-    uint32_t target_count;        // Number of bases covered by the target file
+    uint64_t target_count;        // Number of bases covered by the target file
     uint32_t last_pair_tid;
     uint32_t last_read_flush;
 
@@ -647,6 +647,11 @@ void realloc_buffers(stats_t *stats, int seq_len)
         error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len, n*sizeof(acgtno_count_t));
     memset(stats->acgtno_cycles_2nd + stats->nbases, 0, (n-stats->nbases)*sizeof(acgtno_count_t));
 
+    stats->acgtno_revcomp = realloc(stats->acgtno_revcomp, n*sizeof(acgtno_count_t));
+    if ( !stats->acgtno_revcomp )
+        error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len, n*sizeof(acgtno_count_t));
+    memset(stats->acgtno_revcomp + stats->nbases, 0, (n-stats->nbases)*sizeof(acgtno_count_t));
+
     stats->read_lengths = realloc(stats->read_lengths, n*sizeof(uint64_t));
     if ( !stats->read_lengths )
         error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*sizeof(uint64_t));
@@ -870,16 +875,20 @@ void collect_orig_read_stats(bam1_t *bam_line, stats_t *stats, int* gc_count_out
             switch (bam_seqi(seq, i)) {
             case 1:
                 acgtno_cycles[ read_cycle ].a++;
+                reverse ? stats->acgtno_revcomp[ read_cycle ].t++ : stats->acgtno_revcomp[ read_cycle ].a++;
                 break;
             case 2:
                 acgtno_cycles[ read_cycle ].c++;
+                reverse ? stats->acgtno_revcomp[ read_cycle ].g++ : stats->acgtno_revcomp[ read_cycle ].c++;
                 gc_count++;
                 break;
             case 4:
                 acgtno_cycles[ read_cycle ].g++;
+                reverse ? stats->acgtno_revcomp[ read_cycle ].c++ : stats->acgtno_revcomp[ read_cycle ].g++;
                 gc_count++;
                 break;
             case 8:
+                reverse ? stats->acgtno_revcomp[ read_cycle ].a++ : stats->acgtno_revcomp[ read_cycle ].t++;
                 acgtno_cycles[ read_cycle ].t++;
                 break;
             case 15:
@@ -1129,6 +1138,8 @@ static void remove_overlaps(bam1_t *bam_line, khash_t(qn2pair) *read_pairs, stat
 
 void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pairs)
 {
+    if ( !is_in_regions(bam_line,stats) )
+        return;
     if ( stats->rg_hash )
     {
         const uint8_t *rg = bam_aux_get(bam_line, "RG");
@@ -1145,8 +1156,6 @@ void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pair
         stats->nreads_filtered++;
         return;
     }
-    if ( !is_in_regions(bam_line,stats) )
-        return;
     if ( stats->info->filter_readlen!=-1 && bam_line->core.l_qseq!=stats->info->filter_readlen )
         return;
 
@@ -1159,6 +1168,11 @@ void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pair
         return;
     }
 
+    if ( bam_line->core.flag & BAM_FSUPPLEMENTARY )
+    {
+        stats->nreads_supplementary++;
+    }
+
     // If line has no sequence cannot continue
     int seq_len = bam_line->core.l_qseq;
     if ( !seq_len ) return;
@@ -1187,8 +1201,7 @@ void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pair
 
     // These stats should only be calculated for the original reads ignoring supplementary artificial reads
     // otherwise we'll accidentally double count
-    if ( IS_ORIGINAL(bam_line) )
-    {
+    if ( IS_ORIGINAL(bam_line) ) {
         stats->read_lengths[read_len]++;
         if ( order == READ_ORDER_FIRST ) stats->read_lengths_1st[read_len]++;
         if ( order == READ_ORDER_LAST ) stats->read_lengths_2nd[read_len]++;
@@ -1200,7 +1213,7 @@ void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pair
 
     count_indels(stats, bam_line);
 
-    if ( IS_PAIRED_AND_MAPPED(bam_line) )
+    if ( IS_PAIRED_AND_MAPPED(bam_line) && IS_ORIGINAL(bam_line) )
     {
         // The insert size is tricky, because for long inserts the libraries are
         // prepared differently and the pairs point in other direction. BWA does
@@ -1495,7 +1508,7 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
     fprintf(to, "# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)\n");
     fprintf(to, "CHK\t%08x\t%08x\t%08x\n", stats->checksum.names,stats->checksum.reads,stats->checksum.quals);
     fprintf(to, "# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.\n");
-    fprintf(to, "SN\traw total sequences:\t%ld\n", (long)(stats->nreads_filtered+stats->nreads_1st+stats->nreads_2nd+stats->nreads_other));  // not counting excluded seqs (and none of the below)
+    fprintf(to, "SN\traw total sequences:\t%ld\t# excluding supplementary and secondary reads\n", (long)(stats->nreads_filtered+stats->nreads_1st+stats->nreads_2nd+stats->nreads_other));  // not counting excluded seqs (and none of the below)
     fprintf(to, "SN\tfiltered sequences:\t%ld\n", (long)stats->nreads_filtered);
     fprintf(to, "SN\tsequences:\t%ld\n", (long)(stats->nreads_1st+stats->nreads_2nd+stats->nreads_other));
     fprintf(to, "SN\tis sorted:\t%d\n", stats->is_sorted ? 1 : 0);
@@ -1510,6 +1523,7 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
     fprintf(to, "SN\treads MQ0:\t%ld\t# mapped and MQ=0\n", (long)stats->nreads_mq0);
     fprintf(to, "SN\treads QC failed:\t%ld\n", (long)stats->nreads_QCfailed);
     fprintf(to, "SN\tnon-primary alignments:\t%ld\n", (long)stats->nreads_secondary);
+    fprintf(to, "SN\tsupplementary alignments:\t%ld\n", (long)stats->nreads_supplementary);
     fprintf(to, "SN\ttotal length:\t%ld\t# ignores clipping\n", (long)stats->total_len);
     fprintf(to, "SN\ttotal first fragment length:\t%ld\t# ignores clipping\n", (long)stats->total_len_1st);
     fprintf(to, "SN\ttotal last fragment length:\t%ld\t# ignores clipping\n", (long)stats->total_len_2nd);
@@ -1535,7 +1549,7 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
     fprintf(to, "SN\tpairs on different chromosomes:\t%ld\n", (long)stats->nreads_anomalous/2);
     fprintf(to, "SN\tpercentage of properly paired reads (%%):\t%.1f\n", (stats->nreads_1st+stats->nreads_2nd+stats->nreads_other)? (float)(100*stats->nreads_properly_paired)/(stats->nreads_1st+stats->nreads_2nd+stats->nreads_other):0);
     if ( stats->target_count ) {
-        fprintf(to, "SN\tbases inside the target:\t%u\n", stats->target_count);
+        fprintf(to, "SN\tbases inside the target:\t%" PRIu64 "\n", stats->target_count);
         for (icov=stats->info->cov_threshold+1; icov<stats->ncov; icov++)
             cov_sum += stats->cov[icov];
         fprintf(to, "SN\tpercentage of target genome with coverage > %d (%%):\t%.2f\n", stats->info->cov_threshold, (float)(100*cov_sum)/stats->target_count);
@@ -1612,7 +1626,18 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
                 100.*(acgtno_count_1st->t + acgtno_count_2nd->t)/acgt_sum,
                 100.*(acgtno_count_1st->n + acgtno_count_2nd->n)/acgt_sum,
                 100.*(acgtno_count_1st->other + acgtno_count_2nd->other)/acgt_sum);
-
+    }
+    fprintf(to, "# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%%]\n");
+    for (ibase=0; ibase<stats->max_len; ibase++)
+    {
+        acgtno_count_t *acgtno_count = &(stats->acgtno_revcomp[ibase]);
+        uint64_t acgt_sum = acgtno_count->a + acgtno_count->c + acgtno_count->g + acgtno_count->t;
+        if ( ! acgt_sum ) continue;
+        fprintf(to, "GCT\t%d\t%.2f\t%.2f\t%.2f\t%.2f\n", ibase+1,
+                100.*(acgtno_count->a)/acgt_sum,
+                100.*(acgtno_count->c)/acgt_sum,
+                100.*(acgtno_count->g)/acgt_sum,
+                100.*(acgtno_count->t)/acgt_sum);
     }
 
     uint64_t tA=0, tC=0, tG=0, tT=0, tN=0;
@@ -1800,7 +1825,7 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
     }
 }
 
-static void init_regions(stats_t *stats, const char *file)
+static void init_regions(stats_t *stats, const char *file, stats_info_t* info)
 {
     FILE *fp = fopen(file,"r");
     if ( !fp ) error("%s: %s\n",file,strerror(errno));
@@ -1877,8 +1902,15 @@ static void init_regions(stats_t *stats, const char *file)
             }
             reg->npos = ++new_p;
         }
-        for (p = 0; p < reg->npos; p++)
-            stats->target_count += (reg->pos[p].end - reg->pos[p].beg + 1);
+        for (p = 0; p < reg->npos; p++) {
+            if (reg->pos[p].end < HTS_POS_MAX) {
+                stats->target_count += (reg->pos[p].end - reg->pos[p].beg + 1);
+            } else {
+                uint64_t hdr_end = sam_hdr_tid2len(info->sam_header, r);
+                if (hdr_end)
+                    stats->target_count += (hdr_end - reg->pos[p].beg + 1);
+            }
+        }
     }
 
     if (!(stats->chunks = calloc(stats->nchunks, sizeof(hts_pair_pos_t))))
@@ -1941,7 +1973,7 @@ int is_in_regions(bam1_t *bam_line, stats_t *stats)
     return 1;
 }
 
-int replicate_regions(stats_t *stats, hts_itr_multi_t *iter) {
+int replicate_regions(stats_t *stats, hts_itr_multi_t *iter, stats_info_t *info) {
     if ( !stats || !iter)
         return 1;
 
@@ -1975,8 +2007,13 @@ int replicate_regions(stats_t *stats, hts_itr_multi_t *iter) {
         for (j = 0; j < stats->regions[tid].npos; j++) {
             stats->regions[tid].pos[j].beg = iter->reg_list[i].intervals[j].beg+1;
             stats->regions[tid].pos[j].end = iter->reg_list[i].intervals[j].end;
-
-            stats->target_count += (stats->regions[tid].pos[j].end - stats->regions[tid].pos[j].beg + 1);
+            if (stats->regions[tid].pos[j].end < HTS_POS_MAX) {
+                stats->target_count += (stats->regions[tid].pos[j].end - stats->regions[tid].pos[j].beg + 1);
+            } else {
+                uint64_t hdr_end = sam_hdr_tid2len(info->sam_header, tid);
+                if (hdr_end)
+                    stats->target_count += (hdr_end - stats->regions[tid].pos[j].beg + 1);
+            }
         }
     }
 
@@ -2073,6 +2110,7 @@ void cleanup_stats(stats_t* stats)
     free(stats->mpc_buf);
     free(stats->acgtno_cycles_1st);
     free(stats->acgtno_cycles_2nd);
+    free(stats->acgtno_revcomp);
     free(stats->read_lengths);
     free(stats->read_lengths_1st);
     free(stats->read_lengths_2nd);
@@ -2257,6 +2295,8 @@ static void init_stat_structs(stats_t* stats, stats_info_t* info, const char* gr
     if (!stats->acgtno_cycles_1st) goto nomem;
     stats->acgtno_cycles_2nd  = calloc(stats->nbases,sizeof(acgtno_count_t));
     if (!stats->acgtno_cycles_2nd) goto nomem;
+    stats->acgtno_revcomp  = calloc(stats->nbases,sizeof(acgtno_count_t));
+    if (!stats->acgtno_revcomp) goto nomem;
     stats->read_lengths   = calloc(stats->nbases,sizeof(uint64_t));
     if (!stats->read_lengths)     goto nomem;
     stats->read_lengths_1st   = calloc(stats->nbases,sizeof(uint64_t));
@@ -2279,7 +2319,7 @@ static void init_stat_structs(stats_t* stats, stats_info_t* info, const char* gr
         goto nomem;
     realloc_rseq_buffer(stats);
     if ( targets )
-        init_regions(stats, targets);
+        init_regions(stats, targets, info);
     return;
  nomem:
     error("Out of memory");
@@ -2459,7 +2499,7 @@ int main_stats(int argc, char *argv[])
             if (iter) {
                 if (!targets) {
                     all_stats->nchunks = argc-optind;
-                    if (replicate_regions(all_stats, iter))
+                    if (replicate_regions(all_stats, iter, info))
                         fprintf(stderr, "Replications of the regions failed\n");
                 }
 
diff --git a/samtools/stats.c.pysam.c b/samtools/stats.c.pysam.c
index 3d126a7..9e8165d 100644
--- a/samtools/stats.c.pysam.c
+++ b/samtools/stats.c.pysam.c
@@ -2,7 +2,7 @@
 
 /*  stats.c -- This is the former bamcheck integrated into samtools/htslib.
 
-    Copyright (C) 2012-2019 Genome Research Ltd.
+    Copyright (C) 2012-2021 Genome Research Ltd.
 
     Author: Petr Danecek <pd3@sanger.ac.uk>
     Author: Sam Nicholls <sam@samnicholls.net>
@@ -177,8 +177,8 @@ typedef struct
     // Arrays for the histogram data
     uint64_t *quals_1st, *quals_2nd;
     uint64_t *gc_1st, *gc_2nd;
-    acgtno_count_t *acgtno_cycles_1st;
-    acgtno_count_t *acgtno_cycles_2nd;
+    acgtno_count_t *acgtno_cycles_1st, *acgtno_cycles_2nd;
+    acgtno_count_t *acgtno_revcomp;
     uint64_t *read_lengths, *read_lengths_1st, *read_lengths_2nd;
     uint64_t *insertions, *deletions;
     uint64_t *ins_cycles_1st, *ins_cycles_2nd, *del_cycles_1st, *del_cycles_2nd;
@@ -212,7 +212,7 @@ typedef struct
     uint64_t nbases_mapped_cigar;
     uint64_t nbases_trimmed;  // bwa trimmed bases
     uint64_t nmismatches;
-    uint64_t nreads_QCfailed, nreads_secondary;
+    uint64_t nreads_QCfailed, nreads_secondary, nreads_supplementary;
     struct {
         uint32_t names, reads, quals;
     } checksum;
@@ -252,7 +252,7 @@ typedef struct
     uint32_t nchunks;
 
     uint32_t pair_count;          // Number of active pairs in the pairing hash table
-    uint32_t target_count;        // Number of bases covered by the target file
+    uint64_t target_count;        // Number of bases covered by the target file
     uint32_t last_pair_tid;
     uint32_t last_read_flush;
 
@@ -649,6 +649,11 @@ void realloc_buffers(stats_t *stats, int seq_len)
         error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len, n*sizeof(acgtno_count_t));
     memset(stats->acgtno_cycles_2nd + stats->nbases, 0, (n-stats->nbases)*sizeof(acgtno_count_t));
 
+    stats->acgtno_revcomp = realloc(stats->acgtno_revcomp, n*sizeof(acgtno_count_t));
+    if ( !stats->acgtno_revcomp )
+        error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len, n*sizeof(acgtno_count_t));
+    memset(stats->acgtno_revcomp + stats->nbases, 0, (n-stats->nbases)*sizeof(acgtno_count_t));
+
     stats->read_lengths = realloc(stats->read_lengths, n*sizeof(uint64_t));
     if ( !stats->read_lengths )
         error("Could not realloc buffers, the sequence too long: %d (%ld)\n", seq_len,n*sizeof(uint64_t));
@@ -872,16 +877,20 @@ void collect_orig_read_stats(bam1_t *bam_line, stats_t *stats, int* gc_count_out
             switch (bam_seqi(seq, i)) {
             case 1:
                 acgtno_cycles[ read_cycle ].a++;
+                reverse ? stats->acgtno_revcomp[ read_cycle ].t++ : stats->acgtno_revcomp[ read_cycle ].a++;
                 break;
             case 2:
                 acgtno_cycles[ read_cycle ].c++;
+                reverse ? stats->acgtno_revcomp[ read_cycle ].g++ : stats->acgtno_revcomp[ read_cycle ].c++;
                 gc_count++;
                 break;
             case 4:
                 acgtno_cycles[ read_cycle ].g++;
+                reverse ? stats->acgtno_revcomp[ read_cycle ].c++ : stats->acgtno_revcomp[ read_cycle ].g++;
                 gc_count++;
                 break;
             case 8:
+                reverse ? stats->acgtno_revcomp[ read_cycle ].a++ : stats->acgtno_revcomp[ read_cycle ].t++;
                 acgtno_cycles[ read_cycle ].t++;
                 break;
             case 15:
@@ -1131,6 +1140,8 @@ static void remove_overlaps(bam1_t *bam_line, khash_t(qn2pair) *read_pairs, stat
 
 void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pairs)
 {
+    if ( !is_in_regions(bam_line,stats) )
+        return;
     if ( stats->rg_hash )
     {
         const uint8_t *rg = bam_aux_get(bam_line, "RG");
@@ -1147,8 +1158,6 @@ void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pair
         stats->nreads_filtered++;
         return;
     }
-    if ( !is_in_regions(bam_line,stats) )
-        return;
     if ( stats->info->filter_readlen!=-1 && bam_line->core.l_qseq!=stats->info->filter_readlen )
         return;
 
@@ -1161,6 +1170,11 @@ void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pair
         return;
     }
 
+    if ( bam_line->core.flag & BAM_FSUPPLEMENTARY )
+    {
+        stats->nreads_supplementary++;
+    }
+
     // If line has no sequence cannot continue
     int seq_len = bam_line->core.l_qseq;
     if ( !seq_len ) return;
@@ -1189,8 +1203,7 @@ void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pair
 
     // These stats should only be calculated for the original reads ignoring supplementary artificial reads
     // otherwise we'll accidentally double count
-    if ( IS_ORIGINAL(bam_line) )
-    {
+    if ( IS_ORIGINAL(bam_line) ) {
         stats->read_lengths[read_len]++;
         if ( order == READ_ORDER_FIRST ) stats->read_lengths_1st[read_len]++;
         if ( order == READ_ORDER_LAST ) stats->read_lengths_2nd[read_len]++;
@@ -1202,7 +1215,7 @@ void collect_stats(bam1_t *bam_line, stats_t *stats, khash_t(qn2pair) *read_pair
 
     count_indels(stats, bam_line);
 
-    if ( IS_PAIRED_AND_MAPPED(bam_line) )
+    if ( IS_PAIRED_AND_MAPPED(bam_line) && IS_ORIGINAL(bam_line) )
     {
         // The insert size is tricky, because for long inserts the libraries are
         // prepared differently and the pairs point in other direction. BWA does
@@ -1497,7 +1510,7 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
     fprintf(to, "# CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow)\n");
     fprintf(to, "CHK\t%08x\t%08x\t%08x\n", stats->checksum.names,stats->checksum.reads,stats->checksum.quals);
     fprintf(to, "# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.\n");
-    fprintf(to, "SN\traw total sequences:\t%ld\n", (long)(stats->nreads_filtered+stats->nreads_1st+stats->nreads_2nd+stats->nreads_other));  // not counting excluded seqs (and none of the below)
+    fprintf(to, "SN\traw total sequences:\t%ld\t# excluding supplementary and secondary reads\n", (long)(stats->nreads_filtered+stats->nreads_1st+stats->nreads_2nd+stats->nreads_other));  // not counting excluded seqs (and none of the below)
     fprintf(to, "SN\tfiltered sequences:\t%ld\n", (long)stats->nreads_filtered);
     fprintf(to, "SN\tsequences:\t%ld\n", (long)(stats->nreads_1st+stats->nreads_2nd+stats->nreads_other));
     fprintf(to, "SN\tis sorted:\t%d\n", stats->is_sorted ? 1 : 0);
@@ -1512,6 +1525,7 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
     fprintf(to, "SN\treads MQ0:\t%ld\t# mapped and MQ=0\n", (long)stats->nreads_mq0);
     fprintf(to, "SN\treads QC failed:\t%ld\n", (long)stats->nreads_QCfailed);
     fprintf(to, "SN\tnon-primary alignments:\t%ld\n", (long)stats->nreads_secondary);
+    fprintf(to, "SN\tsupplementary alignments:\t%ld\n", (long)stats->nreads_supplementary);
     fprintf(to, "SN\ttotal length:\t%ld\t# ignores clipping\n", (long)stats->total_len);
     fprintf(to, "SN\ttotal first fragment length:\t%ld\t# ignores clipping\n", (long)stats->total_len_1st);
     fprintf(to, "SN\ttotal last fragment length:\t%ld\t# ignores clipping\n", (long)stats->total_len_2nd);
@@ -1537,7 +1551,7 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
     fprintf(to, "SN\tpairs on different chromosomes:\t%ld\n", (long)stats->nreads_anomalous/2);
     fprintf(to, "SN\tpercentage of properly paired reads (%%):\t%.1f\n", (stats->nreads_1st+stats->nreads_2nd+stats->nreads_other)? (float)(100*stats->nreads_properly_paired)/(stats->nreads_1st+stats->nreads_2nd+stats->nreads_other):0);
     if ( stats->target_count ) {
-        fprintf(to, "SN\tbases inside the target:\t%u\n", stats->target_count);
+        fprintf(to, "SN\tbases inside the target:\t%" PRIu64 "\n", stats->target_count);
         for (icov=stats->info->cov_threshold+1; icov<stats->ncov; icov++)
             cov_sum += stats->cov[icov];
         fprintf(to, "SN\tpercentage of target genome with coverage > %d (%%):\t%.2f\n", stats->info->cov_threshold, (float)(100*cov_sum)/stats->target_count);
@@ -1614,7 +1628,18 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
                 100.*(acgtno_count_1st->t + acgtno_count_2nd->t)/acgt_sum,
                 100.*(acgtno_count_1st->n + acgtno_count_2nd->n)/acgt_sum,
                 100.*(acgtno_count_1st->other + acgtno_count_2nd->other)/acgt_sum);
-
+    }
+    fprintf(to, "# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%%]\n");
+    for (ibase=0; ibase<stats->max_len; ibase++)
+    {
+        acgtno_count_t *acgtno_count = &(stats->acgtno_revcomp[ibase]);
+        uint64_t acgt_sum = acgtno_count->a + acgtno_count->c + acgtno_count->g + acgtno_count->t;
+        if ( ! acgt_sum ) continue;
+        fprintf(to, "GCT\t%d\t%.2f\t%.2f\t%.2f\t%.2f\n", ibase+1,
+                100.*(acgtno_count->a)/acgt_sum,
+                100.*(acgtno_count->c)/acgt_sum,
+                100.*(acgtno_count->g)/acgt_sum,
+                100.*(acgtno_count->t)/acgt_sum);
     }
 
     uint64_t tA=0, tC=0, tG=0, tT=0, tN=0;
@@ -1802,7 +1827,7 @@ void output_stats(FILE *to, stats_t *stats, int sparse)
     }
 }
 
-static void init_regions(stats_t *stats, const char *file)
+static void init_regions(stats_t *stats, const char *file, stats_info_t* info)
 {
     FILE *fp = fopen(file,"r");
     if ( !fp ) error("%s: %s\n",file,strerror(errno));
@@ -1879,8 +1904,15 @@ static void init_regions(stats_t *stats, const char *file)
             }
             reg->npos = ++new_p;
         }
-        for (p = 0; p < reg->npos; p++)
-            stats->target_count += (reg->pos[p].end - reg->pos[p].beg + 1);
+        for (p = 0; p < reg->npos; p++) {
+            if (reg->pos[p].end < HTS_POS_MAX) {
+                stats->target_count += (reg->pos[p].end - reg->pos[p].beg + 1);
+            } else {
+                uint64_t hdr_end = sam_hdr_tid2len(info->sam_header, r);
+                if (hdr_end)
+                    stats->target_count += (hdr_end - reg->pos[p].beg + 1);
+            }
+        }
     }
 
     if (!(stats->chunks = calloc(stats->nchunks, sizeof(hts_pair_pos_t))))
@@ -1943,7 +1975,7 @@ int is_in_regions(bam1_t *bam_line, stats_t *stats)
     return 1;
 }
 
-int replicate_regions(stats_t *stats, hts_itr_multi_t *iter) {
+int replicate_regions(stats_t *stats, hts_itr_multi_t *iter, stats_info_t *info) {
     if ( !stats || !iter)
         return 1;
 
@@ -1977,8 +2009,13 @@ int replicate_regions(stats_t *stats, hts_itr_multi_t *iter) {
         for (j = 0; j < stats->regions[tid].npos; j++) {
             stats->regions[tid].pos[j].beg = iter->reg_list[i].intervals[j].beg+1;
             stats->regions[tid].pos[j].end = iter->reg_list[i].intervals[j].end;
-
-            stats->target_count += (stats->regions[tid].pos[j].end - stats->regions[tid].pos[j].beg + 1);
+            if (stats->regions[tid].pos[j].end < HTS_POS_MAX) {
+                stats->target_count += (stats->regions[tid].pos[j].end - stats->regions[tid].pos[j].beg + 1);
+            } else {
+                uint64_t hdr_end = sam_hdr_tid2len(info->sam_header, tid);
+                if (hdr_end)
+                    stats->target_count += (hdr_end - stats->regions[tid].pos[j].beg + 1);
+            }
         }
     }
 
@@ -2054,7 +2091,7 @@ static void HTS_NORETURN error(const char *format, ...)
         vfprintf(samtools_stderr, format, ap);
         va_end(ap);
     }
-    exit(1);
+    samtools_exit(1);
 }
 
 void cleanup_stats_info(stats_info_t* info){
@@ -2075,6 +2112,7 @@ void cleanup_stats(stats_t* stats)
     free(stats->mpc_buf);
     free(stats->acgtno_cycles_1st);
     free(stats->acgtno_cycles_2nd);
+    free(stats->acgtno_revcomp);
     free(stats->read_lengths);
     free(stats->read_lengths_1st);
     free(stats->read_lengths_2nd);
@@ -2259,6 +2297,8 @@ static void init_stat_structs(stats_t* stats, stats_info_t* info, const char* gr
     if (!stats->acgtno_cycles_1st) goto nomem;
     stats->acgtno_cycles_2nd  = calloc(stats->nbases,sizeof(acgtno_count_t));
     if (!stats->acgtno_cycles_2nd) goto nomem;
+    stats->acgtno_revcomp  = calloc(stats->nbases,sizeof(acgtno_count_t));
+    if (!stats->acgtno_revcomp) goto nomem;
     stats->read_lengths   = calloc(stats->nbases,sizeof(uint64_t));
     if (!stats->read_lengths)     goto nomem;
     stats->read_lengths_1st   = calloc(stats->nbases,sizeof(uint64_t));
@@ -2281,7 +2321,7 @@ static void init_stat_structs(stats_t* stats, stats_info_t* info, const char* gr
         goto nomem;
     realloc_rseq_buffer(stats);
     if ( targets )
-        init_regions(stats, targets);
+        init_regions(stats, targets, info);
     return;
  nomem:
     error("Out of memory");
@@ -2461,7 +2501,7 @@ int main_stats(int argc, char *argv[])
             if (iter) {
                 if (!targets) {
                     all_stats->nchunks = argc-optind;
-                    if (replicate_regions(all_stats, iter))
+                    if (replicate_regions(all_stats, iter, info))
                         fprintf(samtools_stderr, "Replications of the regions failed\n");
                 }
 
diff --git a/samtools/stats_isize.c.pysam.c b/samtools/stats_isize.c.pysam.c
index 96feb90..1bb2bd4 100644
--- a/samtools/stats_isize.c.pysam.c
+++ b/samtools/stats_isize.c.pysam.c
@@ -97,7 +97,7 @@ static void sparse_set_f(isize_data_t data, int at, isize_insert_t field, uint64
             a->max = max(at, a->max);
         } else {
             fprintf(samtools_stderr, "%s\n", "Failed to allocate memory for isize_sparse_record_t");
-            exit(11);
+            samtools_exit(11);
         }
     } else {
         return;
diff --git a/samtools/tmp_file.h b/samtools/tmp_file.h
index 15d088e..4f2647c 100644
--- a/samtools/tmp_file.h
+++ b/samtools/tmp_file.h
@@ -31,7 +31,7 @@ DEALINGS IN THE SOFTWARE
 #include <lz4.h>
 #include "htslib/sam.h"
 
-#ifdef _cplusplus
+#ifdef __cplusplus
 extern "C" {
 #endif
 
diff --git a/samtools/version.sh b/samtools/version.sh
index 5ccd9bb..9d28100 100755
--- a/samtools/version.sh
+++ b/samtools/version.sh
@@ -24,7 +24,7 @@
 # DEALINGS IN THE SOFTWARE.
 
 # Master version, for use in tarballs or non-git source copies
-VERSION=1.10
+VERSION=1.13
 
 # If we have a git clone, then check against the current tag
 if [ -e .git ]
diff --git a/setup.py b/setup.py
index 072ed8a..5f2bb00 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,10 @@ import subprocess
 import sys
 import sysconfig
 from contextlib import contextmanager
-from setuptools import setup
+from distutils import log
+from setuptools import setup, Command
+from setuptools.command.sdist import sdist
+
 from cy_build import CyExtension as Extension, cy_build_ext as build_ext
 try:
     import cython
@@ -79,6 +82,61 @@ def run_make_print_config():
     return make_print_config
 
 
+# This function emulates the way distutils combines settings from sysconfig,
+# environment variables, and the extension being built. It returns a dictionary
+# representing the usual set of variables, suitable for writing to a generated
+# file or for running configure (provided the returned LIBS is ignored).
+def build_config_dict(ext):
+    def env(var):
+        return [os.environ[var]] if var in os.environ else []
+
+    def sc(var):
+        value = sysconfig.get_config_var(var)
+        return [value] if value is not None else []
+
+    def optionise(option, valuelist):
+        def quote(s): return "'"+s+"'" if " " in s else s
+        return list(quote(option+v) for v in valuelist)
+
+    def kvtuples(pairlist):
+        def appendoptvalue(t): return t[0] if t[1] is None else t[0]+"="+t[1]
+        return map(appendoptvalue, pairlist)
+
+    # For CC, select the first of these that is set
+    cc = (env('CC') + sc('CC') + ['gcc'])[0]
+
+    # distutils ignores sysconfig for CPPFLAGS
+    cppflags = " ".join(env('CPPFLAGS') + optionise('-I', ext.include_dirs) +
+                        optionise('-D', kvtuples(ext.define_macros)) +
+                        optionise('-U', ext.undef_macros))
+
+    cflags = " ".join(sc('CFLAGS') + env('CFLAGS') + ext.extra_compile_args)
+
+    # distutils actually includes $CPPFLAGS here too, but that's weird and
+    # unnecessary for us as we know the output LDFLAGS will be used correctly
+    ldflags = " ".join(sc('LDFLAGS') + env('LDFLAGS') + env('CFLAGS') +
+                       optionise('-L', ext.library_dirs) +
+                       ext.extra_link_args)
+
+    # ext.libraries is computed (incorporating $LIBS etc) during configure
+    libs = " ".join(optionise('-l', ext.libraries))
+
+    return { 'CC': cc, 'CPPFLAGS': cppflags, 'CFLAGS': cflags,
+             'LDFLAGS': ldflags, 'LIBS': libs }
+
+
+def write_configvars_header(filename, ext, prefix):
+    config = build_config_dict(ext)
+    if prefix != 'HTS':
+        config['HTSDIR'] = '(unused)'
+        config['CURSES_LIB'] = '(unused)'
+
+    log.info("creating %s for '%s' extension", filename, ext.name)
+    with open(filename, "w") as outf:
+        for var, value in config.items():
+            outf.write('#define {}_{} "{}"\n'.format(prefix, var, value))
+
+
 @contextmanager
 def set_compiler_envvars():
     tmp_vars = []
@@ -140,6 +198,46 @@ def get_pysam_version():
     return version.__version__
 
 
+# Override sdist command to ensure Cythonized *.c files are included.
+class cythonize_sdist(sdist):
+    # Remove when setuptools (as installed on GH runners) has these options
+    if not any(opt[0] == 'owner=' for opt in sdist.user_options):
+        sdist.user_options.append(('owner=', 'u', 'Specify owner inside tar'))
+    if not any(opt[0] == 'group=' for opt in sdist.user_options):
+        sdist.user_options.append(('group=', 'g', 'Specify group inside tar'))
+
+    def run(self):
+        from Cython.Build import cythonize
+        cythonize(self.distribution.ext_modules)
+        super().run()
+
+
+class clean_ext(Command):
+    description = "clean up Cython temporary files"
+    user_options = []
+
+    def initialize_options(self):
+        pass
+
+    def finalize_options(self):
+        pass
+
+    def run(self):
+        objs = glob.glob(os.path.join("pysam", "libc*.c"))
+        if objs:
+            log.info("removing 'pysam/libc*.c' (%s Cython objects)", len(objs))
+        for obj in objs:
+            os.remove(obj)
+
+        headers = (glob.glob(os.path.join("htslib",   "*config*.h")) +
+                   glob.glob(os.path.join("samtools", "*config*.h")) +
+                   glob.glob(os.path.join("bcftools", "*config*.h")))
+        if headers:
+            log.info("removing '*/*config*.h' (%s generated headers)", len(headers))
+        for header in headers:
+            os.remove(header)
+
+
 # How to link against HTSLIB
 # shared:   build shared chtslib from builtin htslib code.
 # external: use shared libhts.so compiled outside of
@@ -170,8 +268,6 @@ package_dirs = {'pysam': 'pysam',
 config_headers = ["samtools/config.h",
                   "bcftools/config.h"]
 
-cmdclass = {'build_ext': build_ext}
-
 # If cython is available, the pysam will be built using cython from
 # the .pyx files. If no cython is available, the C-files included in the
 # distribution will be used.
@@ -191,22 +287,6 @@ if not os.path.exists(fn):
         "from the repository"
         .format(fn))
 
-# exclude sources that contain a main function
-EXCLUDE = {
-    "samtools": (
-    ),
-    "bcftools": (
-        "test", "plugins", "peakfit.c",
-        "peakfit.h",
-        # needs to renamed, name conflict with samtools reheader
-        "reheader.c",
-        "polysomy.c"),
-    "htslib": (
-        'htslib/tabix.c',
-        'htslib/bgzip.c',
-        'htslib/htsfile.c'),
-}
-
 print ("# pysam: htslib mode is {}".format(HTSLIB_MODE))
 print ("# pysam: HTSLIB_CONFIGURE_OPTIONS={}".format(
     HTSLIB_CONFIGURE_OPTIONS))
@@ -364,11 +444,20 @@ libraries_for_pysam_module = external_htslib_libraries + internal_htslib_librari
 # The list below uses the union of include_dirs and library_dirs for
 # reasons of simplicity.
 
+def prebuild_libchtslib(ext, force):
+    if HTSLIB_MODE not in ['shared', 'separate']: return
+    write_configvars_header("htslib/config_vars.h", ext, "HTS")
+
+def prebuild_libcsamtools(ext, force):
+    write_configvars_header("samtools/samtools_config_vars.h", ext, "SAMTOOLS")
+
 modules = [
     dict(name="pysam.libchtslib",
+         prebuild_func=prebuild_libchtslib,
          sources=[source_pattern % "htslib", "pysam/htslib_util.c"] + shared_htslib_sources + os_c_files,
          libraries=external_htslib_libraries),
     dict(name="pysam.libcsamtools",
+         prebuild_func=prebuild_libcsamtools,
          sources=[source_pattern % "samtools"] + glob.glob(os.path.join("samtools", "*.pysam.c")) +
          [os.path.join("samtools", "lz4", "lz4.c")] + htslib_sources + os_c_files,
          libraries=external_htslib_libraries + internal_htslib_libraries),
@@ -447,12 +536,11 @@ metadata = {
     'packages': package_list,
     'requires': ['cython (>=0.29.12)'],
     'ext_modules': [Extension(**opts) for opts in modules],
-    'cmdclass': cmdclass,
+    'cmdclass': {'build_ext': build_ext, 'clean_ext': clean_ext, 'sdist': cythonize_sdist},
     'package_dir': package_dirs,
     'package_data': {'': ['*.pxd', '*.h'], },
     # do not pack in order to permit linking to csamtools.so
     'zip_safe': False,
-    'use_2to3': True,
 }
 
 if __name__ == '__main__':
diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py
index 3c5dda5..8fb1971 100644
--- a/tests/AlignedSegment_test.py
+++ b/tests/AlignedSegment_test.py
@@ -7,7 +7,7 @@ import string
 import copy
 import array
 
-from TestUtils import checkFieldEqual, BAM_DATADIR, get_temp_filename, get_temp_context, IS_PYTHON3
+from TestUtils import checkFieldEqual, make_data_files, BAM_DATADIR, get_temp_filename, get_temp_context, IS_PYTHON3
 
 
 if IS_PYTHON3:
@@ -15,6 +15,11 @@ if IS_PYTHON3:
 else:
     maketrans = string.maketrans
 
+
+def setUpModule():
+    make_data_files(BAM_DATADIR)
+
+
 class ReadTest(unittest.TestCase):
 
     def build_read(self):
@@ -65,7 +70,7 @@ class TestAlignedSegment(ReadTest):
         a = pysam.AlignedSegment()
         s = str(a)
         self.assertEqual(
-            "None\t0\t-1\t-1\t0\tNone\t-1\t-1\t0\tNone\tNone\t[]",
+            "None\t0\t*\t0\t0\tNone\t*\t0\t0\tNone\tNone\t[]",
             s)
 
     def testSettingTagInEmptyRead(self):
@@ -525,13 +530,13 @@ class TestAlignedSegment(ReadTest):
     def test_query_length_is_limited(self):
         a = self.build_read()
         a.query_name = "A" * 1
-        a.query_name = "A" * 251
+        a.query_name = "A" * 254
         self.assertRaises(
             ValueError,
             setattr,
             a,
             "query_name",
-            "A" * 252)
+            "A" * 255)
 
     def test_header_accessible(self):
         a = self.build_read()
diff --git a/tests/AlignmentFileHeader_test.py b/tests/AlignmentFileHeader_test.py
index e6c4287..a665f43 100644
--- a/tests/AlignmentFileHeader_test.py
+++ b/tests/AlignmentFileHeader_test.py
@@ -13,7 +13,7 @@ import copy
 from collections import OrderedDict as odict
 import pysam
 import pysam.samtools
-from TestUtils import get_temp_filename, BAM_DATADIR
+from TestUtils import get_temp_filename, make_data_files, BAM_DATADIR
 
 if sys.version_info.major >= 3:
     from io import StringIO
@@ -21,6 +21,10 @@ else:
     from StringIO import StringIO
 
 
+def setUpModule():
+    make_data_files(BAM_DATADIR)
+
+
 class TestHeaderConstruction(unittest.TestCase):
     """testing header construction."""
 
diff --git a/tests/AlignmentFilePileup_test.py b/tests/AlignmentFilePileup_test.py
index 43072fa..8e75a52 100644
--- a/tests/AlignmentFilePileup_test.py
+++ b/tests/AlignmentFilePileup_test.py
@@ -2,10 +2,14 @@
 import os
 import pysam
 import unittest
-from TestUtils import BAM_DATADIR, IS_PYTHON3, force_str, flatten_nested_list
+from TestUtils import make_data_files, BAM_DATADIR, IS_PYTHON3, force_str, flatten_nested_list
 import PileupTestUtils
 
 
+def setUpModule():
+    make_data_files(BAM_DATADIR)
+
+
 class TestPileupReadSelection(unittest.TestCase):
     '''test pileup functionality.'''
 
diff --git a/tests/AlignmentFile_test.py b/tests/AlignmentFile_test.py
index 28de420..3a6cafc 100644
--- a/tests/AlignmentFile_test.py
+++ b/tests/AlignmentFile_test.py
@@ -24,7 +24,11 @@ import pysam
 import pysam.samtools
 from TestUtils import checkBinaryEqual, checkGZBinaryEqual, check_url, \
     check_samtools_view_equal, checkFieldEqual, force_str, \
-    get_temp_filename, BAM_DATADIR
+    get_temp_filename, make_data_files, BAM_DATADIR
+
+
+def setUpModule():
+    make_data_files(BAM_DATADIR)
 
 
 ##################################################
@@ -723,7 +727,7 @@ class TestIO(unittest.TestCase):
         read = load_bam()
         self.assertEqual(read.reference_name, "chr1")
         
-    # TOOD
+    # TODO
     # def testReadingFromSamFileWithoutHeader(self):
     #     '''read from samfile without header.
     #     '''
@@ -1391,12 +1395,12 @@ class TestEmptyHeader(unittest.TestCase):
         self.assertEqual(s.header.to_dict(), {'SQ': [{'LN': 1000, 'SN': 'chr1'}]})
 
     def test_bam_without_seq_in_header(self):
-        s = pysam.AlignmentFile(os.path.join(BAM_DATADIR, "example_no_seq_in_header.bam"))
+        s = pysam.AlignmentFile(os.path.join(BAM_DATADIR, "0example_no_seq_in_header.bam"))
         self.assertTrue("SQ" in s.header.to_dict())
         self.assertTrue("@SQ" in str(s.header))
 
     def test_bam_without_seq_with_null_bytes_in_header(self):
-        s = pysam.AlignmentFile(os.path.join(BAM_DATADIR, "example_no_seq_in_header_null_bytes.bam"))
+        s = pysam.AlignmentFile(os.path.join(BAM_DATADIR, "0example_no_seq_in_header_null_bytes.bam"))
         self.assertTrue("SQ" in s.header.to_dict())
         self.assertTrue("@SQ" in str(s.header))
 
@@ -1460,6 +1464,24 @@ class TestTruncatedBAM(unittest.TestCase):
             return len([a for a in x])
         self.assertRaises(IOError, iterall, s)
 
+        # Ignore closing errors, as s is now in an error state
+        try:
+            s.close()
+        except IOError:
+            pass
+
+
+class TestCorruptBAM(unittest.TestCase):
+    """See pull request 1035."""
+
+    def testCorruptBamIterator(self):
+        s = pysam.AlignmentFile(os.path.join(BAM_DATADIR, "ex2_corrupt.bam"))
+
+        def iterall(x):
+            return len([a for a in x])
+
+        self.assertRaises(IOError, iterall, s)
+
 
 COMPARE_BTAG = [100, 1, 91, 0, 7, 101, 0, 201, 96, 204,
                 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78,
@@ -2316,26 +2338,6 @@ class TestSanityCheckingBAM(unittest.TestCase):
         self.check_write(read)
 
 
-class TestHeader1000Genomes(unittest.TestCase):
-
-    '''see issue 110'''
-    bamfile = "http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/phase3_EX_or_LC_only_alignment/data/HG00104/alignment/HG00104.chrom11.ILLUMINA.bwa.GBR.low_coverage.20130415.bam"  # noqa
-    bambase = "HG00104.chrom11.ILLUMINA.bwa.GBR.low_coverage.20130415.bam"  # noqa
-
-    def testRead(self):
-
-        if not check_url(self.bamfile):
-            return
-
-        f = pysam.AlignmentFile(self.bamfile, "rb")
-        data = f.header.copy()
-        self.assertTrue(data)
-
-    def tearDown(self):
-        if os.path.exists(self.bambase + ".bai"):
-            os.unlink(self.bambase + ".bai")
-
-
 class TestLargeCigar(unittest.TestCase):
 
     def setUp(self):
@@ -2422,9 +2424,6 @@ class TestLargeCigar(unittest.TestCase):
 #     mode = "w"
 
 if __name__ == "__main__":
-    # build data files
-    print("building data files")
-    subprocess.call("make -C %s" % BAM_DATADIR, shell=True)
     print("starting tests")
     unittest.main()
     print("completed tests")
diff --git a/tests/StreamFiledescriptors_test.py b/tests/StreamFiledescriptors_test.py
index f09ef37..07adea8 100644
--- a/tests/StreamFiledescriptors_test.py
+++ b/tests/StreamFiledescriptors_test.py
@@ -5,11 +5,15 @@ import threading
 import errno
 import unittest
 from pysam import AlignmentFile
-from TestUtils import BAM_DATADIR
+from TestUtils import make_data_files, BAM_DATADIR
 
 IS_PYTHON2 = sys.version_info[0] == 2
 
 
+def setUpModule():
+    make_data_files(BAM_DATADIR)
+
+
 def alignmentfile_writer_thread(infile, outfile):
     def _writer_thread(infile, outfile):
         """read from infile and write to outfile"""
diff --git a/tests/TestUtils.py b/tests/TestUtils.py
index f33761e..97bd2ed 100644
--- a/tests/TestUtils.py
+++ b/tests/TestUtils.py
@@ -5,6 +5,7 @@ import difflib
 import gzip
 import contextlib
 import inspect
+import subprocess
 import tempfile
 import pysam
 
@@ -251,6 +252,18 @@ def get_temp_context(suffix="", keep=False):
             os.unlink(f)
 
 
+def make_data_files(directory):
+    what = None
+    try:
+        if not os.path.exists(os.path.join(directory, "all.stamp")):
+            subprocess.check_output(["make", "-C", directory], stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as e:
+        what = "Making test data in '%s' failed:\n%s" % (directory, force_str(e.output))
+
+    if what is not None:
+        raise RuntimeError(what)
+
+
 def load_and_convert(filename, encode=True):
     '''load data from filename and convert all fields to string.
 
diff --git a/tests/VariantFile_test.py b/tests/VariantFile_test.py
index 4458d1f..fcc39a6 100644
--- a/tests/VariantFile_test.py
+++ b/tests/VariantFile_test.py
@@ -7,14 +7,17 @@ import unittest
 import pysam
 import shutil
 import gzip
-import subprocess
 
 try:
     from pathlib import Path
 except ImportError:
     Path = None
 
-from TestUtils import get_temp_filename, check_lines_equal, load_and_convert, CBCF_DATADIR, get_temp_context
+from TestUtils import get_temp_filename, check_lines_equal, load_and_convert, make_data_files, CBCF_DATADIR, get_temp_context
+
+
+def setUpModule():
+    make_data_files(CBCF_DATADIR)
 
 
 def read_header(filename):
@@ -33,6 +36,12 @@ def read_header(filename):
     return data
 
 
+def read_index_header(filename):
+    with gzip.open(filename) as infile:
+        magic = infile.read(4)
+    return magic
+
+
 class TestMissingGenotypes(unittest.TestCase):
 
     filename = "missing_genotypes.vcf"
@@ -199,6 +208,7 @@ class TestIndexFormatsVCF(unittest.TestCase):
             shutil.copyfile(self.vcf_filename, fn)
             pysam.tabix_index(fn, preset="vcf", force=True)
             self.assertTrue(os.path.exists(fn + ".gz" + ".tbi"))
+            self.assertEqual(read_index_header(fn + ".gz.tbi"), b"TBI\1")
             self.assertFalse(os.path.exists(fn + ".gz" + ".csi"))
             
             with pysam.VariantFile(fn + ".gz") as inf:
@@ -210,6 +220,7 @@ class TestIndexFormatsVCF(unittest.TestCase):
 
             pysam.tabix_index(fn, preset="vcf", force=True, csi=True)
             self.assertTrue(os.path.exists(fn + ".gz" + ".csi"))
+            self.assertEqual(read_index_header(fn + ".gz.csi"), b"CSI\1")
             self.assertFalse(os.path.exists(fn + ".gz" + ".tbi"))
             
             with pysam.VariantFile(fn + ".gz") as inf:
@@ -221,6 +232,7 @@ class TestIndexFormatsVCF(unittest.TestCase):
             shutil.copyfile(self.bcf_filename + ".csi", fn + ".csi")
 
             self.assertTrue(os.path.exists(fn + ".csi"))
+            self.assertEqual(read_index_header(fn + ".csi"), b"CSI\1")
             self.assertFalse(os.path.exists(fn + ".tbi"))
             
             with pysam.VariantFile(fn) as inf:
@@ -232,6 +244,7 @@ class TestIndexFormatsVCF(unittest.TestCase):
 
             pysam.tabix_index(fn, preset="bcf", force=True, csi=False)
             self.assertTrue(os.path.exists(fn + ".csi"))
+            self.assertEqual(read_index_header(fn + ".csi"), b"CSI\1")
             self.assertFalse(os.path.exists(fn + ".tbi"))
             
             with pysam.VariantFile(fn) as inf:
@@ -244,6 +257,7 @@ class TestIndexFormatsVCF(unittest.TestCase):
             pysam.tabix_index(fn, preset="vcf", force=True, csi=True)
             
             self.assertTrue(os.path.exists(fn + ".csi"))
+            self.assertEqual(read_index_header(fn + ".csi"), b"CSI\1")
             self.assertFalse(os.path.exists(fn + ".tbi"))
             
             with pysam.VariantFile(fn) as inf:
@@ -668,9 +682,6 @@ class TestUnicode(unittest.TestCase):
                 
 
 if __name__ == "__main__":
-    # build data files
-    print("building data files")
-    subprocess.call("make -C %s" % CBCF_DATADIR, shell=True)
     print("starting tests")
     unittest.main()
     print("completed tests")
diff --git a/tests/VariantRecord_test.py b/tests/VariantRecord_test.py
index fd80a80..5043d1f 100644
--- a/tests/VariantRecord_test.py
+++ b/tests/VariantRecord_test.py
@@ -13,7 +13,11 @@ try:
 except ImportError:
     Path = None
 
-from TestUtils import get_temp_filename, check_lines_equal, load_and_convert, CBCF_DATADIR, get_temp_context
+from TestUtils import get_temp_filename, check_lines_equal, load_and_convert, make_data_files, CBCF_DATADIR, get_temp_context
+
+
+def setUpModule():
+    make_data_files(CBCF_DATADIR)
 
 
 @pytest.fixture
diff --git a/tests/cbcf_data/Makefile b/tests/cbcf_data/Makefile
index 796c3a6..9c3fe75 100644
--- a/tests/cbcf_data/Makefile
+++ b/tests/cbcf_data/Makefile
@@ -4,7 +4,10 @@ VCF=$(filter-out example_empty.vcf,$(ALL_VCF))
 VCFGZ=$(VCF:%.vcf=%.vcf.gz)
 BCF=$(VCF:%.vcf=%.bcf)
 
-all: $(VCFGZ) $(BCF)
+all: all.stamp
+
+all.stamp: $(VCFGZ) $(BCF)
+	touch $@
 
 %.vcf.gz: %.vcf
 	bgzip < $< > $@
@@ -19,5 +22,4 @@ example_empty.bcf: example_empty.vcf.gz
 	touch $@
 
 clean:
-	rm -f *.gz *.tbi *.csi *.bcf
-
+	-rm -f all.stamp *.gz *.tbi *.csi *.bcf
diff --git a/tests/compile_test.py b/tests/compile_test.py
index f56adb7..300ab92 100644
--- a/tests/compile_test.py
+++ b/tests/compile_test.py
@@ -10,7 +10,13 @@ pysam and tabix works.
 import os
 import unittest
 import pysam
-from TestUtils import BAM_DATADIR, TABIX_DATADIR
+from TestUtils import make_data_files, BAM_DATADIR, TABIX_DATADIR
+
+
+def setUpModule():
+    make_data_files(BAM_DATADIR)
+    make_data_files(TABIX_DATADIR)
+
 
 try:
     os.unlink('tests/_compile_test.c')
diff --git a/tests/faidx_test.py b/tests/faidx_test.py
index 171fae3..72520e7 100644
--- a/tests/faidx_test.py
+++ b/tests/faidx_test.py
@@ -6,7 +6,11 @@ import gzip
 import copy
 import shutil
 
-from TestUtils import check_url, BAM_DATADIR, get_temp_filename
+from TestUtils import check_url, make_data_files, BAM_DATADIR, get_temp_filename
+
+
+def setUpModule():
+    make_data_files(BAM_DATADIR)
 
 
 class TestFastaFile(unittest.TestCase):
diff --git a/tests/pysam_data/example_no_seq_in_header.bam b/tests/pysam_data/0example_no_seq_in_header.bam
similarity index 100%
rename from tests/pysam_data/example_no_seq_in_header.bam
rename to tests/pysam_data/0example_no_seq_in_header.bam
diff --git a/tests/pysam_data/example_no_seq_in_header_null_bytes.bam b/tests/pysam_data/0example_no_seq_in_header_null_bytes.bam
similarity index 100%
rename from tests/pysam_data/example_no_seq_in_header_null_bytes.bam
rename to tests/pysam_data/0example_no_seq_in_header_null_bytes.bam
diff --git a/tests/pysam_data/Makefile b/tests/pysam_data/Makefile
index 3921e8a..c6ad884 100644
--- a/tests/pysam_data/Makefile
+++ b/tests/pysam_data/Makefile
@@ -3,11 +3,13 @@ BAM=$(SAM:%.sam=%.bam)
 BAI=$(BAM:%.bam=%.bam.bai)
 CRAM=ex1.cram ex2.cram ex3.cram
 CRAI=$(CRAM:%.cram=%.cram.crai)
-NO_PG:=$(findstring --no-PG,$(shell samtools view))
+NO_PG:=$(findstring --no-PG,$(shell samtools view '-?'))
 
 # ex2.bam - bam file without index
 
-all: ex1.pileup.gz \
+all: all.stamp
+
+all.stamp: ex1.pileup.gz \
 	ex1.sam ex1.bam \
 	ex2.sam.gz ex2.sam ex2.bam ex2.bam.bai \
 	with_md.sam.gz with_md.bam with_md.bam.bai \
@@ -17,13 +19,15 @@ all: ex1.pileup.gz \
 	example_bai.bam \
         rg_with_tab.bam \
 	ex2_truncated.bam \
+	ex2_corrupt.bam \
 	empty.bam empty.bam.bai \
 	explicit_index.bam explicit_index.cram \
 	faidx_empty_seq.fq.gz \
-	ex1.fa.gz ex1.fa.gz.csi \
+	ex1.fa.gz ex1.fa.gz.fai ex1.fa.gz.gzi \
 	ex1_csi.bam \
 	example_reverse_complement.bam \
 	example_dash_in_chr.bam
+	touch $@
 
 # ex2.sam - as ex1.sam, but with header
 ex2.sam.gz: ex1.bam ex1.bam.bai
@@ -36,13 +40,13 @@ with_md.sam.gz: ex2.bam ex1.fa
 #	samtools view $(NO_PG) -bo $@ -t ex1.fa.fai $<
 
 uncompressed.bam: ex2.sam
-	samtools view $(NO_PG) -buS $< > $@
+	samtools view $(NO_PG) -bu -o $@ $<
 
 %.bam: %.sam
-	samtools view $(NO_PG) -bS $< > $@
+	samtools view $(NO_PG) -bo $@ $<
 
 %.cram: %.sam
-	samtools view $(NO_PG) -bC -T ex1.fa $< > $@
+	samtools view $(NO_PG) -Co $@ -T ex1.fa $<
 
 %.cram.crai: %.cram
 	samtools index $<
@@ -50,8 +54,11 @@ uncompressed.bam: ex2.sam
 %.sam: %.sam.gz
 	gunzip < $< > $@
 
-ex1.fa.fai:ex1.fa
-		samtools faidx ex1.fa
+%.fa.fai: %.fa
+	samtools faidx $<
+
+%.fa.gz.fai %.fa.gz.gzi: %.fa.gz
+	samtools faidx $<
 
 ex1.bam:ex1.sam.gz ex1.fa.fai
 		samtools view $(NO_PG) -bo ex1.bam -t ex1.fa.fai ex1.sam.gz
@@ -65,12 +72,16 @@ ex1.pileup.gz:ex1.bam ex1.fa
 ex2_truncated.bam: ex2.bam
 	head -c 124000 ex2.bam > ex2_truncated.bam
 
+# Append a corrupt read with block_size < sizeof(bam_core_t fields)
+ex2_corrupt.bam: ex2.bam
+	(bgzip -d < $<; printf '\37\0\0\0\1\0\0\0') | bgzip > $@
+
 ex1_csi.bam: ex1.bam
 	cp ex1.bam ex1_csi.bam
 	samtools index -c ex1_csi.bam
 
 empty.bam: ex2.sam
-	grep "^@" $< | samtools view $(NO_PG) -Sb - > $@
+	grep "^@" $< | samtools view $(NO_PG) -bo $@ -
 
 example_unmapped_reads_no_sq.bam: example_unmapped_reads_no_sq.sam
 	touch tmp.list
@@ -89,9 +100,9 @@ explicit_index.cram: ex1.cram
 	cp ex1.cram $@
 
 clean:
-	rm -fr *.bam *.bai *.fai *.pileup* *.cram \
-	*~ calDepth *.dSYM pysam_*.sam \
-	ex2.sam ex2.sam.gz ex1.sam \
+	rm -fr [a-z]*.bam *.bai *.csi *.fai *.gzi *.pileup* [a-z]*.cram *.crai \
+	all.stamp *~ calDepth *.dSYM pysam_*.sam \
+	ex2.sam ex2.sam.gz ex1.sam ex1.fa.gz \
 	with_md.sam.gz \
 	*.fq.gz
 
@@ -100,6 +111,3 @@ clean:
 
 %.fa.gz: %.fa
 	bgzip < $< > $@
-
-%.fa.gz.csi: %.fa.gz
-	samtools faidx $<
diff --git a/tests/pysam_data/ex1.sam.gz b/tests/pysam_data/ex1.sam.gz
index 8dd2bc447cb504be23c29aa54d1a7b8ccfb8fa73..16044675f2473b5bccf026d374ac1ee04dfe6b40 100644
GIT binary patch
literal 109698
zcmV(&K;ge1iwFo7Ig4Kc17&zIE^}dR0JMF{lH<s--5UCfYj%^BnM^Vf^8~hxqUlju
zylW;?YTXNMq^0!#3%Pq>aDV^;K$Wt)vY6HH-98<IhmYs{=lg#*_HW~&jUv=h`VhUW
zoT~ok|N6h85_OHAmHGGTMnBZm{Rv+`&&U0Fe?A`Y?dNfS(4WB{!k@xVRYPB?>AzC@
zQ}T^XAtjlrfB(1mzoXUF{r|x4b@i`*r{Dg^f1}_2OAWt?>c0s5=|BAES&Tw4kahLh
zBYXg&JK;e{cLEG3f9~)H&-)$z<c<g(hv-vkT+;9d3BLk-$_Pb}^zRpcTOAes`<--A
z)xHX$`}ZsSoEJ%2Rf%A#(i#D(s#*Su5kYC)g7A5nU@HpiB!v81>FA=+BKkUdX9k?2
z<~R-D#O+)L_<TNoe)3|?LBv&qUuTytZj9505>6??-~aYdwJ-u!Ro4-KdstmvSHgDt
zDkQ@jL?wl;ifA8?IlKVWB;4(Q+w0<FUCZYW_FqO5CG5Wv0d~SxwqIA(oj>{nuu4a;
zg$Vo9T$_9rulVU2J{t`ytKXb{Ie*iCzff)fFx*nYP+p~<>UK-F{N?bQ#UEp$KeP50
zqwJPx+b#W1?r#%Zlu;-dbx<)#!PY4BYZP{^(R3SOv0SR*np)@Futu~0cD6#Hf}7B-
z5F%Jzo5v5{-)i<>-+xHS`?%AN38DX1U_!u%spvw8#_|)7D`vYB@6nRDstB`Psx#TK
z?gK)MwM#xZT}!Q}H`Er92CyViL@5m{u!%;i!IDUB@!*pbqAH^NS+sZo;Kr!D55Z4v
zrYxR0cn#oP5McD+mlBSlv;#^72&Js65=8i?Yjzno-lYC<=unzX`yOC99hf@pqSt)a
z{cwGyN4qSmBFg7H&f(Eb@1|*XzaQTJO}EIycQ61R{$Cl;z_%R^Zm?Au=juZTUkX7_
zfR>Nv-*DSu&jBVZYu@P$QVG862F_-viV(pZazO10!b^eJv<}TB2Dkf_Yi~OoOIS(=
zm#B3WTA=I_sajUD1}jp6`Fs>slYFg1$FoNN1`nx#MYVFU-O6;fn?7x{-EJ0JKA*}Z
zHG1D>0Sx9F{=KfViy1_5nObX;ranvSx#ZvXns4s5;A0H;1MY~?gTstS-65ofuRn!!
zaGSSo>(AnoEUwoS(V-G=x^^G6JGtw4Tj>!2^NeQC!fWT4HBp{h6Yyt(s%WL2%PaNl
z+`IG=V|2Tl()n$C{&jo)WnL}SE?1qGOJv`{z%}*ocP+t8vPT3wQtNx;4D07YwZ5CQ
za%`zmzz2@*rBnU+7w7B5ocJxq8GU=_qXGB9I_^4X$<3YxYZ9s=%8}WFeQd}_jxNC)
zhyJp>T>K^Nbsf5334B$p4A@D*meX;V9$?yP9U=a~2xhnKkv@0<KZxYK-sL&;ZylN(
zctGgMW6No{aR%21+@NX|;RC*YdYp;DOtxL!1c=|>n)z2mWSn~DDogRVfJ<c2?MG)!
zu$T^R^nerGM+px}0p|T@z{&lE{EaMQ67l6p)>|GC4UQt5kQfY@arC*^ih}ddx!?-I
z7YX6rJjf@3#6N>SsZ!FgbZ7W=sYObs3;n?f-e&dje){~BZnE>>0PyP$|C;f2XXyk5
z&aDHe(z9^hj<z1dPOwXhD@h=6?<=2|&<mdOfXn}#EQAVmsMQ3hb!9I{)E;iUmk`gO
zmn#{}(awXL7SJ%2P=fg!w%okB4zTfhvh!MO%gz{BPB2dp#|OjwF>5$ZN#1Q4=FjY#
z=<};hZw`Fe?cKt1DzNi1=xAW;y$<YvT5g)a?919$MVO1}k#NrMQGnOc>@`h!L0uGh
zjU>L`+%`Fu$5HTB;hkSZdCw@HFRyp%%#=+ZO$<req{3w9)#xUHm;HK^^xxLt3lA3G
zflD4#J#4+>H%Z~v>#9R}pIb{zM*X~OeYTv;ef)}X{)KOisC2VHy}-absHhB{Rc@Rd
zzf1;=ldIM?Jp-jBjs1lh+0vx(pPgS0sVFB=42In`gtZ1!WBieKW_m<1-C08UgLgh)
zVnD)gUnyDLWn=65Q#9Jg^Ca#z<qV#z;Z>5+!pkWDtgm@U>v-sG>ed@6;hfy1*tv9g
z=esv_8r!%H-iSjRs_^QAcZZ(R0Mv`|JC7Ncb&U8AU`73cpRR3VwNL3++UwrI33VcR
zJelC<OU#EX0W~hK=oX-BUzSqeyB(+e(8E$<-UU2+bmA6}!hf~|qrU}d;$L8pl7rvi
zui)@Ek6*i!Sb!AXBQQ^J<1H>NdzgIom~C&JysgSG@2mNt?!fNPXOJ@r79Vbsw_w4-
z3((%)W8QuP(2Bwiw%4!{u-*C5m)Rujbx6?M3wS|=R*~X4PcD6n6ReZ(H@><E@X7Cp
zacwmr`h^2>Y9zT$LN>0cvj<hTz-2wfsmA8sEZrrIzI_0*Z^547xomN|F7SmDg}HJ7
z_&_A^8@iu>3SKA58Bs6_TYmnyTrTQ#Qm4!1Tz{O-3M`LvA6KPRY-&5~c$=@E`S;8I
z-w%uAz}irbfWY8NuqAp<mk1ENf1EyQYZdsCjkWsIrfD!fn<40&VEr2NS^-x#D)57i
z1k0${6%Hd87*3oI)yfXualzo{s+oz(XY&}_Q&SjBT6_?}2L@L^ntBPR%dYwkoYS;v
zz)0PMIzarpt3pK2wxa3-JTkV9Ml(-67-ndUr8nk*IMy)U$9_<nPx4MvecEktgq#9r
zUF)a=`0Cn>kb<%4*3*$2v;k?AIHb^|;oU}vF*Ry>g9|wO<4O5IIG>(}6oF-q6oeB}
zV(3#?P3B#sX-2Y*eP6u`Q{HxTf|LYv2uMc}FFI#z-yI#-BFN`_hF6Y|LyS3pKD+f)
zizNt5bi`mGT=Vi3YrCZIXsZoamok@ugY~<f!nTg*;s4&krq|J{AvBjY_kVrJx&2ju
zkKrPv*Hj_>`fg?l5Sv0Yh1nyB%Z>41Ks{ehJNALdtt{evRI|QB`$e^gv^%%ow@%=9
zc(yT!SODr<M|}s-IaB|U|H1D35k=>QS{IxEPgp3n^MPCXTzV>je9ara+R%93oki$F
zUSXn99*mde3(A3Fao6tyN@p@3%s89@EJf|!d0qp6`z|o_0c%=AnB(W*f|GJVCvFSe
zFi9ST;o8r?^c&LKxdsF<9p)1}AZ9*^<lQS8evse=mVy3MOQ$rRa(1?Uxl&?7@zGEX
zUs74ci6Q5Km2h^UG-Z*>C>N`GIr9<y*Q+6w+p>q~eMaXU-oSp_VP$>is#}5E?y4e2
zctN%H+TUPztoXL}cKeU9xyAoz{~`zUufJZmU5afEo{kbCNonDCB?ey^fesxJ)Y`HI
zW=NRjz--5dq}ofc`4qVRlI?9kn?vat${bPTPY>)ed)}#<hVFbW0S*xc`XX<_oJ}p|
z$43nZ@uU7YyQt46jCx|7%ctW~+TY+{VqaI(@Zwl*8wF2ZLEwmwd3llURc`)CeH#$|
z0G}29!R1D-(Se&mZZLS50q#f`UEtB<Eh%kTMmZa9yVkMH<}te6CY&l%mKs}>_TWA(
z0yT>Am^*+bc5#QNTmV?t9J|7lW%DlJ$>TWr+Z`+cFT?yPR-5Ji+E14k@DTohw;mj~
zh<1ED(S6?n^S1caAK1MxrSLgviIF+@!NLi_fclAdiM7cR%$fCcBmYg#M7?fHR};X)
z^nt~`NM^bDgI!k_>>4R!yjLEsvrnOauH&m?G<mSqrppd13GuEEn6u)JrQ`vH&qazd
z%;#v=oAw*~J>8<+A5dWV6s$GYPNU{6O;QRLqYW?%C<{rzyu{OVWjR&drYpg-!1S*H
z3&lDSFpb(n)exBD6SM*88r0Ng1`ou&9=qAFZtk!lA@C(t6SD*!&_<Y8e8jWq+S}u}
zgBf-PY3<G%crc@ZXWP_bNLEUzht;%nkEoPV0QrNtj;+l)8ym%j6KWv%0pr9SH4f8A
zgz0y~O#L^%nv2t0a$IoVYa5lYXy_N#Q$rwuj_@X+LnzJ)K5i9g*$q+QZxLYY;gn*b
zSBxSF=_=<dUFQSpD71LRc^~7%?+@(j5dhAna}5ER`_;HQUEmMM6M={*LR<v*_igHu
zsBxFaY)=A(m5Rzg3c}xj{lCNVvbV_yTwYm~mKMzvHA{5M>DaOgw{TeHw)hv!l%+;%
zS9^{#K#b7h1h?q|7Pl(HyjSM?!>a2ELRI%}w0^IRd9St6C+BKwthH`ho7UUvX_5-)
z7R~C&NU>)WYpj@B+Jf`d83FT2zrhK(-CUZMrZ;wpVsn4n*QEn{?g6LMz4HY7E*;@~
z3^?y|I8XKzoYSW!S=VTNx@ao+IA-?eGqQ3ub8M+$$^ll0)PrU~6?^Z&`@3K+xxAh^
z?oH<*zg4!TW@CL)U_T!?C7Ai*sZKRYb!uvy*@E=jrre7}Uk*QC^N;us;GDiR2yb-n
zF{*^)Y2Yp?=8p14W=;LUlu0VU96k5`C7pPJ->$pIU-}irb`-FwlC_|XaOU;&n{eiJ
z#DN^^^2b0w<<AaIuiMp>(DqJ1jO=~x<K)SBC3*VDkhfn3$r9YD&vUW7Tx^bXTjx+-
z8Vr;L77y4pVzIndc;9VR1o?pH9=p#Vbho!#>;m~ws5A|hHXKfB-Zc>%I?NnKOu7w*
zUa=sV{3nt9r*)i9j6=9bGBouxlhn-g>D-Vwl$hQIy9BPsu%ImZv?@DLWtddbf`?P5
z5ZU}<&de^@rL7H;HfHlFwBm_l4iu}gIX$ug=*yR9-*HOH?QT2F?i0L8aG9*bk`YdO
z^hr5@3a30fQ20Z=1t>L=SJ=82aNP29QsWSD6hZHutC~mb8R9lUN1q<kueMX}`Jm?0
z{!gmQ`Qz>5*X7sw>#M1){rdQ*Lw$}X?QIQDwK<*0@2e-NI#FYLj0_<v2#(uWxJBZo
z+bpULi&yb5X!2#?aMmy9QLAzbm!NEjM4w)bhms`k&d>HzRvY%XS_?EWDQ4WEwoUYm
zu_B0tR?nSz&C3EcHk7Ms!%18%U^R&nCC5sFm|B8h)A|@3D_v?kWZX4ac?^@+?@Au+
zbFKA(7$Xf~(FZsm=f<7Y`E8n{m3mwXwu^>H!`fr>7I+|mr*1U2M5>*eD?xocW-L+u
zGW1<Jb-)tgn3toBJGR7`+OiepH%aDx-BpxSg~6R?!y`Dq5>r2){&`NHr|YCo+q4-b
z)vl;t%-rCFmyXKR5KQxEqlIew%Y>s|#Ra8P>gW1C>O9Kac=Ot_pf(&+i|GcO?~?`s
zWNa?(-*wIIJywP=?X(FgI3N=IH@G{OAoDQEx*sn#4YNyh=mcE;8jYzQw%?5EfvR&m
z*I};CTDjI2(Yy0@?bIarl+imEwP%45;PhPu%TuDsY{D5@|2=VwpH>!B)LU;8xJWL<
zU>Qi}cDj(gxdK3Z460ut1e@Mtx-^Np_HO;Rd5~)RV&Ytez-d6f>D)cWW}6^4;hcYt
zTx(Ekd{+;J?KFFgPN&iclq2OvREE|%6>zQ$AFIv{J)h^1wt>6$D-S)aCU`DjcX{TH
zBh7m+HzJMe&3;p)a*WiBdlCmf4!adJ&O4!>%#-zoB}26IWJVdxdxrTyn9mL-))e}9
z<Kye3aV6c!feromxT@p=4*7NtoT|sU(AMM&zX;Vw9L#|gj%X<dtd>Xb@S@@)y0)PA
zN1h_uon3Z*>pUJOFB!I<k2~RfGG%UxIKe=5<AU3fs!atD3s5DdZY&lUST39+%?CAX
z0t&IOM2d?mM!~1sxY|7z8*d3YUwKy8lCszsSywJC)5<zdxKj4JNnQ^jvH8u?!VnoL
z^74f;;LULN+0y0Dp<H)N%Rujso@gak;JfJU%K9o((&`L{ZJH4+JJOxKg7X?Y-7bC#
znA>MDBdScb0dUp0PvyGoQue61!~eRrT5PY^d9YYP;*9g~0_4xF)29O!!ndsJ((ru8
zQ-t%RX+Pl(&JGKB9%6{PW-y(=U|#rtH~&>g!fG|u8|IAuPafRr;I8&7jt^{aS!dH@
z_3Md+d*yKJvGajH0{C{--HBC(0zUGmo_tO3le3D?+4EkV&bG0b1BC})z$J#2{0QI(
zH{Ty{EaU}VgO6`#!@OtJz6NkG#-Q5@9(q<aCIP_9;dc!FlgT{b3Oj;r1jW;Z)qO0n
z84W{G$?%NX_BF;B0Q0#362g9r1xA29zOzCg;E<1mYAr(n?a*&W^KYqbI5^WEeM?N5
zyL4T6Y*&uN9Boc-5j^V=!z9%OyKNT%;yIu{hSRGXi&qMp!_L%inF^<Oe&nd0q@YJk
z0>iW5^*|+nyaE^E5-7raR_8`Emt;du>qV;A7jXLG3802HgU$dB8Ae-k@5RHB6`l9u
z>g>G~fj*au<0;43$=T`Viy~5*+yw8{16qqCrc<Q7Pl)tkVH?udcA_*&n48`y*m+FH
zEI*#(>^Zu?x?V7J*#eZ0n9eV6ngUf@HeX_`87Y%I(drEkf_K9hmo^x=z-=nQ1lxK(
zuepMLrZ==44|I0QfdAsaA>a{~=lLHTQUbqq#!JDEBb*%7)pah*1^&tfGW6qT9f6hN
zeGXc&(Lhq8wd60>2q;d~NONRC%`2CVJ+h!Q4WPll2EWy6UL*57=WBG%O&4+A`K`B|
zX@c)EUwZiZN{2c)FiYhO?7GDWo0S|#Kyhk8x=lc5SVYLFqCwb3>n7^mKxY`U`b!^J
zjtbecF2tBsf=Q2PXqL+12PaHdC>IC3f(I_xZbX!ei1EGxmmdB2{r&A++=_QRuq5>I
z3@6-s@KQA2|90+A=Vqgts=K8%T}b`4GzbaowtHzLmYYhs*!qaYM~zv329oEhd~V<?
zV*FX>_qrY&qJ&!?S{@N_OZ*6?;npcnFqBc=tMaI4v%fHo6F?50C54^DM)Ue;uw&zd
z({yE+_vHy7-My?~ccF(8DjJK84qi_!^de5x@ty3zIYWXFk&4dkwN2f&ON)Utg#NN#
z0*Z6`wL1nB2Rz$xrLQQ`N3)yyV1_5m&%s2_u*=qscg_>0uzXPGSwu<;ivCg{PQPGz
z-3+3R&^yg=`_U@>tBCWb`p4J#^E`ZYpFcl8;PdC_`Tg^A5|56%3G4acSwlW3vK|;7
z+y&nG=53V=JOzwT;t&}IXz+LHE7auJ`71R>?QScY;TRplB@PrBN2#TB3sUNRIH&i%
zu9HfZNp7o|2_g{@9{CT36@@32ThNfF&`T(L3+gk`0r;N*mhysLs7<xA$(i$Mf60sj
zWn{L3Up40Y!xB>K$idr=v{}gZ5MReZ%E>V|X21E!hskce%5x57q!59|xHBtlw)G!i
z8M1DvgNETN#mT?Pxvw!PU4pq>hD=>Laz8H%_wWRcc?%8SB%2{nwdT_!iu%pUV2cEm
zp>c;sTpR^VvPP~zeoRL$Nk?;30@+ujMv3S=-~cX<YrLP51eWur-4lwW+fw75(rh(H
zIP*l?Hk|7Wu6wW3EDiE-m-t93QsrK9Eouefy`|psG{%-zyu5?g-sI;K$1@m{zT%AU
z*pos{nnj!o3z`tTi`pf?l@56JV}fC}p1i!H#88M@!-!GR`$yom$uxW*wMlNVywg7P
zZvM`TOyeTzno2yDt>@@~D)N~h8-UJ*NUhC>NXdpHBN?ycrJakX_=F(Rb6wDgmSH00
z;@(l5BjlxQ=RF6C#i!a<MVRE0;H^G)y~8svR61598`FXgNF~4X5G`HuVB0#(-0vLw
z$t$<%&g&!ekPIn*>A|0F6Qd=d=#Wxx*+3Na8s{CE3&y0lw1LuNPO!_w=h`UgfDO7U
zFgXE1HwlV~B3RdN`5lR6{IgN-|D579est@@d};*AN4&?fuG8Ebm8SB2LL6H<12#!4
zg2bOPIgb0pKr>!{4Poc6#;RZT{0qFC&$qYp{Aj;vvyEydA^yr=$~K-~CaZDd@z$?w
zInmkzQW3wKPn+rMy6+UuH(qA>gY^V%J<AAdX}%u9WbogkQrsaKO<&O{MhWTtF$$H3
z269#mfxneMGSZ02aCn@IpTgu7XX58~)eFkU6eYDpx)IaR+ZU|r3Bvi_(SLXAODg6$
zwUNaIA0}{|`E)QLt-A%4nuc=zRb0<|^^fb?*ZKNBWz{Z($~}=KE+zIwMeUe#&qsv}
z_H9xbtb}s7@-DZSS=pw>h57sr;S>Ig#onQ&Iil6!F7fFWk>~DoRm4dTQ%rtO7EhZa
zq>C>|Z6?2yNIhb=AkDSKXd8a#e>*9jUwgRnwT#H$<!iS2JXV5`o^s(unDgXvm?0Cs
z@jAk_I`5xo>)Ybs{ip*{<S-`7F;X&5>KIZ!H0Dv8C>|m4c_9Js9X<3-dl??n_8Jy(
z!W(Mc)#9LBq)|IKTPYgM@=#`!9D!z@1bls%Sy0D@!3fV9ww{g;UFQb)GRPMuh{3l{
zNyAS8K26#5wb?etig^;aQ{b0aD_@Z6d`2aWGH+PU#g0wt-@YE0C7ENa9=6?a>r`iF
zbi;yHKA_4{jzD!!PM`d;by5e2R+(|)K*>0T+y`9(8r~ZDA+Iv#QoWq90rR5|3$*{U
zJkb4m@|hQ;b0hMC;^{;m4??%#(M7VY!_<81mUdxwJ<B}MbDZk)K&e>|yP!7!#n?T?
zHuIpGxg5l?cu7-NnZl`H#YulgP!0BLOTBGLUwIGzM3rHxPB5HN%_BRSXheO2BG5+_
z5>a!~U`Aff$U^&>2FML`!0oMfHW$K-=PN44uM*b2x8Q7(cNi^s-3H7aSp`3|4EV=>
zH?>@sz6bLNw;r3F>=IDaB%w#C1a!`dCYBn9Q|nLjhjf{@I9Ei#zsCarFTU2yAsOLB
zry2ftw=L+xBj0g?oXzedIzy^H+<M+o?z?`x63lKQ9B+|NU5#m4x;vtoGZA^2$7yKT
z+HAs!T|%i(+09Ha?{CxY(oWb*j+I(5vP~oZ&e5n-8Rl%(o{pxZi|<V14%8TO8ZXqi
zmVh^~^T}bV-0*M^CR<BfVmHrnyzJb)a9a<X&Y$>zk&;LHhjoZlu<0mj{#-IV4~+VV
zJ>)gG_mbyvI8bsz>y0?M@982j(_ApIgI|r)?uDVmZcB@y`w8m_Z<6Jk?k6GgkQG}&
z+42B%!A9!nKbqW_FX4x@Te7laww4OE9-=v8SaX{)qX*vQf#wp--pao=U}EdFw&<vc
zU8i#xbHT|5@a_UmjD|7NepDD-s?1NcjP%hV<*YPk*M5jN=8}G4U#m;Ed2hLWkL*%%
zWUTVx=T;OIKPYX#c}Zg@bG+p$Z6(Yzm>4l|@mZCQOL<|8gsG7d#Br4_jffv&^uNsc
zoX$e*I-zayj)v`FHSxaJFDUJR(riJwVm(dPDm4%yAERP+#6u-8LRO>m$rlu7$xx4C
z<ASmWJ@H9~m|SY=bgUC=Oyue3a3W*P1v6utfClvCzo)F2{x_nJcBsW-HzT&(`|*nE
zVG7G2wIBRvY}k7Wp{wPhe4ZW-I%q9y-1~;UDjEzyNINr{vv|Q|7jFyRD$!2wsoCi*
zsw6MkPQxkS#YKmeFtG`0)v%~|o0Ss#)dB{XEa!ybd~S2wZ~&@H(5$18sRcM{D_Uv3
z-KRw~g{pf0KC3$wpu#yBb5f;grtDZ#77mJnk7sNg8GY_Op){43TUK+v(+QL04vmxp
zhf3|O*q|?pMNcAbc1#QtU;P23cEky?^~SH=p|1mp@EKD3e#0$$=y9(pXJ5v7uOgeT
zG*kWbqs>^97Zbq(&MfoZucsXa$d>L=8D%>QBs$m0XRqnjdQicMo4V~bNi?{Ee4YZU
z3g9=he7%)?-(^qSybg2O(vy$xRTB>^XRa)O-PFk1jHxkpi}=JfH4js1zJzlM6kC^F
z9{)muH{|h&aqog+`Yz`gk$_Z0<9M(%0*b?mBo43_=-AQE-NT2v9TainY9b_Ka|_5{
zV*9eE*0k{@l#k9TC6-;fl61w{oTGS8BK@;@pSYsq@d~xrh$U&R${9GJfhX^*L9&uA
zmiI(ou>usSik_q~`7yNI>NY>(d~JPve{0mIx8iG+cT%W%wbrTB%3zzgigax|Ph@S^
zi9EAv+v;bj5`Uw)?L`=G7HNJqrQC$nx!@pb4><qXmb+p%PnkuVcS|(SX{`+2oqMtO
zt=h^ePB^DGu=3{5QbNq=fAuTs#3Yx4sqivPxKH0Mea9_h>zh5Mu8lz3s6D*(o>l1(
zr|vIPopn&hFz>T4msK0d==;#@m;a(h7d)ULY8<Tdyf4G?m&r;RRV=S(Z%19{go3MQ
zFn8}7Lw?i6q$9DiA{`hkde$S93Yj0;SlJq4E8v-s4Dx3JG7o_1_t9Fu3s}wzI?Rq1
z_Jy;i#i`qreOQ?Kl^EKAmxnA>L65QeZYadyOr@|@c|nKa0L(RKH(uy3)NQ@gjXXCW
z-(K!jTFJRvIGecp9v>*?lW+xGqkY%MUVypW%iKoKqM1HTn*^4o3U$mjxj~|iB2NtG
zGX28@<zqleyaaEbrlyG%-0LrVL;d#`3L$)VIsEmP$;^!X=7Dc;^pH{usir+IEIA&8
z;3T!6N$S-Fef%uS6Sj9B3vRnf-}-r4FR6tHJh{;+^<$3|Tk*<3mqogB{p%MjwBKEz
ze-XN2n;&>aNB4#AXa8#!=Cf+Kk7vwiA(mg27@bO<ECD62#8>6@Bnzd&t`}4v5rFHT
zP+w2nC%pG%oO$Lu{aK({odxVY_BREh{Tu8P2a1XPuJgKtlTXz|Y9+{yw9gVfNj2B~
zQlF?Q5g@9^mK?<^b)LzI7?SFxQgQ+0cr@9QTve|~Dk+I)U*>@}lo{IA3_PX~Dyz{;
z&sCZY4y`G*g7ncLowZGgF4IR4xVF<-|4EU-&sfnH(Wv2g%1H`0Yu#^H&-XfKK;2YO
z!mWFv%cB-KCx*3oY{nwFo8fdy)`#V?M^fJQC9>Q^8Sy5{&;6Z(uhMjbRq~pzrfA8q
zOKbUfz=5_mn}+mA<``d-V%jPqu8`mX-o;45*2C$v+z<8iau#7emdUW&&))fc6tuJI
zLBr|fIa?{(A&Vl)Z7hGyZWhv4Ii@Mnyz{c&jI<(#{(;hM&Mnz3b>1=3dGJB4v51!4
zB*#i2$o?3_2<XqklK!-gQ()DP0WNW-xm>*qai?hVtR^kHRs5~5O&$j;pMiFwI;|#i
zkF;mJ2%IfIh5yxderW^LKYv_vKc2^^2>{|Z9*%INNcQzRK+~xE<xnvcHTZn1!N*xC
zR2V&@9nI$y<<In}&fq+?0zEVUjw1uBiVm_2NK;PU|iH(rLSw3v?><>a(Tk+v^`
zk5LHnd8=_Z3CC$%*0$UNb4e*MF3DePW1aW-+zT56DvEI0`7uzGd6jPadJeop@FX?)
zi2`3TdHT4&<-X%0g$r*aM9zWty@Y9M986Rh=wpSCq;pbsXB~4<Bqw#8hG*6D=_SsW
zL%Am^f|qcv@a2f5BN~qA-LT8V-6$nka92Qzk~W{jL3&Op%H&cM@V-Xub;g`-oW4vZ
z9MU@SY8GjoN9?vC#jl8&2B#4j3R}gLGtWNFY`)yB7&xG`+&O7AT8bF&_bRlkGmA;w
z=1&N&&f~<O9*@NqoY@jJx0SEmkJ607-2jvc+@WD=>D*s%{)}<nDG^<Lq<Ges6R~zi
z`1V0TlgEJ|nhNze<<Q7xRpD+f_1tF5unRW^>!R`esE+D5=%GSDT3lL8J;$Zj{`gcF
zH5Q=iQ4NcWy#A8gsW6F6YF(9a-q*5v^sWYzj$}$F5_jE#;=o8BlHzMm!UG#%2Gixq
ztm?94KG#E!Qc&ZAPnCwK1U~#M($k!h2CY6Z<ds1FJXh3#Z^;{zUhkASqQ=%+ea#tZ
z$m)0U!0SNol}iZc>*fQYQ`l3`A`SDmzuv}3GgcU4NO|8Ii3Y+V_dxGUK<^cjHzuqx
zynpn~L}{)6Ue6{?rgwg-c{Lo-5m4J^cZw?tOB%2y+Ka`(L#mo2;cuI|X3ZmEK6bll
zWXD5ow%#e#HP>K8RuW>HEP=eQGWn%WL~g&BF|kd8AEgz%`o=Mwc3f@{9+OZdl#6X8
z_Js92fb=!>NzD<-reW8)rPq{;Kucdhi8wWLLT%3E`QLo*JLx##EV-qIy`w02#~%DT
zbIliV;`mxkW2$Wa_xD-{7i3X}E6*u@P$Y#&G|E8n%G)N|Z@8fNH%<vO(>|@;RP$|M
zk2GmDiKAevPA49RPzP#h_@Ul}lZfxV(>O<q=Rm*Yb@xif&BOEzW>$>LpOA0xiC?HI
zhAlfpL<fKL$nvWXQWz)<0`_Z}b>h0VRh;t^rjDA;AZt#AWWYJX{9<{XR&5S$t$O8_
z(i;Oz%qHPLH#qOi9U23*IbnAQC_0yIiVp40Wja@}QI%sV7{x9Le{M{j<F+!=P74wd
z+NGg|-SY}lrP9IZ82%^{2QMb+3mBhSeyzR0OVmBP2b5a>5?ox;VAHH3M?h^?^9YJ)
z56d{K$unp|)6ya#<1u)#&Bvs#!EGd*7>ln>*E1*SGiQ_DvYtpd5fo;>RhrH$9A_As
z(Og{Ed7s%4w(NZ8q&ZYJdgZr1ti(goCfNOLbhKQ`K#Ni30}m;9#N{P@AZ1sc^6Z8@
z|I{(X20?|B$O52pJIXs7yrAso7M#r@(}+i>L&IN36vL?Gr8VYhu=>H8ZO2H7tgYUJ
zwBr(LT4@{Xv26l23P$+o;Drynlxo{lGD$-s^R_kY(#v}CEa9auK`a%#vZ(3qI?}5)
z<ZwVa&i2-Oaxx$Hb)cv8e);~mh!bhVNR!7YMDvSSC!+V$>GMt<a?-#$JA_<g-?EIF
z4FPC55>l@7wBIU59I1U&&3b}X99hxsfJ%CrP%VhnI<(}<OSPkdd=ck;stYRmpe-l7
zu_*h!J6g+hml7jNQiWhBxqu~+xA>|I@@HNmF!duK{$(K5?V#-$)a*RN*$bCQ;=;i^
z(ng>X+iWk>v7f&2YQKj!r)9`)6R#T@+uo)kR6L~dtqC8jF)aM9pjmEUwGrDxaW2g>
zV?W~S!n2FT<08s@mjZt`u@KjCWu*N3ZspoJjop6p6_3|+N6A_Q3UScI`;ei<w*Nbe
zY&6}6v=?dDb|p|{1KS)tZ(tokwK7YCy&`?SAYJC1=q}ZHo^_(jAgg(q6|8hAgF^r~
z@mUGc9a5#bh>boyNdua~Hj=D*5@Fvcf~&4hQFQK@yL_2-oLXh?nSVc=zy9B^OfQ>^
z5GYABx1INPk-&G;U%ymi(}(;3N3*$D5=lC{#+)Z}ag)HdeQu;!nwJ>sP!D%m>0!FO
zJ$kC@ck`Zs-Y2_>e#YgJYHL#>Ham$lX6HtxJZ#u{pYux8a<(L}to1hB5?D^%)iIpR
znIar7rzOqM__@on?Nrrx{#5_?NBTrE4XUhvcv5{M=Pkxh<gNGZk(}3hvn?@`oL5hx
zp<|8JYglP4V-CmA_Q#N<=5ud<e*|Ze2;jk;F59{*+Qh*efZ-h4%GLcjv{lv}UA8H-
zVpWVPv+KoC`1oPFLTcKZV8I4iq)--^|E(Ps(z#jTz0twunTR6H!R_&5ArFVRssAv9
z-RwXF)Z#0R8e<NbGy8NE;B>NIO-HL)MwwF6vMPEC7qS}mD6e*CHOC-51KFK9{4}w8
zb9pPc9WAj|;x#D8S?k7Uj)>5)^AY7a!0|SYANpeeNW?tO4~8~d01Suz;5LE#*~OuF
z(&a*czj>QFtpKFO0&|8eF6_+n${~Crj)xM+pJ%!ID(!A+niJLQ?XIqwYwAr)gIC1)
zQw5aGOSj~5XK4CUg>D)}$5bt{bdr!>3shFl5ztTAallwY_$ZO@2XF{l0|G0ur}Gj!
z*uc(-X!&GC+tk*%Pm=m>3lQ(w&WSk<a7^#P4HaKad|%Z87k(waLq06HLc&@W+K-Xi
z9Nw)X%_^GN^^D|^W`~)k6mirSd=F=x-7RA>r>*VhmK1fi$$8#}bQqWZL)zLsQZK>R
zLJyiXvrd*&P;9>g=%TvrOZ&}+GdiaOJz3J$)3SE+l4iaXdkz*nx8?*doeOcwl+`~m
zM+hD!>W_mT%lWZTVv%&qHz2M~K{xv3uc!nJ$F`0T+D9UCYCh+y+{c8V8C1s0jk3@e
zbcMDoZI!-LmL_~}-Y40}bhsU(n^M++^nXLm?G-0=vci%?&?cm-U(N^Pk4%7$HI@7G
zYDl!YT~T5{6)3$@JgVeB`X=XZ7yHJJ%zcBU(oy0`^;C%9OFD|6UHb%-uT`O?6&~4i
zOkodVe3S@6cZ%76!wPv;w^Oy-R<t{hKHDbWa-Tcc_|VkhygSFEGhZjNkzhN9_uP#d
z0W~#^E^Pt&*XS!X^jp)=XNb1HQxdx?53fpq!x`n}#$a<zz6>*arAbhS2sSvjBn6}K
zi{0G=c}5uw6VBB+;=E=-4nQ4h@<vhETE<C2Cb1p)Pb<&qcJFx>re`qQD1$z|=67BK
zjViXqrUizF*9_0A1m&=y`ej2Y9Z^`PpDKMI3s=8H>6NGL$kRC0@tw!%eeVC-f)giw
zLJDcS>g*=QD{zT%W(I-`t}?G7ezT&r;fNny4ubexhWO}5IyLpDezqYte*OAv^acJ*
zpBe~YRx?q0u~7}|4d(WUAED-8c~!wk>ZmL%Cmz8{4;7Cqa@W5nc+$U<qNcTXb*k-z
zPs(hvIaBfO!Ph`eDbL*^l=UnF{i&QyZwX@zI0dBe<kIvY%&Se8UmFR#4~HPIo;pr}
zWR=e1qXmd}R`p6Fp1VlXUN|=Eg59mG(R!Ka3wcbhNakq3_6(u0^Q~iC#QB5GKqep0
z`cKYBgYzD>kMMHW+`|M5>r4-mLL#hsl~Cr?!ub!%CHCXd*h%TY%kYZTIxXwPuqHDE
zmqC^l^WYF@f2im+)yf<;o`n~vn@L8rjFhClhOvY{#}ho&@g0Er$*a^}r2<X>8j@R$
zGIz3V&uKdLC^@B&s$0o|BF5Z%6}$ec0iye@3Sz7Bg*=?PMmBgk0@}Mml>5pTegjZ&
zcV(t8?nK3Zx94+4wtKkpocu^;A{5Fj6>vT)p7TwAlE@mnS`=n{FlJ+KryB(pH4S!^
zaXzaqWA2)+XDEli-l7Bu_{cULahs-co@`0w%ORE9koG6k&(8Cv{YyG4?8u<9;6f3%
zE%>5kq_F$nZFF`X$Rar*iZ~WrP(r~V`H_t+uVo9AIio20h>c=pm}v1sTY66|Z(vJx
zM&B{3xSJompGCldIcKqlT8+*r&(<lEOHw7E_xmXI(at1@(0vw&)a~vbn2{W2n?HJ+
zRI8U+1o~6i+J=NxGQIU}k{eTBB9qu7{~1duIMya;Q6M*W36^lKw~M8=XtX=`L0wpe
zo+nIyNLFcf(_|tNpKV!D?6*s?S$psIdd)Lh;{(nx^-8kVcws#sNPu+3d(#1RMWs}+
ztw}Jq&!ajOyGfj^$AYP%o{=I*+V;7piRubklRY>#B8OhRr#@C$v-9cMjGRUB`LG3N
z)wd2z6AO5)^mq79N2Ck7B9{RR%0zrSkSaUPmePm2ZcwZE6D>6knTx4wOr{>wptDHW
z5|B8sR%|GGwB<SJAl=42|Ip3%0&S$Vhq7H*k3$9&uu57-GxD4K$Zyh_hO>;5`fOhq
z5K{VWWqFU4LkR&LN_t8*&c%UJ%AvNKaLy76<++8>b|D<`?`ZhQ;>@ALGAF<7a`JBa
z%qp~w^U={*BUfbGcYx@eyQE|kp>_un#}i@P-gIK8eHY22NNy*QxegLcynZ{My-ycf
zyyon)LGp3g#EX))8=6ieUv;F9X;{926CcSC^67lE!*Poksa`ys#dF@}I<3wsj46Vp
zT~T>L3Jl_0=4Q#msqA*$+IIY-0CSi&Ov7YcY85iWS%HnWZ{PE#X<<407ZZ^RluE2w
zo{96wST8d%R{`YDD=~EX9`yucX<i?pjT<ZX_PZYH!B0YM+)QFzMA;QzDCiKG;3zSk
zv#d_#_Et0JRIms88F(b0*&5^)tQD46lZ_M`Tb;)IadT8s#&6ZebP@*@^yoq2I3<w-
zt8F-24aX*>KSs-xo41G@IAFp-48kv(GYDC)JL<S?JHHsl(;W9PH5H8=zyKA_DWka4
zoJR)dd5IdhgmOa9GEL(&@%-Anw5@_obAA#$Pg0o;OE?8FitNSN4DyKh_c@&#rGwtx
z7SNK3NaT^g79q_n6X#~J#Bc>}T@mJ(>ga`1Bv65Px7r6?c!k{h3LC&J)212Yg#Tt#
z(%6D?B%LFv%MQ&vDLST8knq=mOueBg1*AQY4&EfzQja+M#9eUtzHb6*=qwuF#YX<+
zUd?NYZ)pDaj?`X!Fbf+`99P?Lwi8|y0GmL05L`+OK$BL^Iq#2TM6gLbuO{a0Ywjus
z!;?(iC~6Vuv#OEHXjfH1Zs9$^+>Q5e@jY*^;Iur}TQW>#jFXze2rZ7n1O6|0BomgU
z&ml;1H3UkY-*{X3J{v+ezTN_~*Lz7*3v~vsNd+)Yy54<6JAo3|{(u4yEGiCe61@vi
zZH2ksw{~bE4OMf%gi;cWTza<k+`B^(TO?#E;k<WRjx(91b05eb;5M5|;dZ9UaOxOT
zam{)P1&~ytfh+E^9PThKYa1?reMAbE__aOquBwqAItD6ztL(G|sGg*5+Lu23refNQ
z=6!FR##oi^hC|9&wbsfjk0T(BcUbZM_UoxIzfDUXxs6bsB=-umC>2GWZ3fhkj!GFL
z)sNjYO=lYPnv;RWJxd7kCVrDJTScBQ1~H+|^axfBP{)oFrWVUkt2tG#l*?erdE-;^
zd+jI|KmP5ZYHCwMx-g;Po8%+tBe+<`f%jf-#2a62)7bv+ZLXbBB4a?=dyl$g7t$iz
za0I-iafFyNPVJ2v2PFpr5#q2$Cfk6LnT&kX)P`k4jiKp^Ehly6%cWsK|9LPD8Hphe
zCW_ZSwC+ee)-gV@r`pZEqQ6ZRIF-p9fO@{|Di2YVabnpsjR%I(h|4&<QrOCE(;nGz
zHYh7DEx-8u6}5RRrHpefVj5i}90%Cl;zBB47W@7s4x!h~FEN~s2`3jY&sc`}`2Rs(
z+_5nU%se2aW$hTwQABTA<>nI1C7k`bnz*xNXjT??uuGh{r#+y$D#N^2K?m<c&{w2~
z)Ux&TajHURz@qtmVcZv-mf<v8a6VhF<O64VLnkHv+iDtZ%1NQ0kIF<=F0H9*nKv2M
zpQSa`t>oOb@&3vRv^sl#u0*{zH!YpbD7bkwj!mHqAxEmF?vudhv%IL^L4GH2KS*QL
zz7&#Qky>*=Uz`EuL8$g^zOe+8m}ZbqC}95(s%U(MCeKnd0;R^)`K6e+sZ2*Bt>8Xk
z&zR=MiPLxxF^b=Wawb}8OJSxhH>AA^I-L}m0!xZ)BTg+XCsG1a+wBIRrg1fZ0w=VQ
z`f8>!I`Y82&xs=U@26lw04R{0X##&9&lQ-&?LtmI{O4aOe`jWy7YkCuZJ)5N=k?Rj
zDV3<|ik;U?;?Hmj9}4t{!Ktvac>S+U6PrJnFzF4ILnq4FI=1=nj8pQux6ea9MUXte
z_qorlpG%PROii^NHwSrR`O)F9auy%d_-scgiU%OGH@O2M)2kW~)XQAcR06E2;Xs-0
zPqzDbtF|0e<ipZPF_!vi0Ce$#PicXHamth7mKY#>?#aRTbf|-NP}?)=+8DpI_xGRb
znyy!s{MAz`B4yGhH>x-7wx?~u0i`Mwc4Zmvmr7n&4moz-vZOCDooOPP0Tq8WPX7A%
zOktI~+Rp7gajcV@4{<Y-Rxz-)q3&}gSg{4GT3Srv>w~S|)q1@l@0<;)CLSLFPpMUc
zFDR3lx7ZZn^%kT!Uh}M`bil1Z6){(X7x31QGTpS*Z9~p9$iEfID!FJeHqqQok^04=
z`l@&YV0FPF;R2`4U9KChRhx~p)URQKrcTG<AeezzMIpT;P5Ty+{t($dj>{$I&N$<p
znio}=b>D9eYS*nJM4m`AoORobNXJkYESODDcZX6N6iH_kz1jX-{sM@hZ+smnENW|$
z!k<K%7ZtTWB@IxT(V8--4vE~TDYJOD4uq$Ea~NIIG_p7R?YAfn&{<VY-PoI8^5^u5
z@~*@+3FlUm^Cy>>bjm`XQh^JAI+{kX>%<d1`8v_tU)cC(HA(V}{k_=-b>q&XVYMFn
zM`d&@Z<ZP7TC<9issSI=vPpfuWY5xsV-)kfI)(5e46-egB60;1oGP5hsaY0{6S^tl
z;08lmTv`&+OwD_<s<O1(tj<U$4+a1!-TJmICvuje477c(hAO0;(f!<g4Dtk)L+wZ5
z9oBU@QZdO)V(V5JXg&f-hd*mn^HYU)3tTG^gJnet#U52B9L1{Tqx!4@Qvm#=k6$;@
zw<5S0<JXZ^4(!ZD888PcignS!q$<X|k4k2(zIO@eeP&EB(=c^ChH$Oou1?}P+0Xtq
zDc0tW!kS%}vb^UAvW=+HzG<<Id#`HjJ8@Xqg%~xkVkB}<_Wr6hod{$ZYfU3Jx}9yh
zHfOM`I_YIMMQygLV=X{-rI=+^nd3uj!TKaxWc%%7{0!fp>hl*43CHl+a};cT4Z;#T
z0eXRO!B9+_^CcRvronZW3#=ElBa{J8h|R{`<y!i4<LliS!1i`QY#j{=a7=LacZ-OL
ze6@b7^1@bs{dN26uk#JQQ3Fx~_TVohNd1Ou^=%;5yvW`C@2w`BL<x0>1w9)Th81Sk
zO4h9=Tp8z*5Zv(PkByLEHv*hktn~9z?+B-8`?bplbyXz<DcSr16VP>c2q`e{8!cA@
zG@56rt!#y?iYp4Qf9l&`|I}>jtT1_RtVT$2{F;QxGt&DSQrnYx_C07#ZQE!-lw&F(
z7wxL0NpVy%@{GsXXvN+CkaKFqC1q8gmnb#1Y+Ci+1aMCOU_*kYqsseY4R|JYu>ZQ-
zeCjyuaX!a5akZ<{85_$XWJu>z2)Lq2OVvo5SNRJ<*A^%yK<M62xJvzmOS9*PAZ1;P
zWi-PU)Nlip`{as7e?bCcM7;q{q1~PR!<Kz8PKH$JUEndQ9z`l4GpauSRl<o3rI`eA
zNK>T3M2Vd>`L84a08@}2q#(J{0V+>WJKatW7LmU6cAd*NUu{nOfIp34py0NYm!xsA
zT}uzKit=Z!B;ALt-Y4YxI;PlwkEliY6V|#tLWxJ0yp!E1$Cp16sgB!EhK~cIf~Iq%
zR}uLThPR%RdO2dCl2sY!b43F*q>r;mB3$<NpLJ~0SVVDwq*~bd<WyN$uOyK*0(xHq
ziVlCBC3G7KnNbgH>Q%3fznG=R9+N00Vz7rK8~xUE4l1Xy8{SsM_2eZKH(AiHmX$D0
z;u5R2O^RbDA1o&3C!CcFK&8*qgL{DTF^%DrCK=-qO%a93GxYLMohPuD-oR(kTPBq`
z@;aE$p9PqY2a?pe)A@AzK7D^XrN5EX794#%0*YE^)WUdHN0b?6XsM~O2mDcD29f0F
zvD?HGeaK>>u-3D*oIivUo`Y%_g80H7ZkbST@8d(u<|I$=-vVBPNdTorNW-Fok2QV^
z_6c6mz=1j*J_j0aUElTx7J}~;vq`>to>Y#Zl{tSIlfKk`fxW449lQ3-kfMy;g+>TF
zw)RDwkLuSi_v<_T5^uF8MX*oG?tEtA9XmHuq~=OJGg=7XJ=gFqp|+-H6<&^1Ico55
zRe41pNJQq~uT*cADOKBAxY6Fj2{1BOPWsIU=gaqTD6BMQqgHW}=tw@LPUQB}NV?>j
z^bmxF7xbj;`R#3TpW4SsFAoyVYP}EJZ+V&5hfZ3vw5D)Q{g>aDI@jA4)svXAH#k7d
z9(*e%Rv$LM4D-J7j_U3qnB07EKvcCqq?}v}1y*yvc@ZY=y^k~FFTY6*V<{HF^tmR$
z^FS35{NqsB2B2qGJ5?um^pRweYwm@G9_~Zp<nus7*KEFNMNO%-W3M=QcbTkfJ*dDa
zwnj)Dl4HuPKvxJijx|Z<kqAN53j)V>1Z$GCJY1GJTwaBVu{64*y=X3d8T`BXGNHMN
zTXRaM7nvSA(Pvr~ICK;-840mSV1+1-RM8+Q3Mm&?^jYb6xtwa)C%RVb+Uw@%A<cz7
zjgxw!ripC50Mx21SGol#jzO4wb*%jZX0#u6#=C?)k~P2crUF~lCd<ju%RI9!#^*dn
zjO%|jzZ!Ts;L@L(B^HzUr)liuk84kPK|>9e-jUMxa70;L@4y!$8qM{kB+k*oiv;iJ
zp{IVTNItA659&j!Bvr<V<fuN8U}T&ROAhG<zG{We0!7_WO6L<$<ZN95dQ$GHPD%ax
z%szeKeSKl>{hW~Di$%gug?uLPjz?_UnqN|HvzxbNNi_*myF8t#>1hJr3E4h#{Ipk@
znv<)cJh8;4E*^|<Vm(LXYE*7mhq-b{;}GucABUhMzOX39rzUN@oUTp<nfr~)IG<ID
zNu-;<rFhQj?kA$YUeT0L+i8YTNepei<DN5zO7WdAn~*khJj3Va3R{1@Ht;k}lTTV~
zk%^VL*+fEc=XiAIrZ2rvRT<?dvTr%2W5VP^Ld`!^tv<i+ZseU+wq%>H_{H~~phyYo
z9d#&_KrREM#7-JT`0zvfMW1$QTQd4wiDQsD>YDfPaJD*UbgzN24)b1h9neiLWvlzk
z1m-G)t2>}dM59{L;IIM4BYZ)57q^$~BU;4ytlsUpNoR|Mx!~_K!4!GG%vP2IkLq1S
zt2o$EPN}CtE7fr?!n{{+=bN<x@A@smH_@1U)y2}SpVV*k^5h1lAQUDe4Bz-sPABBZ
zZUfH##ry~kUUt|oTSs+P?xNhe!JOQb-+7bxDr$RSLxdemaKhE%FiqwTD1p4+4=4z2
zbVZ5u5w5&NPO!ms8sW4ecbII!IiADpfAp9b6^8=)oXgXgaMrjTJpLTf>0#prN(q!<
z0?yAs`+g!ywbNY0yt`=kDq=j`cg_XY8c<&qVS=%TlNg)Yd#u<b#$c3wOMuhl**8zZ
z95{tPoKQ{%=4{rjFXJ4Nqg`<sF+wb&EJ_Z-*SG!NOKXw~OgQ=mhm$BjhXXo-sZCCe
zZihJ&Ad|=EQloamB>SmQA1?+Jg!inq$m^-wM6`Mc^S%u8@l(CuzQy@=inmiw71OLr
zadj_}92dZ6Qlm8K?ma}IjCW^*)FDk_jUIDhJs<1EFlye9&Hhd;Y<|T{gq>QRx4^gj
zYB?p2NyuQUBE|)|<utg=*&o?!LT99G*_CH65(?Y)!BPQgWlkrYb!gFjWSPHFm3biq
zSt*6}O&VAMMQ_wQR;~3sP&O{dAY0ZnB~A;aQ7{bUW2o<eGLd?(73~E@Ay5`3kVdAO
zBFznD`H+S!Na=I6njaHw^)|iK#p<mEJakR4V_R^*eqv71AuZ#rno+x}C6u-a7lF?e
zC3)nL+!N&;4Vr!GgEk95k;V+Eem4P49a&vJQq~gJHP=gO&Qn$+Ew)`NY<>d6ACL)>
z2h5xi5FD(mK~O#bs|ixdocZQRM`>mUvo$so^B|N{0%BY*ysfqz(>eOjCNeoT@4=-%
zF!xHa2+k5paJu+{asoS?|G^i33(#Bn>j=*t(ryv8yHK$u2x@W4sg=s1$gXRjrUb<)
z9SoV!@Vn@dFh;SwIH#|3M80IIGRWtWW1<G}NOfd2Dd~unP;i~S7yw5=xW64xH4a6P
zvT_U1{oB5gJsCwOkb*^%1sxtz1ZsQV*)Y!R4(Yu^`ZGcbvF5wJfl2>%5rfdIXZlUQ
zr8I+7x^ibhH}Hz>na1MUnx#bz$d)gU@BT<GusJM`g4q(Ji!uBzjhsDMq541>CKVX_
zt8;-Lz+e6*+O1@e(5O*qq7`S{HY191GpH0N4Idr}tc6e$M-Y0?MVJ`Zry0et^@bw5
zC7CA>A)E|YPWE^rGo0=(ebx!>l#*9bj+n<W?Q{+wn0$)ij2%&-4)c%aTI=wPY<C}y
zgppI~TFwwcx4xd=iQ2QqE1JBMCDN^Br!*UTPc@Ms^;liZa1uLbvrk&y_nB~Hyy+(G
z%!~I#`2rqK^!CljHt~vxtg~$@qkL8+Dx(8jdmTv-T2Y79n^EAzkvgK<vm1f7Q|+-!
z<a{yJCV_t+$c%bqHc-;g+oVUM_+eUPLVsi%(m3*%nxcS@KA;9ZTqWM~q(Y{(DZ@N6
zFRHf@t71wAMW!qf)v~P*6W6}W;TC|>3;Dt4EwXN@_tG1qjQM2_5|$!GG58|}OCxFt
zDLPMJ|9*Q(X^Pw*VXmYhEQLHFU{JXvtgU&~4k8j9zbxbA@BHhuz&p_~xcTwR(oBmY
z6%T`SF}$~Z1XN4Y4z+JU>PzP7!29X(j5>vbOi}y6X||kV&$*UMc^x21hR*GK7_9zG
zwRS7*iX~<+kFwb61*XosZmGynhB@;jrF0(n;9e6?^>)23O#z+dlVRi443b)i4z6aQ
z$griPj09%z&7reAhRI7d$hagi;aC9q=yg1nUFNjk&NH?J%;X8<`03<8efI9Q0DZxx
zDca1`fZ3HgqEt<VS|r!=UQl_-_bcl7Wp{Zrs0{Q0|6kPe5F#GUR<o;RtL`RR_I5pV
z`ezW%qu|Ksw!4Y(6mApB{ST+vl4(X)ne+#Oa{PEQ(RWIVP*Y9;<9!Zh<VK%TIdK;u
zM9*EM@_@Zn>~(Lo2wI%@x(lMIEWun&h-E=&;g8&R)JXuKL8AV^kYu);{V`v6RUKK*
z#7m1FwZwMQ(+-tnWKhM*ci;;;%7@98PV>NeG8IWuz;b@?a&jJBLlPMhQESEF>dCr_
zP?fM|mGT!y#H7H9H;$zoB<AmGnqO0V`wd@JUA5{x1(eH=l52s#|AyY74%9&u>Y)kK
z3LYU7Z3&5QmvBB{?IEn{${|^V8!&!`Rsgd(KvzhwCtQ4GqS4rpRkQURIrT1!auzV&
zfAT3!Ll!hlZ<d)FC%c)7UMPs_z?5Q}{ct{IJxPhx$o^Tv`S|0sI;{h0DUcUB1|}|L
zIjNc^;&jqqE`hwa6V+)gnm;&E?Xe-pSQKjGoC;&8+V&oO5$fxM6sPagZW>(_YQqc;
zDvCub`HIRHn4&79d{m^iPL1KW>-#O-%sBi^uZ8cev}#yWG72?=hkkp@In_{s1E2XK
zNc!ui7j~FdYa(5E@X(0{@WsDZVe62kz}-jIIv@DbYMyy$#lkmWu98XV#0wy)r&L)d
z|JmS>qM35wloGM?a9HijDWI12(&AS3;ywINC7(PT2y3yZ>_bo(Em@c3YvZ8Us70YM
z=a4zoxZUS;RoaMrUN@YAN*3ef1KPp3{TBIjg+yKjDGWQY+<hhjFu4h6A+~E?<50If
z96_rJX0>*foBq)oh0PjBs8qgIgB<U7_rc<msy%MM$FIH9i<ra3ygf32VGH(93$dK{
z$pzEWe-$v6l1_ob{G&Q!V640`Q8RvuF-(u_>25DfIH=dyHEd||q20{3Z_bgxv4mTX
z4%F*%>Mpv&(oz{`;dF*z-;bp2T_oD{+(k4%WN(v4rWZ6y#g!~3NHFWXyOljZO$Pg2
zoWZ6Het|O3{5W}`sn=lx(4YIWxS|=alU5#6O~9g^2ZzrFR<N_2nM@NiO-&P)CA^h%
zI{XSQlhRtWsC8>s)$jB*(}*R`Gr-07jO=Fw)QdK1+JJNBqBHZ4SPyC1W5G_sO~T<?
z$H<dLIEl3+$YVGM&W}I}qbVJjVR{jf?VnG@!aF1Y%oE_{bBbZ8v^mk#auJ8H-q0S7
z>BP@Nb+)U4mZ5qd-~nn<a4ED8CeAW>7Cs%G4TA3hi#Ig`s(J37rfW$dY5m$iVe>_7
zyrRr<pzocc)4)!X_RF_nGSAj<=N(mQ2b36$b~Sp_QfGG{pDoCr7mxt6rmA->udwCk
z=6w3c3p33N^@AQaXsFi)jTtRO4s}>ioB~EApYnSCfRE?F&9jyyB_lb`{x_#C@)!$h
zAi|PbxPB}_%2~6#MuG}ml~Hz=-Z6SFaG5iAcCcXn;Q2q6DjUnM59Xe8W}$+3Zfh?B
zy;pCd@kUUdF?h+ibGqh1bs$-ipCxK<H354~8Bl)bjqF0E0!&y^oL#a#6OnWHHrcUs
z_-^w=?~<aCYF&dv8hAQS+lg<2rLZE-rF02Shgu{n(dmWRd;2UX{L;kmyMWW-e*wyh
z9>GanqbFERgKb3a#WrnYp_gPtE%K5?&G{THr_A!oxAKw&m~)9o`;yh)y`VHHU^9sd
za^*=@T;v>2=<8t^QbakWNqiZt^het;iPjDLdu+<`e&~A`;mlV_sf|Dr;z_v;Z%gB#
z*+%J#y<-k;y;r>QZJ4BkCZ1Gk6fVKUG)W|F!8So+X6YYxcWBTVrOs>A<BmAmKb%n`
z#yR|~(OZPN(2E&vx7ffhh?+w>B87+99Fjc7b!E;dhlz_#<;4v!rzH{UvnO4=*kso)
z9-39gHWK&+uBd{jNs11ptX$VHia*rVmR<MoShjrwX;pB*>I||zfYCFIb@e#_T={xX
zvqokcT=;VBSG$Tq4%Wt#*nvjXdg=J};Q3h^okPx^4eLZL3#+{R`#EN)A<SccKny~H
z_vHEIn0iCt1#Sd75xbbwkJ2GITaBd-q&csN6o>iiqa`&|L<pOZTG$>R;kG&zVmgNy
zGXVj-+>WweZpwmXHmAJtS*$VhCB(~`wuyxrYn+=d3H}pW*p31vPb7t8a1xnRWzI=h
z<THvdM0kEH|3E;}5ismwWFiee1&+E!Ho%>j$kW7B!usCCf#doj!pSZc1B%Vz7i3J@
zf;6@B{4UORD<9X80?!@n060fV@j?=(!6WA#Gu|5xKqV8>hQoP{C~25lQsML`X3%wr
z5)d0dG5{kFuBOSv@-dxyaAvZs(Y=NJrQ;T-q~}rPM$3LhIS-o7EK@W@wT|<7uRilt
zvfFKHeMriD1hv;PM`81<%Tk8=Q~mn*MPK>1&tD&(AHU%5kB^V@7koJX`uzIGyzY@0
zfqiB18Ow$;tSrhz5mCx56@mV&R6Nq*iYNGSn4*gchkc2_iC-y*Rp6|5SIXyha6s~S
z5(Or}eEe{jsM3n#4AuMj{PuP}pFh6NU+2#=eEa&ke0_eNzb>cC*ZFe3T&CR2?VU0i
zs<JbwkamtPAY&&ow(_uulOuU6GYRJUdj32Y$LNx68{5F3hJ!iqvFcU-s#>NYfO1k7
zQoSkyy=Pjq6jkuU+`MXDRhVa5GdAT#UKDu;$bmlrZEN@j#1(ORtWHxO(L1${hADBZ
z4pVq63(m=hJdV@iuzz`B=Qz((8V2coO7Wm;MSK(rPAn2}4;RagQlTUkz747L(hr0)
zG1B)m`a06Xu1%KaQ4;4q4M#f7F(SpshO>%nugHa%u*MyzfvElVdq{E6H|q$ZKo2*o
zj_LAhqC&+r-E^raFh>7LbtHbj9m?k+8w{1euU2vpJh5(?WF)Sf4KQE(FYTY9Y&AF(
zfPwtznHu2?F8fp4adQ3b(%_2Sl)97HLxfb#aHlHQFQ=|l$196J)YTN?&kMx9n5dJ>
zox$3rQ@CD5vM1d*W0FX-(Xp5vr4;iRg<>Gv@R&aRkXpnKDcn&&3`nt&0DpVp-*A6!
z4g!W-kT)l<x3hVV`WEUS-`}tD^nQA)-P?5tkNcKd3{`N17bDJLCk>7B*=)a6E3tE@
zb$WLDcxQ|BLyPqDQ=RGSpYQMZh_kN~`#f=u#_#w<vX(U|;~mDaDV{TQj19vRoe(L%
zuO7GOhz-NFraKpj4Ds1QxOt9E*gcd$Pa-m1QLuqsR)f32nR+}<NK*J#fcbc|;hV)!
zP3GpWbUWiNCv(1?a01sJAv;422zU++8F0rt0Xa}jWQ@3`8<2iqz4kJMbV_KzZ*U6x
zT<q+$@fKBeN3JHS0U(dqu<@Qh+&WO#S{dUC3rfs$Qm;9YjWhe;xI1t&;2xtUNxzz$
z_bA^aoZT+syjL;C73I%4n<zGRCu5OS;50NErQyzx7@CMBR?Z>)c|i)n@HI5CK7C`V
z7^M}yuaos%o#7RFNu(O%41ta6HILl<C~+5h{?)>YKC4)J3(E>`tc#hjBja;}(8jJS
z-#*%?s{v;euM_J6f)gNZe?aRhE2_O>MBqKfpAmCCB^L0xxi+Wlx4)-kuL~5hs*Quh
zd`VbSA{K%SoQ>vGeAcU%jW<<!IU#7jrcYQrXSN>E4V~0&|Cff~vaTb}OfaClw<d_r
z4=UjV{~{UQ=9!x2MXPLg`n~ro;YKMV#w}n-(N=fQ64qf+@^NPC$2dDJrLO-2es|5U
z+z`}IZ4hVB(|)4ZIVs)2Lh*DR4oJ3RyoE(Y?s{rpSR3P>7bz-@a!Q&#v?x+q^Njrn
zsSsKFXA9CG8x?|0+o{3zmX`^QzDHVc&ka6LDpFpF<nu}x9~F3(O|rTr)!*U-Z|q%w
z`-H4FsH8i(O_uL_&2n`Gpl@hH&#Q0K?%On@PRo!wo&aez)XG3J=Q=#3&V5GlGJ!h$
z2l#`d@#H@vT?>-lwMn`bF;CcnvCYyIMZDZQ5p)Q2ZUekDPJg~PCnKhEMVlpx6}8um
ziy$MIEjQOrm@gJJ&8CygwgG-rSHSKn9QR@EK%DW)n>^~X2=m_Wex<P;F$Q$&qRZhg
za<X0Y<@?Kv<{g~MRl(0W4ZSV^c9^Ae5xW6~1?5aqc|o({bK#I~$kXmmC%uZ34L6G8
zL=mtH%9CvYdY&7aO23s+n34KsKy@YR-bmTp0eF8dpnN_aPyFEtzTVZu^EE+?E@E;4
z77Wf-3}<_a;&h7V`0+6>_N~iqloCEFP4DPn_rvHJm2JpeFitoepL}b8Qp<6coSTrG
z?wX$IQ%TsmqEP*5xJY4i;ccFoD*!AiyoY0nq?V-zo>A1O8a1nNm?jfm`5xzU1?Rh?
z@oj2g5c4XZ>k_nfQHYv&8w@7smlxFM*}e@6y22fn=JSVc<J26=lM&rIif47F({UDz
z_8x_*2y@b{X5;S<A+ib*>~|!tvw_vt)!cv*4+ajSwJ0A;C|?d};;go@naLe*5X?lj
zE0r6)!O(LSYsU#Du=t&8hpQ|3Gqv-H)JQv#KkQRB-~hhw<`S~4mNPhbD3`5*=RkSW
zKoC`?5-Q;YkD1RMWO%71K0~YO!}YpbH}OS=vcK>nnE5>l+i>omk+qv?<G4sJgUU-=
zTD9ZR9Om-{CMxJw^%V<4G}kdr$NNpT$t|kqjl}2sJ1<!O9}3au)YIB>lF~06Q_6s1
z>H)UhzXr|j&@<o4x>^*Trl~2`OzXtx-UoV^HbAY`UZkV@KO>PnY`>6v``fOuEo)l8
zsYax$tCLbzB_+~u1k!<MdEoA-^YnxrUHnYJ+h!nwrSYCJ%KNh;mq~WY4o;8iwTaQx
zyZf3tc`9$g$Ak0g6o<+izHyurWk6PNX4&k;@h)m}0h{m6*3GA%x(j6%Md=_2aoG6n
zM@z~Y20~qDNr1v~PWVETzp;#X%D=1kbR?52vHM6WPw8<xI-?9|d+|LrU=@+RU){~T
zVFzt=x7+!4b8ek-eY~P>DLIbMXdR6~zj;WLIZoj~J!I4<wuTb>NXY89tOIaa(SBl+
zzqe-{(MdU}kI&PjT`&|0!e?|y2TRJBaL}!yDUafEVzWqE5h7g3k-Cjtf)Yh&ArkR?
z{?Ku<iEN*QuC^e(*_$GQ{nH<9P9IC7Go1W!(k6Ealws1f|F!eY*KT7rSH}Q{6xrE^
zy_b>$t=r2~#`&l+=Z)O~)Y`3ljM#pGBOiu!h_;2%?h(~3<MtxZ2LgR`N#~*8zdx)t
z`BU_Da#QAIJ(l%d@k}Im`2mw8QEQHrm2E>UyFAJ2dM=3=4DVISuE~*c-l_8Vu>IVY
z5*v;n71|<F3x*v*wR(hQCDJX*<1;$h>`L*dndVZ6z@dcnUg6MjQY{B>aLr8j|FKco
zv2rFu%6(|728y9|pn%l*<db$(caS(a%h?;kQ9(-Lnn9K!v{s`)k0Zt0DTyUBp!Wr!
zeC;lKY}8ujv8&=yTr#=)ir2%5X+l&>3rau#>t7FQWb4LFQ+>|A+1k~Y@Ls=%<{$qs
zAGMhm_En=Nf6Bj15h;2o$b!YFTL`D?-={j4M=+uHc-~ib{!yLZPVxKvuI){`lM~<6
z43YgQ>ETi?>S^h5U&-7EDhdHf^i6q#sN;MSp>`xZUp%6ZSHIM=oId<YA^3w-=8DK1
zu<2IY1)$^&pjh$&psCfQ%-3p0y`=1z&Zv<N*Iw~9X6y2`FT#BMRL;VyoRDih<q*2q
z#;mqVE8+8mW4&!Va0`^DcQ6L<p(?|iO_TmLVPxKw)tgn==&zDyyoac^al!XK1H8`z
z#BMR_*f^ig-`}Tk!0gA7^Sm8haG+bDjz+Bl5}gWeoH)`T9WfuSu4D(a#NXkayBw)B
zP1Q)JYnqR!t6Qn$GStd9Zw<xtLpdZ(&glVKj29vI;WFvbWfEE7bphs2zoKwRt+hgb
zpTLY*t7`b^WNrW5!(Z0UTl{4W;+!(U5)L4B3P*dFkmk{ITNU>nuhZrI{T$Cp`p{f1
zu&@`LnM^CA=NsEsEo#&eA(E~6F*W?LS8WoifHFI!sdGNZE@`kTp&uxAXG~s|-`Ilk
zY$;KVz>Bg4sLkdRTs?mUcptCHvfT|(FG^*@RboCqYJpwwSt^5seIGPEd%dsNOuW(~
zN@W;0!EJ{S4<aYcEvbWeslQ8gJ1N~Fee_6wUXeD<+po8#xytE~RYA|YjhvL49a=HO
zC+6^c!i5q%35CK%T|qR+x)Z4*q_0bQe`KKT)AVk>g9*6#fy}*ZP9~gA;H5`D)y(bc
zp{<`M++17kRNJ>-Q8TimGUuL48~@X_S=(RND{|kRX!FD5knU}F?Pb5v(SsOI1U@D|
zhDwZ0atG91wFkG2!UNV6OQN-;-1D%eyl8K*h6m1c8E2c)4buev3Q%=SfO;OO6CLSc
zM*3Vq3ehLTb{^9X4gBq6UmN%EX@)Rdm)|BNWJbn)i0|RrOO}4*<I4Fisj9S`eUo0d
zklnJ%rjvA!Zy(KSnp7`KXnJfWv?L0#0Mm8s>xEr^%+;71USw9`cBJcZDyhV760eOA
zyHIEut_w)tk`qFS_jfFZ1o?i(NGUZJ)%d36U!NCMTG2{=70Disr{>2~_bl=2C~52m
z&R_(Or1=;GVSXd0)AkW6G)*X+sI+F=&b<-Lonmsli1DXY@{W>SbpZOAfBgLXR6B3`
ztf2F-mfRO{KIg}_z)Z0j3;O;M{ht@Pw|Su0<&1J~TYy5)g&N-Q7Y;F`KO*<mWpcp8
zV?VYJEcnE!^v)Rxy3WLWYcTU~Rqx9MvPGw_@8-L+QKwV$&-B^MH@mgIh~Flc%dR_b
zAoaSXIxH?Ig!3t8_IfvehmM5vv#P06tZfUace(#%16KGW0DQshpx2_&seOh1z=(Q#
zuh1zaxopWzJ})&QWs|Vg2C&s@-U)1H-Sm{_DV>gV<WChT*}C%dIH@~hC(I8`iL4?$
zC+FTE963eOobH-4%M=PLZ|JPX?*i6*2F$Rk_hqcUvG(hWk5YyyJH_tKjX{CIM1_S1
z!%S5e2xM`*o<jTbNv%%rO8v5Ry8Jr-qPdYzb%8AxUd?L(8NDr?M<dAskU%WT<4`MJ
zY(H_v7w=PO*6DMOiapY76HVUXj0+F#E%%{Rpn_8w=-}$5$)NW)O?j)=VSl?x+gs6N
zsujTG8g<TUmMFuiHdLZ5fxJKOE2{2o3|%LGWN>~ePuQPPJG%JoW%?mTiWQvHv2z;f
zFKnJ3SgGAp)PyQFDjF0o3e1hNtSW}bmk22yC76$DN|Hb<g59iWXhI0hF`ipV<p!id
zLsL2|sGlTtaPPMO^>%L<dPL8g-TDzwpS|ZTKrf@nGyQfsd`?hpfXN9HY~51P6hY!x
z6Yj8VE8+DY8E@(RJ%^mEAsWxK<JjifX{QFGs@&|1Ky$=-|9NDfhz~Cq(RKpRt8T(o
zpRea%C*+)M&W*-)16hAsc=g{y^ERPO>DXgko71V)d{Sqm@yeD|3?i>4w89&-qR*8T
z#k+*TdrO#QeIGEB_kUiS5N%ykZPy;qLBgoNOf9nmy`U&(l=(+WTmPuOMBA_7X6Vw6
zO*Xv<WwZ5ElaPd@vP&QVCcNz$Sv-6_JWQ=TdP)az&goJP>p4OivirORY2x(YX~cjy
z-}Sh$KX<IL#FmtW_Ncf-)YfI3q`1)XGA`Pu-P3n1`;d?-hT!4$v1Fbb1y3QToQp8A
zqbw#vypf^Z_TOyQnA3n8P`be%K|Qz&_T!g9{tWBcZTj4@?j9Uy<9k_?=A#>}=g1!s
z?ekeiTHWA($0bL>y}PB%9}B-+S_<#wwplhswPi5q<DN$jG+>p~sZBWN-p%A<E2l@y
zKKu6_b5A&GjkFq;w-h+;HDM%;^oSd_ea8R1bYki7;Sy~+CYC%_bsn~zcM=71MslRQ
zjuR<v)A#36_TgB2)~tr+V466;0xD|K>sbW){HYcOnSP&dRFXC&fMqHxj<YKS?AmgO
zvILT4n=?hpE$ZP<RHFMdSx~uoP{?PRu^?HRjI$OXfv3?`86{k!JY3gl7S+A;Y-{vQ
z^tV0Dr<!+Z*05b?Fp<^ml2vQUK>c7gW78`rj&v3GOBhTvH2Z7hS=@=73v@QiAOU8b
z(yNBnx}BFt#Js0anJXeUL9Ot?v=F&vWkQ+}48Pfp+gBEe)|y7wxxYMuN|6$Awk<=w
zcTmrCM0Fk((*2wW{-WmRdc6#0mk1`Y&k3TBTn`znJttNOMrWnh4V#Yzg)CUpTd2>*
z1VH@Jb!Hy0_EYs~wxD(z(_zm`5hu3cU(FS^y%(L<FD~g3deIl<ui=z(q>={O<R+xx
zAE1mT9MXu&3wT&_eiWJU$k|H=<^`rAnrulOcbG#vU|qzC>Zv#fm}ovs$;4u$a9FnH
zRi%&tWh|H|0NA>s9nQxrP70!Nvc)=aauL~ePc><#f}ek4N!(N0p1IDgQZI#}V<j*K
zI!vuUYJrv37F)8N@szOlJo^}b$Evmwo<!<C?;R>{u=C1+*3<!`r$C=`K(k_MifuLT
zT5fb6=kX^BHey~mdtpUu*X72`D~jb1-|VS3nj^BW{fY*jf0At{VHqcljr>vB*TFMY
z_)sBZoIqOcS)@4)q2xBC7FSd^eo-%AKUwRH6xgu&oVEgm!N{tXf!-_IptO4vE}PeM
zb0?w2ZVe<cU^!=w6I&#xv7`udi{kIwJj|6oh+bbyQ_yVrY*Uca_Y^7Lipz;%-UV)a
z5X?ADvOEXO(~H*5fRj7XD56TTRYjOjFr$}`DA5&Bs_Sn5U7v4Gnv@+;o`8~67m-yN
z>9ayXkhH*hf25miR`QeKG%qhxsLC*(6}<Ux(oeQK{T}-hwrF__Od!I;YtM0d?w{0c
zcU}P`&J286>b%`yM#Uya%18L-(1?z1lAi|m(rtp-QIGz?Qs|u0&OA^f@>SXblx~k-
zeVq1oJK{V4Zx2;tt)7Shw8VVARts<idHi7$+<+m}`8AF2e~U;wi?nD;?-*Y*i_ho8
zkYpMe%1S85v*E}jolJAPdCEMxaJyY^=9ig|pvIA2DQnW-Ctu64i0%c#Jw)|fUdt!W
zh+fk9{9&6<e@>_7)HN2hN_&=mT5Zwu8x>l*oo|7*{=T6m_qr<VIwB=mgesn6jOIH9
zEI*teD!qh1ljCb+DI^HM>1Nia>1)MbeLqObtmB=3_2Zcm=-z+YWc|TZq{d5SP){Pr
zC7@p9lK2szv6;}s>{d_j5|PMZ;G?fKL&=j8zMT;+ZhoJL47W+Mp1GxvwbxGbH-EXo
zAaj>EYAn(oojulDG6ov5FwcflN*O-;%@>o*E379t7rybfQM(KGvi_U4Js_vJ5N2sT
z!Slm{(*n!~hq++v<>0Hh^-)&0$f+gpErorJNSMYJlqF9@jpN}8=LCz_{-xvDw?`mT
zZHU_~`rp>j9!x2xg28NR?s8wXOT_V8#l~EkkCf7%i$>-oHG5M!0;+9ph1dd=)VK6d
z0C%s?=&`|Y9AFRB_9)d*8}ou2rqH!Nm0(Ut${IGq+C}^60K4a_QYG7Z?&X_0d(UN{
z_bOoPc=NW_{&tW%btIrZY`tc$D4kIfP%i>S4!dV1&A0!KjWev9ygRhsr{8h-vF+8E
zXle|j3_AjDV)g5~+5nadvQWoCGKS8mc{jQB-Arci)FJtFb_Dz+7eNe5<yEah4G;qu
zqySG;(ziG!{!0f6&fJk!#9%_SP&HdtUe87L@0N|c1h$lqhzI*O0y?O5O`<F!c=k+j
zP9OpQMP&T99M+~lqUzq_p@cM!HA5t&EDejZ#x2jec7jJ*T2cTm09{Y>v}K;X8NMzn
zfgJ~MXr<#YkwJHOPI01uD1nw>J~B+){M1~1Xu=hYX=q{)XRX~eeAW5{|2ywr)0x5Y
zq;T&^q3$<aDQP)SIWhm)<6B?N``UU6<ge4O=3i}dq)0cA8lTC5UO-AaU;if3l(YM3
zKS6-c0~&BbQJCHi;#MB}bq(|LX&kEv@~2wNkarSiQ!-6aZziz7$Vq}R4UFH}bK;`x
z@Cm&g=`<k0V$NG^K3u|z&;DxJix6Y~nsSmAOZujH;$;yawrXM<d92CIqbPBfLP@0=
zB5^>bczB#RBM4=AsI#z$^HE_#d#gyC#xvLH&|}|v(4OW*9j5lxz@F=s8`*kA6f3I$
z?`;>y1k<FQ+^e`h5}nadA2yyp_S$FGnleth@DS7)g6jW(tKT;08q^63VytNB)%fWQ
zna_O=<PQgl3NE=*%j<4YedDfJBu~zr;#)J7vQ?p2#LGR;@MapBY3k%MR0TVaO2NhT
zEIA~I-S2Wp<}_S}dxYb6y^yw7x!}Z6K1i2qit?wr8n{X4b87rGosfI}YQ`~Ca?{@I
zF*AVh^ZesD;zDf^7LE!}>}h`GHL23;s<@tTL<OuGgn0r*3w^{lGflZmj%h=V<*fU=
zGy<CGK(+yeD898vH)GI(Hq)u*-Gi9j*fm5rL1{2fjFT3-j`*u2NX4a$^JhaDFC9}{
zCPJmCWQa=pMWi^%j-JzRBaP`m;M}QU=|HXIT5*qAj~{xBLtItHxpwXGt%`7V<8HM!
zBF;uekeWI-bYNlV7$`;|_3r{*+Iw|KYKUEYHE0c?L{4sbXHcK>P@h%wZ>k{%HMppl
zEIX5DqVVHQWAaRWuG@Vx-)Kh4U5?cHP@#|Q|0uJE6yUk)IJ>6z4(PKF?o2Z-;tJI!
zBAc)E5zL7>6$@(XgOTEGQO4wYUSaR4JQ{B?jl8?`2WV6$B6w`?c{dCxgJM%u2_zCJ
zV)vt;n{!zpBi}mRCWx0-<H#V>N${Kz1m1f|6U{}KC~C>5E1K~-Mxszo``chqu^v+-
z7Skjnw}dl8h+2y-Nxq7aolicJk-r6snXV}v>ed~JUJl#dav9_+p9h4IIJtxpuDsX8
zU_mi)Bt76v_5KHh)+J)qu%w1h5{$6ImG^CFbOYAKlv8E|aoBx$K%(<S44SPg@7g5x
zH%vfpfEsucbefg0JwRCXB8bG0a{J`lw#_ooXZ8Lbyg#3aSEUXuy^_Z93{;(?<mo(1
zM6narA$c+qMM%r;{m9Rkf*$-L${%dl%q`lTHY*<2&<R2}U=p1>#G7q%KTNQshSuY>
zV-L#ahp7N^A)}L88X)kb{Y*R}x6X|*)%yNj;y5i)XrgNU2As5TySfTDmvYcNd6e+s
zMNzA!0UowLF5tZPII~OYUPdodo*H-52EE!)I!6K3lIcl&={4@YTSl5)z4D5_IL{=l
zZW|x*kOrTu_34ODisQF9A+C^2QS+>$9lcHp(~iXZR{%Qeltv~w1r{|P=`G_DQ-f);
z+=$Xh7Iu`EFk)F0Ofq(9j;PB1#1SR6a=c2)G1Fv6z9^Ec)NT{b*JbP;{dYK?F2(o#
z`%cLcMGOWCR+idFhJE)VuO#dA6>*LW4Ly+>J@*N<FwgqcD^dI91pORL+Ytlog!#t?
z++J1s=5f-fsF0g+`adr=G!++PSINIzrEMz;&T*vvhrh^E{tN&53xD16C-~Y=L_AT$
z8rGA&@h&qhP|cRul9xe(g9qEwI&&{<8>{_U6ScOI-UwSz4i%?ekW_68REcc!zsxC7
z0GiBq|1jZ5(HUR*RmBmz;Etg2^u(C#9qZ8I<Q1YuT{%Ts-0Ryuy-JkD&mDT#ZVvyP
z=1X?F<l2S><v>xqjRMduK);g?DcV0K;?W08INgFtGR^Z&PVgW^q41%o$|#4{5|{aX
zlXFQmr}~1;<nY*wUqVf#Ij!DiLt#agCY34d$$!Dx?!C#O@+{UzkM;h{{z!g~zB|zY
z`whRx#+<=9xya^Qu8ud!FgxBv{dp4kD=dv<`8lB0g`6YVu&CAb*X<fqFM~8BNmAdd
zwU^^{Mi*tk$=oK4qc2$(rLgljH6lN$*qK>qU(Him;{Dns8q6+}nLePHeR!5|KC1V(
zCTfi0z#xeW1Rl=XR(Rx$p2<s~+{p&7&W%_^DloD`!2E=okv~%B^7l^}=S;mOwUp^7
zL6Sl^l*|E?wSc(OM9JMIH;O$`R8E;{fcTt)_{`K#p44O<KK36?(v5Wl(&>v!U&()^
z)wJjw-`kx0ou5PMvaq?T3X4fdQ(7Gu+3k_E@4ThwQ#yP?S()C~Q_F<q?;^_k3@b$F
zXBiESIH&XO)|{?63ulH5Ha+pJA0u^`$FrM|F3Yk8_zTZ~+3SiTIliVk15OBL#y7sC
ze(24yI<tMw+W6%qRdDZJyR`L3|EK#C;<-sf(6E>nShVMls27vuD3K}^*YsXcw~i1o
zr32#Y7$>V5wNSD$!hmy>%dsMNlN4d*gk#IA6WJiM4M+_Euj;#3y9vqya%T!`%9WF7
z-jA%P%MIBjjE~ZkV4CZ8tv8Qd!rK{;C*QC@c?=ZQZsCmnKG2A9{{DEWof0HB&&E(M
zt|;lmyLF(CMWC@u>WI_3h;dy>(k!WnBITPPh0c*I?{rCRE{AMb(%u|>ph<G=*pSY@
zzCLc>AGf_}(HU8({$b)Y1}NP`y1ZqWKb3Fu;m)6%8#<)7Bo%yGrkoc?6nh5W$$O3z
zM}vK+ia4>WplK7M>5tB}kr)or;=}f%*70zgM#&uH{u6E!P`5;{0*mak+{l3*GO96<
z$}y6W*4)kWVUl*j!g3kt&;PG{&Q#^PKc#%!8dWB#Y*T~@kEn`Y%~$K+i}_Zv{Zh9b
zI~}Tt;;ay#X#%9y3>K=UCEbFwYaYVvof=Z)pZT)}bM?79WSY?w8<v!h3Fh%&Ta|G>
zs;H^9h)%O#pn2UrcAEzrkOv677@ye-h9y<QghNi1&=gJy=$*Rv;J*;tTrf96Rhx*5
z>)>Vj#xac;V`xq$rL&l)+gUfN7EggghY#o5_{|EVM1H{ZPN+6Ou3n<2ak*JEA|0H&
zA89lD|3}-GELV;zORo7DW!*ZN($PT76IE?8Eo8muyVCys|6epB0FnVt04ZJFs?4l<
z{a*V8F~s2Ez6BI~nv;ci1JFnRD1G8;1dDRdda0RxT8KQ4HFJ@Gsv;+KYI#{%u;{IB
zf`f?vvi{#Bn=fI`iN{FA5LR}HG+uqVd0$1kb&ryA^t%b>e6qK65T8Fiv2Ox5@o~PT
zdZ;@1(vR_P$aY~<Y7d1dop0Er{C`>&3d<4kNrh7drm`e0b$0dX!Omk3=f`yTH{N`7
zY?a{=uiAC1QEbgLPbt)7i3IICIQT>p1PsGzaB7i_U%jB#Hba`A&XUyn{IuZG<$V0?
z1xzq)eMxG>b(l&+*#rgaWcVe$g$)nk87T2(20!6sAYFrrMH8E9q!FqDtFgr>o`8Sx
z6UjQT7vq9{`l7=O4);78HCB*rE9$v?nPMPto>c@1sfzov7x4dlx<OrE+)LIb9qJm!
zh>{omdh64@^cRTl-rHC2m!$};T2KNP8v-fi1X;=wq|U|hI>_an`7#YLA~PS1eHINI
zaoW0P%0>#9;5>tUCInJ}dB5ME;@|)4jz7=${PX;Lf4;?E;rDIxo{={0+FWkuV`LVA
z9*kgAmovWb+J+d5CDmygeoeSUBT%T0$Hf(r?$6?y$I(zAl6_Qw;x#XGfAI~f<ac_U
z-UcV`{7_Ai>urg$apID}#3jgrFnXL=zbNkj^QpF~pC8}<4O_V_kLzg%Fq%DqRADv}
zt>BCgQ}NkDT{2sm3-+8dPI8Na1tu2Bt0-gK^cM4@*FnBcdVk*kSOz4u?N#<ScLnLq
z(dqD*{B!Ky?gRbIK)<`-2yK|nIu`HmEW8imeWjkf6w}+%$=?v8o-Zm!FRHkaUO<kz
zx}@LG5I?GzT+=WZd4!+>Bn;S1F|w#-$t>OsRyH9GILHP=0@F|CYK6e>MuN!k`3CMn
zKAEg2y-?Ad)%0DGPdq^R{n5R=-POzM?dH|pJN~YV-osWRqWPv}{H9H#fH`|m3834w
zsPEV$T2xuh?~j)H&K0?Z)EAA(g0PvEJ6)x`Ut~<EtW(9gKN21@`XOc|fv?UJYtkw!
z>?G$;PjMQYZ0bc5&bSqI*ich5)6~QHY<a^)){+H{$8o$$^BijnP;n0rKxU6Y=sk#3
zI1S3AnX#?u7S^{#&S`AG#a-$I3HSK2kB6=MAAI?FkzX%JQ>GNu0Y5Ls&w3YUPVP?Q
zbkwOWcgLTv*@i1zPz#ogTI~UaAiozsU(h6}d}={+{0yCDn7PCf2|4jas}W@M9qZ~S
z_h4ep;T>Q9rM!FF90u5-IJ9cSq01SkAiN@mgX_R2tDyW^^l!)X>CdE}3nxpPiweI2
zA5h5fd%4C+ijLqvP{E3{$N8PNX@^>nrvk2X5PyZeEh+9=Rl8$jl{{v65g3^GP^86E
z4?++r(yy%P2l~ooQTLG5hy#{%hJiuW|I+UeH){NnxJeaEU77->F2X#6X?-2#{C?Vv
zn@JlUE|f}={zuI3U}hlR_LlDg?M<?iq(DD*fzAe4`WeVDlU#NWGk1T6V<HUOs*qu8
zj5}mKGN#D0MBHYK%vzuI^+aM3^zG%gbJN9^(}pE8dFsvbsYffUD%8$l3nz2QOyb`U
zF1<<aN5b9=xiE2ydP^%NywF&13UB-tq{ZSXDwgk6{FA(25l{uvj?Es8P=+E^GG65O
zPAApmrjNhzV+Mf}ZXl5{ylu1JjyXFR=EZOnMcoxWP9?8QcOao!$C+2u;LW7*FO&m#
zyv4QDkNtfJfBOYcR)3tb;f$7G%00IkT-GWXpC*kn;dPM3Q(w|^-^PbP58>KyDDd7i
zI^YT3ub__iQ=evq>QHk_9GSU#ikq~F{}lf6xX^!{xghWfQ3-f-iX{0p!{h}_jnC3H
z$ZxWqAEk9u2>GDj!VPP=;Vs<4DOdX~>!#@v6|_tOrmUDO)js;iR|l-&h-DZA(*x-;
zn^{5YnwbYUwSj-EL$Ql_Ipyrp7njo|U;8@F;_bX8kzcs^dG_=XrzO49O^-wkZ(I`A
zm|^N=F$W5yhaf><Wj!7SnjeY#=)@o&{KMX)?LH|tz4FkkLpHXM{SMCFi6rq>;QU-I
z<_6ByMQZlSczbNu4P)#1;iO7B;K-Z|yFR}TzW4`NV0PaB2`~0BC&+9|DgBV6rFYJH
zlF|WwyCknBg~`BT5V*s~8#BQy!#M{YE*LAB0Vgn^Cw|R=J4W>eULs&<xgDSo)dMNB
z4QO)12sX`$n7c(HQ3&xM#mm!}uszXo(t~-vm?+6v^W3nj0*$x)t2ChxK@GAOBB?pL
zXHvvpop{mqzB6djaurup5-LFf6uDcx*G)i|twcA+qD~_hf4IUUL2_QtG5xfTklq}e
ze={8>oqF^I(2p&kAMvBapW)u~xFKI}5!mbCL8Zvgeq-BH;0MZn*kb?*|M<TYzdd7?
z)E#8;#G^Eo?CMMU(e2POiIHeHJy}2LxN|gR&HdR7lxE3{Cvof=%CE1HIUbVIbWyeq
z$kJ5r;fgxZ($|`Ne9Fz#HgR)mFd?gRsQKW%FU>>!RSnurPxI;6XNi)O)M+`A^Nuls
z<#{)rZ}Xep2-!>$1fN(uPL>}0LZpPT#LlY&T|S}nLZUpr*Mqm$ye5D?WaA!Xo>RwY
zr;1Gq!}U;hs=?gXpG^7+=}*7mZEXqA$=ceWpa!x@z7wdAeV`zd9G-%Ey2BoLejc6&
znN8b5{M~^%UjcNqwwC#V5^MRioF{{)pz}@wMYG>sRB4hVUHhWiP1RYrpe(y40Rd*b
zpppxd1I6br`ineA&Q+j0p`Iu9jc=z3`5V51b_*wX4B~KT8%|UrG&sWcswc}nGZ#!R
z88>J&8L=J=Z^&|*;=_w1J6^%~HO+=%`uR53ZFB9>gzy;7Ti~tzCFUDnS!#JIQfgPn
z`O*2H)C~n*SQerTKka}gf!rAE;d2kEog=BTD#GWud2zO-0qMr}^dzc^TftxH+boQ_
z$db~a)Unb2u!i%S6vf5wid8`S8&Gl&3PkfR)%_wcK}L2J=xRyJFR#KCHA38_#^c05
z?=@AMcDi^(!Az6`5w`|&#A)u?UP3dK3s4Hl`2jxlrKw<niiycgwm@3cbHu1OT?2YB
z*EDWa7tU+EsmyNW5b{yBxYOlpuTl1;4s+|e55@1!Tr`d@JdozYM1g`M61pNZ0exn(
z4g^Dd20p%@r6Deqo-~f$L1b!yPDWR>ctGQ2m7e+<&O>!a6!7*d#;|E4y+x2Qbc%;c
z&^D-6#Wk8da4Ik{xfg>Soj7089VINtyV`z;|9N~doF7kz@`I6v9z3Ny#2kA_V=9Ui
z*Xn|<_h^_J4kqh=op0PEau!ZBfVu_<Hcu~8p{&7lnhZY12y%)szq2`+v!$-1P^ek(
z8o0W^(a_iq>0|H|BNMJ^*RIfkV5s)Qx;oIWZu_bq|KTOZ&*7S!j)q>CjW3hYq(Ewl
zYg8+|@ar&-rBYwvujk8pPrL`wJQx+A$KDhdsY?Dbb)e&gZ^$|VdnjH|w*g#6v|s9D
z8`d_>0@I8#=_Zn5i7BopZPF83g;^=(#&mz3hnmaQBJ;IR<sg7&lw4V>Xg|8d#XDmu
z*jO?FHW6f`aRV4qD>X$aU&_>Btm6Fc_5vtu<tTSLe%`cgm=_eR8v1cTQLw~;f9wP?
zB3wd5r|a%PDmfVSj~VX&IM+vTI!DcK+Y36<vqUcC%k8Y0qy7&Br$8U4Q!SPX>#$L1
z;czt(>euF)hM7ium(UM6%OKsdy(`wW%%8AgwgCINv91o&`xkgY8^wxLRS;zDfkc3E
zdZJ8H)H+(!&%CIN*#~Vv%l%nG5;@3!ecWd8REBVinbg0pVcP|1zplG>=-a`GgSU)#
zEg!!SMm8-&ce3r|!{kkTQX%R$L~Oy?vbY2Kp72N=0EMBFokwb#Y7OY*bm%6yrBRlT
zl!Cs!)z)iH4phM?gY*fhnY9(&nH%uq+4^f>;KyBPJtg@k+oQH?F~*v0o9KHAjItWk
zPnfx!q?|P>7if@y^0+DZo4?0Rd?@@TcL>A^Nx5*TYE9UgRxNS%t~-K>KFAXjVq!ti
zc?WTG_YnSuL!4@lmkV0%F?SZ;5iyu0PL@>4s$c%ABgXdQZ#EH%+_MPJ`mbYGro)kp
zF9cYkXf(p1NR6H&lstyDh4gcQ6s53%dY}0Bs7iZ>UtdKCMwlgIW6^kG{l-nY<<d13
zAuXxyfnygMa#xoW>JfH8U4?`G!)ceUEmx@>q<p;f>qz6bc6PJ8bJ;pP*CSjH&763u
zX!+T#Qm8%&)E$BR-1Tm8`ByXWRT$kiy^e9T2IoWLr6dQcM5=PltO@B>Rqs9w%%Cxn
zbLxU8iV-NA>X2jtxd&O(GKuQDT!-AdLeT;{&o!F)uk`hwLwLUb8SZa)=N|l@?E@tb
zhTGOIvh$noR&PF(c55q|zrH&&F(ge}^8rLY6PZ->B-(dn7G)EQxCV3Dre@gY!FjkB
zuDOZAxLEx74Ct?PmzLLlkK-+MoS$pyg5mLfkVF6eioM4m+3m2(qxPh0z_tVIxWR{b
z4=2oK8d=A=m6qzPO`88HJn+L9O706L$1fiL25t<ko4F#<GZr;=stdZog}UQkW}?7`
z^Zn6r_7eK{J?|mXr<=4ustn#xO-L7-MWN_I`VC`F)50NNx?E3>lQXMhZ@N0pk4}d0
zHv2ZTJWhR^VHZnrzAT!!4#f24LoajDjH3J;IeIfns6`loF^PEv#oYVTYk0a1(n&Mi
zq>;BL_w;vR*fpk3`63k?9p^2xV{(%^n&pYajHoVXjPz?l{ug)=lL`!wVkFeiC^g%;
z#p{&RfL7Vk3h9DBJUnMvQY`jA#f6oxhs;+dn=C}<UFw!06p)@9mh@4NmRHzg^c#`X
zX$#SnCF=+iO;{&OR`6$i0XozQ=Jyij2VBC?7rVt<Glpm{N<RdvZ-_OOdctsM$tt%B
z5i1Wg0+g0az*P<@F}3Byx5>=T#_Nabv=7pbX*@w%#3jIm1Jn2_(6~q0z%<MYU_Ng*
zq)La&gILt<bUOG8tX!ne--H$Jb;~RxK+BSpKP~@<%4!^a8ZD@+QA$*VT0~J$-~*H6
z8ql#p4%}DJ)bgWEm$}Ek;-%{T!dX3;lWthck?&8d&=QNLpsQEI`PH$kojN&U3}Gn)
zcfn-{1-?P-7nBNVtE@!InC%mNrvYhI`13r{k-oz~;#&iWYwqksq(yY%`lS3>1-i7z
zctgG4W5S7UoOFSV6sQrbSW!!hW)7rO`>u-e`}?!o?%)4RZ6h_y3@XDwIf|%1MySP8
z3hG~}t9KIVFEvY$Y!r__94Fe;LvKF%=~Y_Njp)9FNk4tAK%w4BzNAw9{8+=HZ}JqJ
z4?aZ-27@ojx~aCH-yH;`d}NytIj#MKRJ0x^4#Rk$Tg3%kQ@%_~;<vBjY%-M`&JJI%
zU75-IM5R{1q&bNKRd50;iruF#4F(C+4(ocOWYVeZp%5K2DOGILqOuP~|2zP3yr$n>
zO&}l)mzW312@Tq1Hb$vt7b+C@P~X1^YI0pquTm&}d@wONb7yCsw%kwY4p_dXNlLi_
z6pp_iz{AP-LI0*M3>3|P3LBp^3;IQJ7c98C<j}7Q*4yE)1+Bu6ll`gzP3sJdivKIn
zE&)kpN4J1py!~)l)sY^){aN}wT#?X62YH6JjocP@Rl(qdadPLnD$KniZzxJCSHkI~
z!6do=$>VQPYpo3_kj(Q(SFHv__1eVJot@yMxv6m39;*Rn24=Peinp}1U7HOi!=cQ0
z2?1S#e)}Ruu@2{PTadmd0jmI$FUo<Pu%i-2Dwy}P=8~x(8H1D<^Z~O&DNZB51=LiT
zXj|07A2*!Wrr+}Do<Dz9UXfEK4W)?&pcc4Bo@6Gq!h~#|%7w{xgRHap+h^q!ImM^&
z{VOOJy`3;DNj<N_&FrX|9BYIuY(>ePPZeUoWfarB>T1HwxQ>sq4|_j;RpQl+##2z*
zcW{js8<eXeHPOjP{21f+KG!s)nP*5=?b=$jQEW+~Hjv`+Cgp4cYz|Pk>_En}0c=<5
z|B(%9LSmSQ3nOJFv0@!GpB1BfR2OlSO2dR#mo%5AZ)ssK%#|!nW$7*@NWr@`F`^rg
zF5JV3iMV~wb+z*g$}otI?^8@qoua3{5Q_j-6eRi;jHqcxTnWlQg}K3V-<GXw7=(c8
z8F=_{w#GVUrZPn)b*$Z1JR-Tr5Fm%uS5(%Pp6BHAQ^g3dK*wSC`l5p5k_mFi))bRN
z1=c5aaueR#f{c$O)}w`{Yg%L`C~_dXI!@}BWG6GXamO0-AlY*2wV}#~tv!PUNeAPP
zoJLVXETbsynvjOyub*rfAnyRxB6)YV0S)9j4eeyZ>Uge0r5H+yM!#clq!`~Pn?)cO
zApf%fNqePJ=kXty*$a0NRkp*OKilPP%FDKH-{#b8Dc0qZny$K_@y)j$_)x(cG^Pmt
zKG|jcAkJTyO%j+yIUh_CUlNV0hVpy-^n=PzlCQgGXnDm_nqZ}EU7XHNvqe>?!!(rH
zYSwHnm^IV8lny^?oi!_~KtDPkmlH(pCt*1J#il>7YxVt=IpVycROtYm5|bNl4JBwY
zSPn*wFr{OrWHcPDXloxzBX?WE+=ifMzYt{r!civ09zsnYKg?oal8li{Y@vqx%FmB&
z)_9kMcni}78mRlloiB@6Una^?eMP^?fPrt^q$kAIh)3QIRExejsu}9o7!Q+?TOqQ1
zVel27C&3&_s#`|GC`lW8Tqcp~0_OJ`X3^6RR6k(qMFw|PFKD*-0dGvKMmkY+%o5h@
z@sl1Y+0~Zx1DU+388u$_Ara;BQY(zC6zD;}N$bxRQAVzm1ceUtB~fe8Q%n_O>b?o|
zVFfK*SJq#8=a5ZHuU3a{1vUxZ94~D*Jb+*j$XiU(>TTz%PV55B6Tv`f*X|+J*<!w7
z6LePMyw+Go3BNS1y);kTL26whPP8BmLseP*%JkmCJcm52<=3~_tD*s^cr{ot$s%E*
zuKzC@E4{$y1IARm2&AHSmMzlM9v`gMJ7A?(9Dz%dgRHlhTb<QjgccgggE}}-x}Z&{
z;%Vh3P3~54PIXD2l~-fn%8`$h0xi%$31_2~O-nkm0V3APlnxx~aH~#Ok!g&Y9pL;|
zU=85)cDq@A>*f7+lQxK(xIM)iR>CFwGQ8LYXKu{?bUy!P!I|6iAEr=gLT&rpmZ~jk
zOsaq^2G42C!tY+a{zj)~%f)>@{{>Prrv42`bv#G$HW{97aIl`ApTHq^uC920q1vBk
zPNYGuthGf32uSrD2tZd-ERlXrNIz$!c#qyE{{CM*gtxbNlI&j)iVnKBON|~?dev-9
z8;*=_1HIE8+*A1yD~VVw>Vmhd>c;n0iyqqf43H)Ha%EJ^!HdXRX?js{i;@i$jE4$M
z18D4n63T6G_H-pZ=^P4(R!!Ukb_;7>Q<ypueHH81H>s$EEx3XGSzT4EZKqlklTtE1
z0gCn{OQg)ArA`ady+yTnto${K-^z%X9DbTrC*dR0)p34yeh~h_ap$&o>FH}~%aM*=
zr{w7e=QkWQuET_!+M{fw|3&wgm818(p1R1Yc;ZXeKtl8E_WJq+2FYxyOR86u1p2@;
z`Q5AcYhtn3-&H*R%_zg85WnFlDI=PI?kUOSoGSc;j6&^n$j7H5DB!O10){#@pfJ5@
zjMgkAP9FZs%{1$C`z-!?wnznfoDwVkF-y@&L9m2sZSm|wdtw{vCxwa=@L{T(2b2|2
z!<(svs0aM>EoX1CRrT`dqt<?9O=E;=+%^Ga|L8YL%UJKcjge~kLTOX%RFQayRhT;u
zl+H<2ysdcP*VOKGGK1)m*8o*Os=vL96jU3qab82Zi242U_qa>_?ZQindQ$NQfPph>
zM6pe2CWUH;A`S6@#6*mrcb4=s9=}&-ZcaLPdtpz`J~0=6tbku%-OZULS9)aikGc(F
zx1>_KCa=JznhF>OU@CpIs^1B0BKZ2=d&z{4r{DV?-)k?hWF144G~v-Rx2Jb|pf-So
z3KSX<vWL97*b1)Lw01119B<jrM4-!Iip@8{ENkw>-MGIO9BHAb3iJu@fbXk{ay5mT
z?*Z2c)o&@X{-jT}<wsATQb{E6f<*(sI#QIyh%o@R{la_vV_n68HAMk2SM9O&=TuP!
zEEJJAD0DKsvvzU9SQ6;7PH>(xA&H~tgn94Z-*4}C@&59@-K88e6|)mmbI|OvPy(<A
zNHs`Gny$X0<j%4=47lfDfI*SPT6fo{Ybq#$r!h~p3XT?iuMAse)Lb_swNq&|{8F82
zWd%m}DS-+`Rxe-%)&$jBV)@ttRkqpaGT5nn(NZ&fdGgbMbOq&C7nc$y%vlcp{O}uW
zL^R#decUJb^@~kfB-1E|)d+O+=XnDCJe;26-}6)5URE$yO@yzLX#MYXDsiL62NuS=
ztS)F&h0Yj8EIn$>9U=W(RVf{2e=hw=NeTX7cYoI7V4US=63V{{3IKe8)yndwQW@*l
z!K!Ag=4sCOEku6?Fe{~pzefB$^oDZV)=<Jrg=;DnH)>1)`l+HK63u^SNk6h?M#`zz
zDV0G#2F(q>#h~Rrygh6q&9}ecdqa~3$+JpWR2H4nz(T+c%0Eb=tnAe9BhKGoWC+4N
z==*jOP!Yx{oi)j!Qp+n9%wpC!zl(1l7gZ^g69)Ms!1s3$?pErgxYMEE$VrA^)#?Q&
z*%Z`7&~FAuz{=UjK~&*J{&B$nxXh-5Utiwu?=R+TUX=$D;<19!?wGNlc_DeI)S{n;
z^6U**O9`~!neoERT4C=eChjp_DUVgLtSTvtHbNbUtX%dld>`UTc#ZkB8<tuwT3;~T
zB0E*noeKRlo3g-ToH_JQo!`d1nw>VTBrpwyiM^CG-gk@=BydAEh}r!4_X1U6!?2WY
zQoB0((EqOijZuGaMhY`olx{>-r>Ut)46$e4@_73f3rh1SgR?!VtE2qx`q2OBM=`9?
zdQ0R`)n1#L$O^b|qk4&Y9z7lHwEeP>9#_1MbX`(`=9!lvKKUVd8^A^Abk8ppD!7JT
ze~kGH>W3}!D28uo9p|1H9@c-ngATBi7iW7k7JWgs(W*7l{OY3`o@+t6+3@waU!h_F
zyTAu|#p(idg@H`tEJ&kKmJ$_1bUm6XyRHT5bC$loO?<Rfs&d*k2uD0XWGW&RF+G)(
z?VXXzysCY6TD<i6XOEV@K|EiXmXpX%1lc|pFMeXUmw@Wxb%H^ph9yOHITMhd5Pyva
z=;evU-?VdY@h>BdIXj57Wu;E8Qz|eC&gy9jt%~$(<f#3sn9KiQcB*lnA1q|(i(iN~
z!t76KQPglA4Iw875q-^K6}wYl7iS}4NoOS&NOe+i`T^1~cF=`AJ=mR{_kf(jpp{xg
zQH5|LaOu!{uL|^Y&yeMg^4E7WJTY@xSlFeW4-h_-D+(x<#!$VQ#t|wgzdQE&pp-S{
zZtvFGKK^a)aJqn<57Ldduvkzf8q*R6va(VyyA!so`McEVtu5#BDGfzw3VdsM82D)&
z9~2)*7Dw>{u*C&c-~{dSc$iM(L<NGuq7$DD&_RKTzw;!2IP}Vb&?`J<<Yls}?{2#~
z$nQ?wV1n!JR4{+^sLMX;UYo7hxv|Cv(zhbM4>AHRNRt$5KJ%gp>biOA>B}F63(gG4
zoL5+mv@))%E*dCy89=93v<}q(8mN*C6WRC2wr1Fji0?gUcmapDsPTYgqnV#ka1sE4
z!G!XwK>0DE{D8jH=t_ONyC6X87=9V<P6l_AEcEulZBKZNb#t}B@L7A9l0|j&85KHd
z$seyLAiW@u(&o$o4?c{bad=GoqRLn3OM*WeR?e$Px1J|8gy-P}wMc>nLxC&JD2Fr;
zP!tCd?1JY(p*j>kZ-ff{Jt&!^u1Om23Z;zKueg;{tVK!6q+?Zw`q3FCjQ3$vrGNWa
zUGgiZP>Y&`&E`>6q#xbwCb+(Hfhx0ik98DCN#n!4Q<LahrlkeQA_@k^Ofl^f#l`AM
zmRI6VeKla+E}w@yLj&2+-)ngkOd$i)BdLnduaztK$T008a?W>z^ZOgr|FS}N5m1L8
zOnYou@6ruGp8#R=v_}I}8SZa)6B9pszrn;0N98#?@V%5zxwb0+Nb}%x=6N|m;fi<F
zh$PkCfb{rEeGL5kyE+N_05J*JzP;S}{4QzZwIL09=y{r-b5mi;5h3R=ToFM><}@->
zV!W!UHX;?9bX=u`|9b=IM*;M+y9H<Bm%c@G&*Rw7%_Y!Kq7r+hDSjY=o-%1D@WN=4
z^?2mz4k)-2@$f?vrM|46T~chHRNKi9NTwS2xgb7-lYUHS^b8f81_7+^>9Okb=xi0~
zXZNc7v(|cej7JHEzO*jrzUG3@Vcr5dS+&G{z#SQ!E}+J1Rj0N@1Jon!si(AaubA*c
zwBpby8cgh`y~?uUq$a0Ibybw`tlH(>A8O{2E#2M}0~+QnsJBa+{IS01OS?o!Z@_wV
zS3Uj6lQHg8kWB!(r!Jw2F8;Tj(iM}n2<oc}{<iVG!v3zxxs(ABQuW+FL$i0;J!SQh
zFV0Sbfgr<c)hM%oN>|lpns*)Qk$q%RO|Yj{HSbhNSCoZ2fEO@h94)4buSt3-1;{j@
zif0U(rXc8yL#grLYo_&0Ki)NaeAH6k_3iQYaC?#R@sa&uQvO+tqwtK><gm(Z*dL80
zDqy5T`p@a<SIZOteV8jNy7Q`v@nm_71<$AdsE-+ck=ko+iwl*EBcuxzrje>JQ-MnC
zi;ktz|9K?}?4N-q0g_U-pys=uk5%+4RG6E)&-1dm^kWNw+_P2mbWK6>7n5oQJTH2q
z7M(twifL_2dV{)3Jom3b4SyocSEGmV0yy88%KbA59TXS;pp~Rsv<Un_e+Z|0YA9DE
zA4h+KE`_z{1?O@vn)e`(6?bVt@c^jC&X=i86pvk~-%F@6-}CUdd42WqHhuEu)eQHQ
z>y@cfQJ@p8KpYhqwB;35L{iqGSjy3gej?1xQy=>D$zN{Z)G5!~35{A_V8INbpC}_K
z8lnoBg6aClkHIv!)(<x2(;=CN{zSlc@{-c1GahqPYe~|_sEDgmc?PkL^Rpud+_tq(
zSnFUfw*VF;oayH;lyTGN7tK(YSAIHF@ds<F;j+L3%u6z;qtoDYfwaf=*)^nOfw7i4
zY)|%8JAHWV#|;{zsq#>jSYgGDYr?uM<L2_;m(r6&L}4&6#TCr=@9tC-AOdpVCeW|N
zqH_6+qw*f^PcIbMbUf$iTKS>EmS=36h6eQsw7L}V_7^c<Avlcj397qPmUP3hg%?gZ
zZsXPO+vcW_eUAP|ke`s7ibBQ0FLJwEpo&#g5`My(os9+*4oox9B`LHAORP+ea~^f9
zG1GbXuXG%(LYSAT(ejeT?c{ab0m{k!9RRB7;NNgz2mFQ#6|#I_U>3wNjiZNZ6-=Rh
z^btZRs(j-Nsy{M5yKw#EJ7r)5S%G4fF!V}|4=S6(DcPk5wF0wP&o?=yq<$(-gtt}A
zhc`M4X|TOxwl~n|SH(7^kH75m)MPVOx6ed7Nkh!d$rw*TSt6!<RLv2suqFnwh!BA=
zTLnp8DG<VhP4I*}0Y%6%7kxoN<htG7o<m~dp;<y!l!}fznGIx{p@u^Tv}@vX(CG)K
zCa^>x6SE~!jqE<%gcRS+;}dn7<uQu;#8)3W=#r9=GSaN=!=31ohinxoI*gNr9fv$a
zs`brIy}`SOhwXMs#Tvn&FM@`!#W@N&=7BRU$)-!S0dbf75K706CuUS&M&OX|%iChd
zvev2(0}%pnk}gK3SSghXRi7R?G;Ire<R-4%g(kHI9YTlzdmNilbjQ-$O663g1`~Zc
z2k$p1RN*IQ3p>9t0_Y)3^&sR$)|9Gx#($7F+;s>OhCjzjnjCtEo0Ze}>h1N$hlgPq
zs)~kzREGv*Hr`k9akX?wsRX4{W6}3`IYB$S6L+&CH2#B<@5)$^T&#jNs8NB+$gU1F
zFR7v8=)W@Bm9&z0590VV_!qP~l@us#S4H_Xzf}iwXcy=E!MO=6Z$U6s5IS;!HNC6x
zv1-BE4=H)FItdTlkoFTEMKD1t`6A82O;|NjX>43k4JCBc94D(vqrXuwvko)`D(3K-
zG8aw8B#fX6#TT&(MH-L3!D*>hq~+!0rltxIYho62rQL%jL*R>IsE9igR3}TIoHOg{
zD0c>sLj3Ab0?I;f;)1e=$({(@;IJe;`O6kM^84#hJ7w)WEioy(`8TMl_<%*g^M)7>
zscA8ylEy*)O>ccye0t!`qNLBICG}p9N_6p{=_^{!s6)UVQhuQGWH*sIJ(?h1MEa9U
zD(8GLN(Ciny^tSevUo>9cxvuQ?}Ge-I_g{8i1u;rN`_bKVm4)l*8moCLg?*xv?e+Y
zU0#^#wO(2p@GX$WC@SuaCZsE&4EXRb5<`}VUfT28vq+vnWqcLaR*+$t0;khz?DOPy
z^a0NB8f1L#1zt-aGBu21xmcLo(DYpBq}Z+6l!Gd2yzQlh(gej>1STdspH`RiqYF>4
zZZUG=#<*?up=&NUFmx_c8O>0N52Grc&ge`>t0MjG%wrld)!R>$xWGk7v%DORQ%v}w
z*zbxcN>b{avT5?X4mBoypoTKUbDE5Ue`f{(mKB(Y$);BrK{o)6qb!;=+O7`tqnpAe
zx0^|yx4y+1I^3epI|(w0G?SKQ8ANlK<*gz80P-v(p<${?z6^!K<W=!uo+prS3Q-pq
zD(&@&-lV*=iuAjym`a9~t8Ck7$I|hfY*ng{nF4+oJWfErc7eucUxyc1(Z9L!!z?*n
zp+V7ULwtCYN96IQihI`C5-F{uBrP|V7WL!%0O(iuH;DUii|2e)n+gjzWWK{kJ0f}V
z{Qb3!IbRG@<UUrHt3I`_|DA`BL^vdRoaWAs`a=*zOQk&eO3ZI+QufPr9|4M_z?`Zf
z{Yvjxmvj+Q*DUuZ;i3gC=ck834dVW4{gXOvK9TD_DCgUIO--AbqECT_G=<x)0dOac
ze9Axv;*Au@8N0$v<Y?!?yp5Yd^I{)J!uPP*D4}I(JLs5&c~;s?Qb|7%N*(FG@yT-W
zMPT%LReBk+aBQd5_COZ{=U)w0r|K`q08qG?0xC+OE}&vo1+xh)P*Z0|zf*G*5F`jH
z{W)Cl!p<#9VEa1oL*~WVDv)y4B@co+(&P2)2Y<l^J$tv}yflx$@Pb3ls8*(PI^S9K
zJK#dLV%q55z)FtswS^g9x>ISUidGpA2<+qhp6;rlD(QV*FdxR}Kq!SHF;(lLL8;v8
z&2nA+R{iRRFmmS`4HFthQ}Uq#qfzj2E6Dx}q`B(bf;2edj%8-+p;_DG{hFd1(34EO
zr$CO->>)KG<t$A|;W|ZU3|?BJ?Q#8^T9gJV=^SiP9gKO?>`E8070-idRiJSBPdoLV
zT4}OkG-(=Tat(IWBrI<+LbJf?(&=x(3W{DH)6~~uukI^*vc|$MIE=%Nb+nYOmys1}
zJ5*tUod4j@-BP@<L}@&DPW51vRAhk#RMU^W^g7g867e2zw<=Wj5Aer)E1VS{P7?WG
zh07Px4)Cs$H%jQlO{sybSE>A^?lf}f;#;br<-;h6N=sTx1tLgHRqHT6n#yV6Y&{JY
zG|f)NoLUx%s)i=|v_)-7R1(ndZJ^%>v>UV>l)J^pclaCs_-GzqwT+KGsAL(*2DY3<
z7Nt5Omzz122DX<4#xzH7dAdbup~CPKNw+Pj8%o!in&F6OmPr=iE(>0hwmeR{GrD+7
zlaPOg`nd!3t9#L0>JXg4klLA%m=^!pg~wYE=9rbr`OdQ+@1~fV2<26L)k%C>2<3__
z>04dXucf2UXhTHm6o2iN5kE{+rA)5P1v5n#4Ii1le+yDvz~dYBY|J-~_X{1t_CWMs
ztZGqI!SgS=I@FE$G!%dG?iyB*$B!>6$PdS~xd^2xRELw3y=#K%PKVO6c>@4R1zJC%
zC{-0ozCvl#o~$F?$WUl9Z5f0tS9l4S_NjPT#WaN%%;ZAKweN9NbJ}+V^z)<RL16(<
zAv`|vlY^bl*~PSfTvojh^dG_I#GjFD`WP$P4)M`}?AIjyM|B^#S|vo)1vjzK9~C;y
zc{O3p<A|u??M)J$_dfj=q%q3|bpz#hoJoadDz`DHDvW1XA?evOkw$&%SYuS>)t)}?
z59$<418)kI4kh)dL1%G<<{0SLNB8mqxk^BBtestOMT1VIHnnyr^s^7OcR6NhfC>w0
z4{4tlf<Wk3KrPL|q-Q~-Hq_5WfV3M!OV0zC@!wM7$8gsPt4KGNM)<d<xA4?&yCob9
zePoZcc>SZH52q4@tPu8dI?H)i7W4~7QvFa%R!*~i@WF!68s_{U2`rlb6ly$Gd@Qcl
zvHG#?ACjxXR7=&@U##rsq;cZI>!M6qX#4d^LnqPG4sn7~{;LXn@HT#K!;?~$K{wjB
zAMw)#M=xinr=Y_lUDgsT)7f^s0qpQL1ZLsc3c;FToSzF;T(15m;+OD1sDnnUQhaw^
zSuOyyK!w>PKalVJTa;RjwyqGknqt(uP)!tZ@1uxH73gPoWaKEZ@BVhJy9(^I7+OPw
zT^uW_L>L#<W-+=d($7x6!8or4N6aAYdfYo)u2yg)t4MHj*R%{^i&C|gS@7u)CUz?J
zM=0Em`9MplGAR&)mQKuCL4lYqt6T2XEJqmE6-rjyKw+5T&mZ$N++fxd%2kB;dB@{r
zSA~Ev6%?iaz*6u5g$XQp&~h;Lqs!`+n-$nfl{Lk=51OT@^!3efk3jkDDSktKH>bEY
za}h{dwDgdMGHYDF{QRJyQH8nAu6(r;pj#g+245bv6N?bgl}~`qmL3gQAKlRN#{wJb
zeBS1PfK&16Jnap&N}&GJ@~<OO4-qHQK>DtVauiUozuZ&7l0}SGI8ba*-J5>?q6i4%
zG$Kjnwjf<6*e=^9!*KHLTFFA8v9s20;g+y~YS?s9)u7U=2?VoV@elreaPRQ>W30`u
zIi-VLmO*tgQIabPWwKuM`Tnm$g<vJ_$48`nz2cd=EB|AD6*AOgh7H3Mi$b6)5&+hx
zy;=tvU#i<{{LW$!TF~)SO~z`^D=1*73N00QQfZjtX#C&v?C4jA`Tf;lLm~zg{$=6;
zg^I6x-`f9y*w6BqS3BDMbwwS}IMhvTfjTj1b9Ak7OjW=?*?tHyWx1ase&;ofcfUhr
zilU$9EV|C-B^7E6QqgY~O4d6lV@~DtO~=R7_<#Nxo_&B}1?@=($b@CZdekM9n;q4D
z&w{)_JF`{5+B^9<&ujWQq5Uq=f_FmqXm5|MH;*sD-Jav2ha)!h$X;=j5xy<_FuHu%
zUD+q&k^gVOHI*JP-uKJ81s2*WI{`8*t&P;r&ZBSc=<zrG;6I-)vS2e1ShhH=fW9fy
zzU77Pk1#ULD%F7QGXdU_1kNSYhAt1*L!nJ_q%Kdpffrgjz3el85d*;^q+gx9-^1U(
z{BO(&<?#AORh0&7H+(bnVhH?zwGcxB4s{l#=%UIZz)~F!0@jxFYtMz=LxspmW5AH)
zRNHzI)ez1*XtO9*Y}B%9rPH)(9p`6fvx2sU@cM4Y;7b00vyg|X6}(#DMgket914_J
z1u8(;CaA}emAVvOrE4phJ0sPOz6BZT$g)F`(!LHgiI#j|mBssbgoa+e+=h^b)^3CD
zA8!CxKUx3i?oZp|&x>n|RcxNg@hZwfiYFJ1+UkOhs(8KOXa)Fwwh@{8OP2Ix<7PdG
z{EkVR?t&^*kk!6B|8gHZdqHz2FrvRBlQH<IV%(qdCZY37AX6XrCTP~ty~~O!1`LD(
zN4x>-GRs!zpF6T`82IU48Dl{XqwT@%1;k6}!WLBwAx78V8q%*$KRiGsP{+;sl-)l3
zVc&hhyp?-WQ1N(yCp8DhXL862q?pBkQ2kdO=(Z(w%DJ`S#w<sJbdwAiRECvT@lkM!
z7fUMWUKQC$w+=Ku2kLLUR^$Jv!Ls4WzPu#sKL=NBxM%0u1=i-7Pzm;AKoRAHax;Qe
zbI1jcSp(QkzhE%s+eIN{YnK?>10b<2iZeyuSlRQ`$ZA;BjZwR~I_N;b(p{l2f_eos
z2p~B$1<>;0a#(Gt<6Iy9VPe_YAh(Kb&DI|r$PC~l%Y=~hOchY$H4XKb7N{nz-fKKX
zIyr!N<hCn!e|>#5@m!g+VgOsN^z>Af&y`Nrgf*ZLxW@09JhXi4+zoHEQ3usB9`AuP
zX_bL;QS4KZ<O&ZVeuI4NL9I3Y$P2nPqk$fVf*@H;!xmmdt&VqT>Z%6xW0!OxL!z{p
zRo2pI_9)Q?Kye8&pyZY+kfzFjkWElQnd&)}SM$ja;TMMNM;A?nV`4JK0?gWC7wBRa
zs3c!kgZtG$xb~-K?NezO{BvDgT&?NCn&MPt(aJhZoDFe>Xw`4{sIuhvra)#x(Oi(z
zS!+D~5Wm)8CT=KvbHBy92$#?Q+L)5uRlK;XfO-Y3f(Epm;&1jPd6WH`KmCT|Bf%;P
z=8aJ~4X7>a*Vov@&|w_46dL_2^5|tai*;^d#5dJ~WVxk<i$V}YGKi^JSBH6cqF_@%
zkadM~gWi<j|0UwxD#RXAjQ;x{AvHN|eX-J9Ia7t(>wKmn=Gp8u?NX<@HO#LCOgvNf
ze4(1*8UJ*DGnv<JHd$8-U)n8k^)$g`z4Nw5tD*ub*`(Se3PXKGfy=0Z2jMCod?tkX
zUtor(kDrK4Oi>Tj6077f{1a_ZccudG=&0vyxFW1Qc3TAKEV>w}&{->93T?P#09ApB
zWagtV#igM2LP(uN!C>AVy2G@-RsfaUA}in&(2%3)_y_+ka2!kzXjXeRrb=rH4)54$
zwNwF~i{j?jE~Qg-px>R2x1LSPHOd1yZ2eB1F0kP%g(XW=RcFN`RY-00IBA(Y>MBUT
zI`GhvSd}o82taa{_baCKBG=dY7^~2kFfMyyP>7W()T*xn#p!U3uf81!lkIh?0@y1O
zt5{x_s9b3Jn1dBZO43$<gn<h<{S8oe&y&Gj3s^5@DWyFr`Tnwa_$|w1%PP|E4yM7K
zTXKWt4;hRK%hph3jVo&xSox|-A#ev&pvITVpx4JM!6p~umS>$+He3qHUM-MXmAthg
zw$JYh()+`+(Kc}`_?fA|+-@@ma<Br-Uzv=t3gHECDOR^?-__T2J5G^rquf#q0G|9c
zJy4J^fu10+#EVl^AZ4Jj%<D*VFCb9E-?&*H4c7sTA%@3WxZSrcfDU+}n5lx-yThOg
z+H2(d7nv$fF!rDcSzFW3E<C67B;07H;%77As0FB#u}%*Hmb*wXrb11~2BbJ*<BsNS
zugPp#i2LJzFIx7^aw|H2dY3(wY9J5I>cWr}*Uys|vurI0*Euw+V8uRFt9{x8wkurT
zlX78J1V9Of;rUUYfBhX4{$hyYm_@}(^6iVXAI0DpP{M0#`rW<v@3)%}bPV|QP+Yl5
z4Pt3hN{|*&N(>89*{_BZ%l<yID|EW^z5VH^#g^eSd8JfA1F(4}xDGX?(sUH;R}_~}
z4^A+P;$_0pRaI<ROhuoXD$vhPd7P~?W>*tXp1%6G4BzRpmVrth3vK~>5BDCL=wVQ1
z^eI`m7+h&ouDrA<SZb!7_W*c65v<=^SU<ZLWBQndg8|r0^5FSM0Exf6r9T`_GH%jW
zvlj?lSH^&C8NCG*UzUE-3s75Euq^eVqPK8mHfU*4PuG<W2xGLGOzJC0fyBmq>Q-ZE
zS{54nKdJ$&j26hs(zg}HabD19p^mn57Ci4=s#he+MM#69A^$hHw6ZI6HM+F4R1IEh
zSOKgrY)r3kH~`1)eruLx!(*2$t05M<eG*-O`H&h=2w7f11=7hxUkza;WDvHwY*q47
z=><x#7L@>(`&D(QpOCWr@q;mih}loWE3{XOm#aiGsNyXsUf47zo~A^Pal)cL$xwi_
z6tKPQSqI8{z_GD-wrGH*i}V_#Vf%Dr9Yg)<=-0jv!;45WVjO5pDS^Fjzgr7=nDs1M
z%tyV!0c@fR4AX)j6_CSLU=E{c=@!iI8Rpg?OL8ld7{l~Ku>F1c5RCfeSt&rs&5?E>
zuW2OQ1J#Ob09~f6G8KYqOPWU7-PTaEBNJ}d6zv$g)-nf_1yQal`TR0*K&l_V<N%NZ
z<q~h@4PJvC^5XdU*-nJTK_ApYZ>^}U7rGlMz>Yhp4)n7#Lb!+gI;D)-9t*86r1!RT
zjFO}!EG9$(3Ial2g0-^DS-ENgJKW5<Va7!pfbR$en$co2uB&#%1yE#IC={T(D$vi_
z93mv<dVB^%Tx9M-dJf*=Beor4k5KhKUj;Nx!L_i61Pi2?x!^2A15_W6y2nD0r*h_$
zy|7)hK9sV`4vVY@bs(>$E5PdY9RRf88rJXbevd)BjZ3L*id5F)t*7^EDzSj<o`|0z
zUatjZX@S*_1|fB<SpF$UAbueecwQrHR9?fGJLjRSLbt14mV+@HIJ{OrFF6CX&a;#*
zt7KJ8T+BHN6l9#`8#Zl$^mAU+ukOu0zSuYY*V(~o1E<Rwm-xZiadp^sS<Tc{sW#>W
zHcSi&R_b90j)qveu6}>W2!#M{oqtL$(ES*^*qZp((a{<$VZjsJ#Rq}YB5R?+Aq6yp
z!N%nH^QRnZ+LVF%z@m!q!w}dq3}{mc#PNIZT+#8~G6iwX6v35NXkJ@K`d<+V+;ZHS
zH$Wj50`^-k$#LuW>yJ=V4)}V`aqDOUq?Qo4feUY8u(4=f7783vvkvG?)kR$|Ynr<o
zgaeip2-F61KYX$n-;=bI_`2G7Uuns9p$dgETy>~BlY-B**UqjPL<$t&<nbn;lR~^y
z3PBw%kTk4)erR3=n{w)~Yrk~y4bV8n(65@;MlO#Pj<i>nbeRcwnpVLx7J~H`hC@nE
zXtJW3SVFTt58sU-KcFwX1rpSPV!UF+|Nb-FRWMIL47m*=yX+>JTfcM&87-l6#=@4C
z&{-!CbmHUV-+M!i0zinvO-~ib$70!P^Rs{D{{mQq?LZuYkbq5i?gt*8O#Eed!s!;d
z-3}gKD!K7ejn+`$U97E_P8WBoEix94-sP1KpRtxHOIY6oXPXyWKBgoFEL0+}Xq8Ks
znY{YBI!vjw{oR@H@bEC$7m14DW@}J<dtGzuQkbppkJ790se+PwPqRSFXiG?H<X6=e
zG>P!->93QN@WSZz(!`7|DkCmtpdXAc)fa;OOlz9lx0YbL+vDRSf4NECM_7MeUclAj
zC2_618MIn2Dmz}(ZJ%|DRTYImHCkD1UZm5aqFz``(^AfA-LJi>8?LMKbyZ?bqBgdx
zZ2-jj*WoS=bM?<Gih)<$LO7<Z7OB>(XM0e4RE&!cKnv7|_xCq*v$?P4OL^<fW}9s<
z2E(8g$QzdVb)gi3!3tP!R(HU53@B!Dz`EJmvIQ_$i4kibpMV%wNpXM`qLrAq0?n?9
z)tro1xhvi)>9&uSz=oUi3{*KC%>ryeV1o(4Y-YRK03Q_TW*#UedCt%fuE+-4b+T7&
zaRs<E4p!T!u%=-+sTAKP6}_|w--C+J6_hQTph6Vz9FyT)r_hW2AcG|~4wDu=<%AYk
zWlXhOS9Qwz(qu*YJ#~ZpOE6A5v#o1(%m|Ke0N4bBycMTpi!5l9Use>fab4{=eSaPf
z-P#|-)tPA(BLE8tkE8{QLMevznH5-F(o355qSY@d;sBl<5E;m_x_FK1;!^FCMdT9b
z*B1r)*-fF|^h*r2fEWZZ!DCEeU+>%IyJIVWZCq4|z6`>~&=Ca8RMdzU5-gx21U&oN
znl2;OnY<K!A{Mk|YD)qZN8L@p`zAH8zp7Y4rR%dZ&rdgQ2hBh??$uycr3W^|NFLnd
z)o_D);ceKSWLp@<Z!5=p1qAWF7-TGjW2DvSY+7I6xWJ^%I#w?{yZJeE@n?I(8#df@
zeI@bXW~jx5#sLM?uL9~<f_j&|F;+@vjO(TQbAPf%S|=U+{a~GwcH4+MXIg@<H|{te
zAfOy*<E2bz*$RytsngJ9!#?<RJH9DwNoE{2UR&{hlYV}+3<bEljMS8k!qaM;7H000
z^bUA;su&D%B`v_F`?cX@b<gn(aqO`?x``zsi@V?wn~8geZn0XG=cxz-K`0_S1=n!y
z%mn$-mM>g(3<4!Cj&)03PF!Twpz(oCLVQh^)rL>ZQrRM0lnw@75D0QX^~%xdz(V7w
z2jd8&UDx4u7Jib*Vf+>SH^Z?wfNu5W1NjY@Wno&=5Uao{HAm`Y3)GE6#mawvxT1B9
zM!ZPAiyKxK4{TWyikE}Mv8%INU@gk|dft|A!`PgB|02XjxfnDyuR{$%`4CVZpi$(w
z87l9YXJ8_t%q~=DUqQjR1*!*8kobm54PY}~SA;L8&sFYT#ka~VrGi}14>BUD15{yN
zd8lN|^$!LI@Y%hh=EkCDE10<&d<1g6NpEukpyK-82Y(OS{ooqyR-u@ck^+?&S4ibl
zUWW>69cg7f`4v~|6s>YpVh49CxS{6DKO@cI>UjAdBL%K#s=Ze5KE!Q2IPX_l7OW<^
zGP+&B;zIwr*abyD$XHb5u5W^hE6W=)Z!WIx!s>7iSVYRW$811}#AB>tO_9QPr+s|3
zeUJ~q`N6nHyt)SXcLv>h@9|l1;ewk0GMdnm3u~E*GMYrcj9alkP>0;5VLKSU0}-}q
z@G|~cyuUv?9sl{7QjCEMfeDy!1&c$vf6K@<>9e<D&EtVXS-uSK@95AKuIY%zjSHL$
z7h^3|>{}WVDwx9%$xgPi5BB%1&d##ut)xe3ZWfoehuW~{7f5;e{p}XgkK`m&Amt17
zg&wtT;}y#x+J5whr2*crGpxi9&=DsX$OGKHHI+agGWKU{s-`l*rB2quqSwy*l!k&V
ziG|6o;TAZyXbG2+!4$S?bQ6)lX%wP3Q$;WsgIFL3G~8F+zM_FWuD8{sSz-DPOm7)0
za2Az8%SQ?6+O@j(`3v5lKmGmx2fM=x&7|U9B>|QwwAZ9H$wi`ya}2QTSX_C9BmXk`
zNvCLgF)OGW6D;Ae;R?e1S{z{Q6FH~}YZyy{6O|!8POqpb@7$|1?!4HwNERZXX<!Ps
z6H!Q-fw**@7puU8yWiWG1&$qT+d{Z#s<s-GUx^o0OrX4UG$1S_%~a1&D69eN#!kJF
zLK~LeFZgoe#265Sp?=x-3!r9-Tx~MvD}jC{{fz?Xv{5llwf7`(KA!sf-{&{G-EAM)
z78`uKEmPk4LM>L5CG9QVw4~uZxYu_T-}xj95!UnVXU+K(4z+_aw}6y@75g-XfO1Y)
zNB3wPzy8F2%lG%k<~Hq3xEtrg@Cd=)-EOd&x1ZB(+2F-UQ_NoC({Bwe1PVq+n#O(F
zaHKX;LAfjoEtx?ll;RlJa-h($=$C6dZvvrnzpdd401Yndr*Sq%bdJYApHjSDXmG{b
z6;wmesD%bBtO!_KO1J8(d!;|rD9UR<-!#6lw%iA{2v$MZ&~;UmV+g=O(b#5vi(I(U
zHopu{S9Sqd2*o%BbXdXoU;&lO`MrWFyUtLP8*_sCjUz1x^&>&O^Bevr{q*tg_~#xb
zjb6sTGE|_P(g7@$2l2N$S^tAxvY^BWPgaa*2~^$}uRxD7eAPAm{_Iu>3jW-te<S=Q
z6-EJt0U0j>gZvPEfK{N>aSPUUfEtQh!Ta-}b@n`u_k4Wcgy(b`e}Ob!3DaI}M`{-(
zOz{J?P?01fVA>+SvHc!W2(3`u>J(DGNfYVC`O;-7U4Y>=+TJlyCf2+t3?Wtwt}9T#
zQRJc<R#xJp6mJ-XZ@j&))Y(<*PJqINlMb52^gZBg85E${K^=9coK16MVRwS~G_yT3
z1q4Vl@c7hfzFy$s-V#kvf4k^9@&)0IHQ>?%j@fgd#d-0@V#5`0g4#yt0_vMh3a9=z
zI4x829W+@$q`&ZN&0zDcRVc{Yw_t?`jI1nC5(vdyPI$0>Z`baZGgDzmLlH|!n8_;E
zw~S$U8p~@qX7K?-d?IicZ!J~NVzg*BY5;mmyli>7x+P0Mc3GUSinN@=Tvl8hcqE=i
z3MFDNLIProUFaPPUX%r$NHVhZo2&CPP){zgFg+6HeCH(z^YVCIIg3Ia^hqdzW?|+A
zprsu9`@4yeJQQ3lsB^`{=Jn<Miij+FA9&E-@s91UD9B*DwiT^NUHY?9mz%Eb^bMen
zJ>ZR=EIR6sR=luW7=~kS(JPj%=!edr@uuU|T1Hu`qA+t+tXn*RuyE`6yT=&Lx0KoC
z3BZ6`j0-?Kpv(!1g-vmmY+KiDAMB6}@Ian(m2I`njji0f?e@ZgKlTgk*Ot&Fge)Mj
zUvNE5#~-a3WS^`$$_hatcnowWoON#6u<s$LcmI+F4;xAut*(tQiWA>i;JDM)5H|~+
z7*??xkZ!36E*dWjq^^~G0V8?b0LN97*_J6X7?VWLPi)tnDsi~5Ta)UULY|lbjlLp*
zvCFg&EXJxhKe{8A`QaI&gU3^8;jDJt!yE>*;fq&Q^vfzHh#u8o?uM*^x*wc>17JN_
zj8g=(g%n}73sDT?fcH4I-)k`UIv9vkxh&2gP6yX1)7B7Bfoyz?s?-{6Uep->p%daU
z*pk~CvbQk2rv_@n9wZQ6hj9D^FtL|k#eLexO{(Bb$z7;@YGX94>%p>y%kwR$&I)qp
z@Rt)s@>-}}rxS(K_-DSq9-k>I<8Km-n|1vA@ocX>jkqIW-1Kp6wcSUW6~r&^)H%cs
z!$~$nu403_5e#8~T`TQgi?V?s$D?0?Nk4s=|K0r#>#j*d7TF<ve4F5oqVIlzlZ~p}
zEkIR|XWVU$U+Alr8~_w5EKGP%uh?5qrm}3znFZbXk(Xjrs0CyC4~bv7Mb)Bt$>QHt
zoVY~;-IFqq6aNbTd|sC?-=zkcm$KJDXWfOWc;Yd)LET7AhEbgzG@0#BEID8VKfYBk
z*icpk7l2bteF^XizCZh-=BM9soDnFb?u01AX^}=7>o~tV=eMq>pSrbcbmf8M4?Hi*
z++XNrrfPW;)cqtiXaCPvO?kM6ZX2k_6%nd1i+C#4SZhGHk-qV~xC(#{*9eW;3oyE2
zN!x%zb<2iOX5AhF{VFa2x}Y|72;*Nuo>q>ta08h5>N8#XbBaRhoJpS9;Z>Mme?H0E
z2dc)0!D%>OOd}S!pm1fqwltKIeUVIa?6WuEJi7le=o`QT?KC+&lNUhT2*|8uG$og9
ztJJnQ#=1<TUYB%B9>T-Jj?+PA1=s`Ltwz~E;HK;%<uEI#o4)=kHw}+qah4{WO&IPE
z;hK7Q3{WNT!(moTc7`Ga6Xa}j5&x-g+ogD9v<3}fjVB2@RsPRdrqrWkpq2?|e+6bd
z?gVGwJyDRK$wL;6nr9y1$JfVy;AeiDJcvg>>pugyu)N%pdh|c=V~6Lwn2ygSvYd5-
z4p_lPlWf88WJYznS<%r!fu(www5-iN9IJrIZ95nVp62oTiSHy6G(;{1;hja}y6Sm8
zJkeleC@(SmN<XmpuuJQmDH>1(UX`WxoAAc7EbgzVNfH+V(oWD*@vy??)vr3dTDA1b
zB!tRw59%1)QSMYB3J-=M>8h;3+~Z6nhUdcrjwqh)nvYUN;9VK-%D%YM%V5ssVr~uR
z&IU+YtKt3=H?SCVLT|)stT@qcUql{)rJ?E?P!c2E=MGttGmnog{?5<KS0hvqgZ2@u
z)igeqMQMz){Oxz1oC)T4w<1eqHu_Tl;#YWoHt&bh4{q=9gJiKpP8#%VNr+rPO`(Tm
zU42QvJMWdsA(1BGlKFvm@rV`jP9+n|OKd=W>hh(20CEv>2AX5Ujer~U0WQ`<;V-%0
zAP4#pP9zs;rz|5*F%l3etNDGvLY~5q^W0t#(un^7$BOt!T1L;k>X)f}KfUHVK({Zb
z#6~qq7#tat=1>7v?dktQxamMxn~b9gMXRFB=tP*Ut|!#*+$i?#n>_$!TfzaVZ#;f}
zh>r!`qqD+(+M^9PyTt}g57h0RzW!~aG}2&Kl2MwJ8B1AwcuB^*z;6!?HEXMxaF(;D
zV~XL2%^ovv?S*K8@>Hsr@D+vhJ60J+{r^LpWtE}OeZ_MGdqQ2JnrQTYP&&qY#XN`7
zi<5-yr3y@3(J*j_%Fk$^Jn$YuJT)!yeL03I)TZ~}nzaF>Q;7D!U4ssElnV9esuuH&
z>XHIhzhIpG!q8-@GW6)C=WS6A(f#rswa^YfT+iLcJ*?+Te)I2kIF=`82UZm@jwpNM
zsf1<;m6<G`Qdv*LYK$}{NaXFx^c!}S6Q4J1jLkRc5WtKaq?v{l=Si25vq=MT6^xrt
zQy>g!WXK*XzO!9|fkcN=5PH%;kDc$)8SD7gAAj}&=8|oAi+FD$F6LtVT$XC@ZmsW>
zi;FS91A{`4a3P_SE+|m0&qf-x_1vs3{kXcA8{nYv>ta+0+8$~jtQ|S$4Pdii+GI}n
z@B6!nf4hI*P5kq}@Ye#YZDW_9og&=(FxE|>(u4#`B`=^%UPt=fg=Zg>AGH4^y7@%d
zfTEqZ3b=6eB`q|m6|kl)NTt+pE>9E{&x5@>+-}2ddp>60Ol=&FWso)lZG#%=3SDMS
zD$zD;K=aqn)3N@2i2H!&ZNPm>P2SX{{*p$`EW?cFB~ZJ_MyGFoi1VY{qZQ%e-d<u5
z#m?FjX4)pCqcPS);k*^nk+jJbTD~6Nr1*!AuaD1dxcE%r_kSL4PnI#}_4fTQj^8Q)
zS~L<$Q!=r5i)Wz7P`H>j?ePL{PNMs%=X4J*(*M2a0mY#!DjE1y3#yzW3LR}A!n|O0
zs)`k5Q;aq7b@d^BKzvO#{+-@c?sV7|qiV1KR$%|(O7j;{3TZnrd4E)pg7=UARn71G
zZW$atwaTEm+PrTIW$F|Lpb}LD=D{$J*8{{YZn#1)LXRQ_(Upu5OW~(fLS<-G1#ZO+
zSi=Ty@270@QSTGR4qzi#bAUAy&Rehw8+WF#_GLh}hRwYjO~UQ7G~>}q!JsA`z@??h
zLh>89J4r~l26Vmwh^!B$sqx9d%tf=|d}};Yw;9JlIWz&K%PNatfG6RcI@a$l9()bD
zISfQ-R7(q&x7AUB2?=w(3e1Y=&01hpA`#77pzgPCWBzZr6Bl}8SvXI`Th+#;b*>mW
zF|N3+OmZnXhWUXC6n_|aiTxRj8eX3w(D+--s^gy=OO*>|&W<3C{Rx8x%V44d1E(1&
zh+|B$4)9NS|9^g^;fJq}uMLoKtNwX@e7f=Be;e4Ke}<PL$a+JI-OZL$mVD+rs(A~T
zmckr6P6=Z~81bZz6Pi>+s7b8+D-F+ncK`gN^`G1Q?e@ps`JX@R{zo(R=ceIb-Cu9_
z_~)YIG{qy}?Ty+k<ws5O$T=e6s5%Y3kz8Spc(zvRGYuJje{^?qe~W)TJg`6dF79_0
zJajzO@W@fO<q<nj%PNl&MM%Qrt^<XF`9cu(2WG?ka2ATL2{l}tyE|}#+&LdAwD<GQ
zE-^EKzM{qfC{Bg3gmuecB;|WGoG~wvaRH^dyM<%+k4#D7VmBy~tw1GL6<fj4*9h1-
zXrNjH3g+3bRw)bW{(oE5hfPz!4?=!mMh5n@wty;Efs@c(6=)2cke}kTgFGZACy=N7
zlt(_f<~eWT3A5#pMgf(hR5~+FQ19?kzP!98rW<O_NHyQ{_hGmukf>2+1pG3fL_v>L
zA>#%m^JGT&b%-<mkG?whmh9X@=I3r`d>_{LH?M${l`ym*5TG|AiiIt};xt*$y2_#=
z*k{H5;#}sUB>lrK6vp`?!Zlc&r0ZJT0%EjLt|CRxjBQfsJic@Gx*EqxpmPE|Es~JP
zLU9aA{7Kchf--eWU0hMUL3h4xLX;R~ppwo7aNo(-m%S)1E0Nq1MxdX&KrwK_sh4}J
zd#>CrpqEnF1qlJ3Sq_6xw!~Z%sZh`{E^%#Xp`rqL(h&Mtb_NEYewLbSd8yAEffY%#
zRN3qG5M>Njz~P4>|KzEl6P7{h1sNC1(x|xueI&*Aks@D{Hz5u25ZK_o5AX5!xqTkc
z>tW+5P^~j1F$}OEkz{FFBH8-FLI(UU!tWdfb~32D4%9eP{%B--(i^e>G)4}(z)8iX
z(zj=QSBD8pdVXTNNg1j1c_mom0?tOSU{H!)bZF$TM4F3f3F&uCN^W-}pZej?+iQ%C
zv(2aZ^GCn2)b-PL$KqvF$rc;&{@Ols-m1l<9ynBDk@|2&ky3+6bk>`FA%ie_e^Rd=
z3~qY;60D}c+NWbxhx&p3nNOx*PW-f>K2m%HG|w5(=4`PE?CJ{OeOHHyLSEtAc4e)X
z6^lC{o0088O#_g+3Kfo)8U=m**zgaD5jnlJ;evJE7-0$29b?MK0&N+$V3-Zw<h6DE
z>dXM*(?bY*3(PLb`Y!Eg=KWfX7l1d-3)s;df`oqoCF-y@Su~Zdw(V#dxO2qjk1T76
z)$la8-GbFY(&FBoo3oZ-F<g4Fm?qJL!1VFvpw~b#VZdddc0e8JclR2+J$JNtYlV`S
zz3w5^I{BxyAl>Qyho6M58>Xam0~EIaGAer6Rgr$7&`7W=;_Q#aTed9p=4F*dMqq3*
zrmet)K%w)+;ScgR8rB>3<YZY3`YZ+mIn_Q!`i*f*8h{K#U#70+(q`Rq(4#=9z5vG_
z1$yXJU;<Cc<r$6rcPm`6CSD2VQ?$iy1-DO-%&%i5EVb}E{GV-+Tnd#wSfH8vf-<>P
zq@Ufky2K9y*M)~B0Scn@eQ~UyG;|0NeLz3Dx5r-g!>ctZVg+$eH#h>@iEaJKbM{FD
ztET=*;HHE(>j&zjLa92!`c9?w2_+<7(EWROiNV_7-)>Tex2G^XSs4%5pW*g2Y9}vs
z=X9YvXEt>mH~fA}xQo{nYMF<Nn&HMT6)zTG`g<@?Man<&{pZWzqM~1E_&49KCDz2S
z>6@?)Y7jp8j=)Ax3fz9*e7%Zs(&B?IZ1EIe|EEW`Y6a_er$KyjM;GgQ`C{ctK(VUg
zB4F81y#f@X0c`cZZ^(;;YnrJ$2E}_=!LJx|f)=bXE9BJc@olst_!)O;X;M!1YYDYa
zJ|AsRf8V5h%k_`0YmvfQo6BuAs1a;@0OLgvf-tC>+9#RRNAP|hsg`Fs;9EWf6l?i#
zskC3KNU@yx_$ILF`-J4mcpF$gyd`OOR3;tHIuUGGeeFR7KP_x&H9=hmE&hj^Q(N->
z3#`112T<U@j`cpMUc0+XotDY}&y2%LyNbkTtRD%|Oa&Q3LZpJ=5Xd1+R7ng6quiIp
z=$GC!6slO>tvDgQf%JQUG~H8n46^R<=UX`dbU3G|woPhFv!6Yr938Y!bFO7gZ3xMN
zWG5sHSGYhcbF%qrHB^r}y{;(mT!Z@cQBg>{m=$ih=`7J^+CbU^jcC81MmTw@zBt^h
zZ+G`A#kaMa4&4p5*ta}S1=aC($KycPTnaaRKq=btniZ*A(6qRTVQCgwV)!9beC?z_
z&=&6X0z1dxrBDU?(ODf+8MLW0$**quu|aLVwfd6a@4R;t)^YFhF*dAY1#S+^FnXUw
z6I}Sk9kC^-yt~5je}<iomKB~NM7A+*neO2W(f4wWovbfSYGA1S1?JcrmbXKFM0_A@
zm>Y*Eq8tw2hm)>Kdsq{Zh&5p~5o<)H25)t7xWe$0l2PFRLvYgYK=k<~YuT|~NI_t_
z5yJ0Gj%?}D5U@pnAg~tCyB4fKw+0#k<g|9dB?uRb#w)4UJ)m4A>C6_O0z(5-Q8i;l
z?)Z+qB1lY-$^xm(r@ww*b<fW%9(;3q7~I{DiV&qs^m##ID7`40!D1l!>Wb>NEn6`r
zDbuuEt?|O=0kx<+5qJ%k1KDjiV6v)emCAq|X#>)&!uwCp)Y`0uE~-Yin(=|nvQ%}X
zpJ{N+@Z#DB;P<_to2yo$Fmc!>i&`{doJ=a;ElA_tXEoFdQAj;rh4=VJNxW9IJ{<~d
zXuK{~l^iRRyvkaznhlj#KNCl;sJyOiwJ1`v3b5iYb)erJGwknE-#_;u+=6*G<{jdy
zzk&HtQ2YI?vDSu)9{K(SQURB0{8ueV-=M>-MT%CoyRsJ&g3`H~;4Ir3I;+uFVaChf
zJU-s?`(y?Qv=Kuc)xtGm=F<6vKy}3U1#DnmK$9{d{T`7b3%^AA)hYX=UemzEb392;
zPlM~9o*eZpS{!~^a$MwwPvM5!LxGhZDVA8o^;P}q;&B$UAIVt6Fh6vASW4&5NZso7
zN2JFnp_SD6FsY`g=@mUhdO_ru9DYY@1E5Wts)JRa0QH`chQu7%>KmT!!R^{Vtuc>#
zHV+RsVD5s6VW9RPPwKP?)Sx0<u4=DsAjw83Zmizsj#0+>vA4FQ^7684`>_+Sj9gOQ
zq`XC!wJ2gpk)|7K+W?m}B(1+dgVk_U(oz@v9w=0|K3}e@BgJJE*;z5z-{Fp|-KwVV
zuliMLja=d?&W|KgC+|ItSYLiNq_H7TxQA5cA^<`9v4=FKDlBHJe(Vlyj5i+A$F?Nm
zR=U)CKodD%v@Pk~aPJ{r)&2PF>&aS=4Hu>!HYDliZMyvWHkGs6zP{nF{5fvZM?F$;
z!VkXwrYJpG)1p=(`V<A!amEy&SDU3D^Mh=4E%&;M)Z_|Vb(em2wh#7i=-=P%J@;It
zI~>nhbO^S{`CERa^!pcJF|$G`min?{FYq2S*5N+vl-ABt-M36DSL>SNfN?yd0cuQG
zo#}&gA<67*Mgr$eE4M7F2wGm$S@}3xSOuHipk2_Q$`M{lZ=zqSyq9)>1hrT{2DWPe
z-Hj~*Jwf%qK_M!2>gmQUA}nO*#`~aQbt6(AbL*)V^ghhL#-IOleayMEQ=nMOq>zn@
zZ1rvMa33D|ur*3@d01=HPKE3Mvn&drok|1N*9`^E#UC%2Eo%->0uWvukFQ;^2a4$z
zY;bb{1|Y{kVY<*b$K1;@S|Jty7lz>022k^=mPpg^QzO!Tx?BxfuCBc-Pa)mZtEOvO
z)Y_5r3<~fyoG2z#;}Bj_JX@I8k~QVU(}jtPYeCupRS2rVp_Q50svlU&s;{8Rd;z!|
zWmTbeCIx*4-`McuRA5U&j-V_qf0%j!3AvUpKYHi<;v~t2Xy_hN>2gV+3F+^SXzlr_
z5v)b|0*^lC*EOiX!?3|YSD2KACOUDr8oF+H?9$^eR{;PXsw&!UXO{TK1+2nXg(6j}
z0DxhowYsdUaz*%gShZHW+=ZGv>}7)bRY3h*LXD5V3xD4HpvG}Be^dT8*k^Rsxvijz
zQdys|4bsqt$n8GZJ`caPtSgUUwffzmmz7>PMOuU^Qs$?shI7dodi|XTU~T_I?f|<g
z2=LTsKL(ntD(QjWAQE3A<Q4*bWKj$6YS8O@0coZx(D<rC<7$WGJX!;vP6$_$;S_1f
zw6N*(*MWY*y=p_@043Xo;2Rj(vtPyh`FZjDV@wsJ!v>&3%zqYhdG{Z3#5Tq52-GqL
zaOv|`k>blL7Zl8}WL}(3Rralw=su9kmRlZqs34AqONSx^<iB7Bz7}XEi(4R7a+)Qn
zBK>c8fp1s<a6pg0Y!Rqxy{IV-JVu(y0e)Qy{<#}?AuCBY09A1k$phB3U<H$t&yIOd
z^#ogm&rb^rlf^9_E-iBST8BID10Na_0(M^ckS=Y3G!5(ajYtEzwQK`JOk6fQAuGFT
z5JA<gVIOoLD*lrF;4S>}xGK7wZfmb)Tj|mk4WhJbb+?c7W8AUlFM~zjZF3lYZFsVa
zHHl)luHkfbFs}`wxBh+i=Qk`*<{co)sK886E7t%_7X!D|#bDbt^2qy3mat@93s=xo
z7z9&T!1{TFHC|dK1ao@{sk5y|55v0C>b@TGmIUT5RwG8XP;FgdzuJ?J7rS8BK%t7&
zJBzbVHKZ|yWnf?F{f3NF_(cg!vR#Xiz+`G;9qD(c!3FG;mS6Lj7FEQi%UZ+)$#Ye7
zb*P_R{6K~H?Z%kS`8pQtRq3q+(ny~z*W%+fR$H>22#gd%goR6rQ@bcB4gV4x&456(
z9)B)KBgw_%iY_8ts0@T~Bp7Y-1x9rOV`+Ipw`UlG0ktjFUqc9AIbKgO!3ODYH6hgc
zB*MEe!~5_GjFs153{))ojBm6TNC@La(q>tQ?<dDQ-T~QCx@iUZVg*lp<X@~#VAaQB
z?4*oEdFv6NDgoBeeI+Rtw<Q_yJUEa!4mZco^8evLLk|tc?xf){gB~p-c&8T#s{pG@
zxSm!Kear1Av2zxWiHq^4V$&MxPG3iuNGXcL=?KYh%MbDYkGGS)iKz|^V>FO`#w5f_
zNf2IHL*OErsp?=f50jRa4w~7sw27n<CV`RcVmxf-Z)_HqVHI9A)r7&1^G@EQ#sY_V
zIv51y^M!4e{!dkm6^I<djkEi3YBwma6b#lowp_+vgs89d8OG8A@S+s-8MXc|Ld4<k
zvnu)u`Nph=saX)!B5Sy?qWkeq^ueyK8zoz+v{*<0T0FSF7)>Es`I`{8^d3c9&VW29
z5Pt@-VhC<WjEt>pehv{Xoilc3WiO0t;0PQ<i5g^o9f`pBD!i#6q!9UXIuC3|opbhd
zrE|<^MGO;TkZ5AsJ3(Ar+!c60hTS#qoZX`@QS)GUTT39jc{#f`Mu^K3T}7C8V_3h+
zlevAk=r4*A<Z%`I7~}K=qg_Qg6^O?%gp=-K7$!2q%{k>ABg@ETd8iM;eIfjAOH07w
zf6^$l4D#%uPUpqO|Ci8jb2$Az#j-J0v?as}!jH}lL`oo_?8RWGaZ&KcTNsS-qr01t
zVIh+v9b^el5cT>)A0zxKthT)vatR2GkwoG)+ND&JtwBswRul6*FF*u|&x<Jf#l3ip
z?sM0U7ueTf?~PW?qv9~mB2e{+Y(^oLH{&rvpjl4i;F~`deokygM#|}_gbJ!>Q2L)i
zPT~gB#Z$ou|MKgj+bEw0SM~$F2T`ljz4bJ3$!+`_Xp7JW)Z=$21!|6#^|ot(+b!O}
zrAcSJGha_%Lx~5%kX0q1_@AbMC{CSlf}5CZ!THr4J=-BOn63?5N_3DhJZ@KzYMxfq
z(G_)wwEgpMAx&}@@gYAEt+DB1R~}La!;=R3h*FX^HAhc#B-pP(eo&CKc5bIg+}_fk
z6^P<{e^!Jgz2d;G_clhpQuWhUr=IrE@kc6UZ6!aeVWnDE_}$5|r9S3iAQ^<(oYSHf
zaO1Vl^cA?iWGshxV~4YF^J^rqxWa7=kU&Ck0!j+Jpc#jv=QWt|*?h15^k=6*M<r=U
zCRjXFmd#&ZESFO@q1?;J=63pUHt$J18ffuhDa~-bi4xk7F+V?pl5+l4GL3qIUmY73
zQIeFc6-gF@Vc}rJo|A7b>1s+mOLdqB0Vj@F`Hdma<4`H5xj^?e;J(#sf{|9po#}A)
zw+iv7qi6g}gejJpZbLYiSze$P>!7P=<Tg%luTR3(r*Zo7PTG*GspCvB9SpyFcsx1%
z;+lSOgV#|V<Y%|`h#X1XemhgEhfJe94G5{gFg5F)+ye^s;QR#86D8>i(A*Y*i8N?9
z#A`lTZYGeXi=kqUK$sfe^$N!M0CBe@N@2Ggyv(FI9fFS9>l_9qdN7P2%Naqd>2_D?
zc5eq-uu#KHn2rD1UcrefiyFjpL<B1Mw4(6PkAvxOf^dJ<Py+od@O1egxo<SFw%N(r
z7%9%L=O_>Fa1b?K@ML=`8Y>C7Ib9P@a*E76+|%c?#Q$E}p5S5((kWc^s7ErR<n&JJ
zQ$$z8SO&-eC(>g&tXQxrJjU?YDksCX|JuQwG%9?=lP)n^Cy}S3?~s67$#zwg>vzbd
zw0-K~$-$V5cDoRH#H6E#Uo?4*l68IJMPZE0vWLYH&J;A`<iT`jn)$Ae=O22xj>#C;
zDZh-C>wViyH&Pb6q99W`<E6PAPmaqCa5u|dgx42V?Eu+aYgcHwjea%Lsi!F8|E0v{
zZ$x>8wbP(P6LU=gQ*z|-RCW<A_2^wu`}D`f;xs{j;fYr`asaZL@OUzkz{lUsjME{`
z>h5e|MEb>Ymi!l<6FI(4;9)c_es%~**c>KnK@xblw_F3xHtMEq>YDQ9{ILQ>53ga|
zkm4+^Cx(ybb(q9wtF6w&m+~_y0tKsLE6HtNz=Rw$2~@YkT=QN8PIT!DiwPxeZ2f^9
znABo>F;$N_{ME&zfbVy40;L@9O@*`Zj^W(agi)1QCUd;2L;O6zD8{-?3r4Z3E5-B@
zxDW~)SIGxgi|4&}r(lL*gQbH_et#O~Xj6qF=D;8$kEUKE9Z1To!mM8JJ}7bF{Gg3W
zyonU3eAARe)9XF1Exb@wkY5#;ept7N>ak~9PJ#7V8LxQ3$>yXmg}@T+k6~_Eel@_v
z;sWT;L%kCj*&|BoM2;a{58dBGggEmaCTV<^e*^V=8Qd(He_jW1v`heM5ftas&_KdT
z3rbq!#aV~BQA18iQ7~gD>Q3u9r$%~WU$VZ3fd^8bGs_qWbIn7LAG@F3y`;c1rA&($
z$+k{A#bj8*h-R;LU~1ka<Fqx*&jy$eM=a5?^tB@=Gs~mL$6QH{Pv!K{i6J3U$X(x6
zS2Cx=t6s27l0x97=3u(?lJE%*xhZDiHkrd>swJEDX5ZC8mV2{=3t}bs9G$KmBkRrL
zYHEN<vNyXtLg1WT1Nr^g9o|Jc_#ht^Ste`j6>@C2LjOPhi#d*?nx|lq1F>-(Ck)}c
zkd>}d?IUZAr{~f+vr04DlulrK!o_4YIJr=m#!e9u<ZOBk^FJT|`OkkM%>T?VSL^uC
z@XtU0;O~oUXf($$wlK%(_v|Mjzku#_%r)Fxi>F$bJZ=JbFRP3B`CoJHK=E@1;wGG<
z3-P=`>75ieQ5tNo;G75BVQPhCNoE}{P4J_<`?t5jJPzA_uK7-~dkh+ghykb1qU>vw
z5Z6PLR9(%=a)xVW4aj~J@enBi6%g_aS$cH>amAUgit%IlV1}~d?bGYYuW6ZR99sc`
zJ(N7h<#hD&lqU6Hsxaw8<O=N)LNvUhW`H9rK(eqACL<4KffG&<Hz#rKbz=PK9ySUE
zLg_1F?9pC|;H0;_WB6`XQC29CAvHP9vKK~2RM~sS<J&3u4om|D<TjY4%z_Am0k@7Y
zvYrLMK2U>7J0QSoNwV&Wx2VUKYaQlpO(Ep?6NuTTnZ%hg2W{ab6g(dynqgXnCB!<&
zkN?&c4;JViJkLNOkfv@0PQz1KwgG3Qa}D~-S7v~bt|S@g1H+2i9iR{^fxuP+(9@J9
zZ5p{X@tYe9q+PEac{81+_q@Y5vyStlr2)4Z{LyViS%W<|U`%Q~r^8fhmD`kXT*Mg`
z6r0ocQanf~Sm40stVg}73Ffl605!xa%KhaGyF}x5r<e#f=9#_6k-6#1Gb#fI7J{Gw
za|jZ3Owys3FsIHQC#Eq@slDOsZ~PxUo;3zY;b&Y?I8hc;V+vF)o}}bwQ^EOFBf+mz
zVq+TPc@!zI42G*11*(@o(>TE~&f{woqEIXK>FX)=w<PF)#Dm}CsUJ6K2AbPjM&qm0
z05qGe49iiWusmHM!0gRwy)v|5f)cF;=UYWr)?mL_kL3o4^8V9+xBKT_|0#j8GPvK+
z1s$yp<LksUP=b0#mbSR340hogR<>L5;-~fjFpmb$e>U{bpdTkeYg4M?Y|;D5WWqqh
zx>hDP!j+eQJX;dX!(vXtGzChzl}=;5zG1(1nPKy^rBIT7-cDVtlRqy5rznAB!1?P_
z4KSGDNhpsGROZyWSzvqQSTg7q+dPJFf!YsLD`0(d1O51Ilu#f^5*ke?4>OS~O5kWB
zu3})~RlKUKBsplgnsGNkf%z)VFw5!;;ryFFx&c9<5g8a{HH%m;;s@2AD#(xBE)wW~
zDX1{77y_+UKoaUB4*VkR9GNhXr()9ya>%JXhIri8#*5#e^uwa9ebi7;Mg0zj8l?o6
z6=eug)>%~`MT@`+(2p(z@`VTIll^MQzi$-q*ijAK%y)e<*OFjxtTc(9fpa(Um_Py=
z40s99k8PmKHT9E1`EKIG`??NET2zKlKE`_>u1&CtaWWlFePXLt*ED%9`ka8g0m;hC
zOt(txE3;bF&CjoML)H+e5~Rrl3KWyusle&%6r9v>jy#J*wpis(&Nz1p=S&F%Piro{
zWCWzd32U7$dV|EU9gP3^&xCP~^~@Vp_K;6dz(tu_wW`|pg)CveV{Fc2W^Wx9?m;Dw
ze0JFxO@q>&clFi$>W+-8!@F5W&zs9B6HcLL@-L8U4LB+}dgK+f632&ACc<|W<ksbc
z3R<&H#<D!NiPGh}NfmJjI1~XHlq{yS*!lW@{Qq*4qyqs}(4C=}&{HQt{X+Fmpbni9
zBAe>=n=w<rdOpEN5v*)YzksSJRMBIK*9J8lkT7UCGC@^UsG~6~Dgi+3Z0YO_i%)9I
z5lqA@7~)0Ro$ab<Kk<K7rh9U8tODOEzeFx;$j5jehO^_%Ws*iY1wKb968HpZR<Z)Q
zx*y!?4RFOxpyb|uus8?+<R-dzDf?0Y)fzj?wPiitx9^*8RYfx^=Xx-Que{UXuryx;
zx}OVD0?Bt!=Q+TaGyo~RNiTBIIY2#~MPnPspoXph-DlblWt!3OG_@X@kd8r0&h-{*
ze=`2-3(aM0<;15{<5)RaP%+NP!X3c#XRly92=<Y2i~I+3R_QY{G$fXvIlKULHAzKZ
z0HZ+s2&cz80BJd=r4I7&9E;CY4Ec%&fO4_m5P|P!72_6<B(<IRE-Obhd+BnZTuPl(
zh_-<Wmq}y$K)?5ZdXF&`3a=ocnQzbPFHSq#c%?X0c(nCglJrR%dVK=B&34uG%-=oa
zP@lwoLz1Kn;U3Ic!b>Xb0(m`6?Q0sA#?3m#Q5a#RaoJ;(Rq}FJ@6WzP9aBJbA`SFI
zX`Re0uOy2TX~z)DJAP$F5H>x&cw3iIp|CfHhA7>ghVD+C8cr3AA4k)jCe4I6y`p1@
z#zqcQ#^S>c)*fa2Ox@pwVz@tRIMFyOyynzH;IF!9?P#$|o3w22jqxPTVFWFaQKxUv
zn1kX|6(<Ny)_+br{G{TOi~pr8YR*o_$a1=z5aOkN{UJm+?yek5OX-tK@*IzYXG=0`
z9psqXOZes>md)rEr7R+FbU=Ua8ceuA^&Q7JX$?32eoHyN9^wwWV(tV_d;IR!D`DcQ
zk_(iY5d&{HmvAdMKS&rtm2vG{D_e2gR2xwP3iy!S#aD?2__ny{S+0{+P{zAyb3?|+
zGyXE9WK^9PtDsyw823_+qpsw<ob<dS-Kp>DDAyglP*n48uqQ5@BD6*j3ayc}-a)A}
z4*Xn4IsTDfDUIgcY2ZZ?kU01B`YeDNhZV9bKz)(Y*&u_zq=gMk4b@{kZzd4|@n00T
z5{&$;5PnmHn8V$@aj)81!(Y9-dB@*gxpOgAcvJT`{U+XSi)e}Ug-x#95Ji|u;?|&6
z3(WjX>Pt?;pT{w2-KD*`vxxBWe|>L~b7YvpJ3T1nUr%cLZGnsr=Zv!No+gN2sU(Vg
z>n{{KLi3K#^x}Vmms9tu?&h&~!{g)a<re=>Iwgje*SFj4_W1X2xW|7n-bRjZ4U*gG
z|8H{5$k9Z)x<sI1oUUu%%4OlL)C*8DbzXspq#-kVBhr-WLTlcL7NqgP{@46F{2Ttg
z|NHM-`1jv;_>ccL{wur@!{hL7!}(_6O6DNz*K+I+&1;JC#jLC21QQ0JcMB|TTfNA1
zHU8_fLI+!tHsiPF)RHr~M}n(G4JJ77rTg!B--_vqq)ohJz|Ab?<P3VR^QQ-(X6hS&
zOE;HIKzG`$v-XZZl}zL;65O|V{O%I0?x#zUQ-e!<33K)E7~@xmh2l3mKGjd}@3)}e
zZ|^t%uH)YxNJ}RPB4Y_o_wm7o&W7%HaAr0`3(mZBJXei<N_Ht3R9_^f>l6WjYNw=M
zop*LBa$Ac;mO9Rf6*m32=@0QwH`AZ%YZ1nc#vmx3c2jIl0rbidX6kL!Q63aDU5YIO
zAGWC$RSr|U;;pw(H&T{-cQK0{2q#nXt%egz9AR_bd9<=Hx8+UG4+Y?7^-dj8!bGv9
zt68MFZW`A-T+4k@P~Ms@20;9xNo*;zHQn+fQa#?DP$_Ic`q5$KMd`2()2B<oUK82^
z$-nqt447eze0YQX;ndauE+8EEI?TNS3~q-%&*ph~icB-h3bJy+;tr5~U*Mc*Rsv|X
z%r@Zsn1|lO%$vuQ%ug!jc3>d3?@vkC?JAhf52%~%mXg<^?CN;CJ6eII?z@<p@EUY8
z(+@jcf*qy*fn=k-QjRREqxXG}L1x3ERw?QiT`E>uThnS%>rymSNv+q~$M5jmgrf%R
z`zfuhc?KGM>l{$ONgN7=4LjcV5+uPy8ab?N(e<c@o4Gcc2W6KxR9)mG!Bot<UV7me
zMTs2Xj1s9;APLrXjy66uoFzW!E#W2Em`GY^UV)u9a9HS-&x!{$F&HXH4@<==^)p{q
z8+3f<M-y{??6u86J4DfG51#Bm^A%+bc8BVViqX~HgKx(d6>OqC&=Rf~RFq0g^CJar
zP@@);oE|AecL=v*!=DK2_VD7fm4<$3l#JT<eaSKnOhZU2(sMd5ttu-Y>EW*~>^3i~
zAFbem8G3gq8ChsXS&DNEqmmUMZ^G$E1C5tlQ84BQ5?oVM`u}Nrx8=NXENS$bk5Lh0
zRoSv+;tUW}?O&lC;n){{U+I3{|1CChfrkZtBmq)xSC`B7Om}C4IIKgiOsdEla)K1)
zHO1J~8l7RSx`usPtmo5|F@^cIawa~E($vHJta0OESS=i^zt&O`VCp3*g>qb2VE%4}
zIlr27a+e?fb5)idMp*U^utc$P+ND&qIQqOEi!i^>E_Ib--yNl(i}TBP{X*T7k6~xt
zuYswZ%MbPYDIX=bW^-SaiQ~??!iB+NQdTfZfaDgSy3cui9p}Tx-<ko(8>*!X$1bH#
zuw@WT_yjl?j-4V>m~FzDiFMGZTuF9A)FCqjz4KeK`M$Mf06MjTvK2c@+@@564y37}
zRRQ^MgZkxXlKrMO_6^a7ty1l}KMS02B7;_K1J3X=ceTQi`R}E#zNdA=P_oXOMPW3^
zt_~8}-Tb1v*ktae`sPNa0I>$brG~$pQZh1N=qb}bMf#DEzJ(&^%!7Szq%ktQJ>eJT
z><Tqa61DpaO5wfGO$$0(`ljf}?3?}G5UD+sDE4X(uE<8ZZE%-y*<t)MyEF%Q5N}9l
z$rr^ZNk&jp5hA!Lw@=7^pMk`)gaeK-54T?Rb~^kf^QyvWd>8ihsfh_hHdXx)oG78P
z1yz~EP+iaMh<uz>RvjcQhnTfmE_?9oiLClvpcY?VwgJrnof)33dbA3<7vb&b6t6g0
z%bh&QNvF65lZLwEcy8!-wRN+j54a1{P{1sjV9tt_!|-Qd?n*+gC|UYpCd0ux$WN+@
zL*3aNOs9<3B_Xro5mdv(;B4BZvbgPeo)q$|bPLd9wu+eRCHJaMnC%)uKru(NV8pWe
zgK+c*b(BT8?Di_TYTIRyx~+I$s<5r!orU$}K_6^ahxwa|xGyj64=F15`vTRwDD#!n
zh%%(VUc{M1;H`kZuu51_dLGWh5t&#P6_8j<5&i5)80Hp8^b3h;E2rGzNBE!!xeL?k
zY>C|h6XV*CH~ZHjyIoz~4boh0GSf>$%+qtEx!~{;qk0l$f{gjiX^gYdsyz<!_C!`!
z#9<X#wJKls^`%X*XpDBaLaOVVNK2)1=^Bf2?`CX)tUFL$)vky4==f*1z@G4TiF9s#
zn?9fX7NqvOa&F>M;)i(D$GC)aS2>QsWTLfc1CV)p!zTUcehzwL+8l=pnV?Z0q11_d
zfSSKO#Uo8nJF>v>$0~ATym6OiLD=-Vr<e)T{5)Xhj?KEyif1iLy4z7)dLs6Yw&9-Y
zyWhi^w`gd>>5teWZg~oBSsJ3lXdvDqC=gvj62&W`Q2WQwTn**t%KUr?NycaGa)f#>
zt}i4)*#>2geY%cmgVtegD~(fkDY$qzA=Pk_cJ%4j@6K>$>vP|PGyZvf@=V7fbs-@=
z%8i~C#yC(##xw55f^MSvTqL_$%E62jq>Ev7$boX=7`ZrQSC><mlRT_h`km>YD*r0P
zpKcP2y_zaL`X?;kojYO~nZvv~$P_=tfpzJr#;|d18!z}Lu&D|7&I~$*rJm_97BF@1
zXOaIp%wsj}`LkawF7L=yZ&y>#b;fD(sP@U_dZ_gu3Pg5%d{&gAq=`;GK9#k+A$itD
z6D2hS9QJ@RI3qc6k15?_;LLPYl)tG`&&f@*km>Z>fFF0{BGa1*yH&7qBs=GtQHO{^
z(2K5$u{s1x6t^+n>msH&9=v1;5za@XQ_)t>9uzgC-N_L$bo{lr>VGz;@fyT7^tpLe
zMDwm}`ZlC}(tz^^j3P(p7Bcfc;HITtgkQhhXk_@c@o7``mSFXIkXR7nriOgmmj{tC
zETgb-mSfB7>mID8S_G>ftVh-$gyW=om~6v2Sup;|$2~{MsNbZk9^*VbNY=bE0!4LH
zm8&pM#<Z#3{j7(vzpQMzqr1u$lmSi8)Js-gZ2=mN!q2;&+B<T)3@59Oc$W)e4={gk
zVNRM%cRu_{M?T94r9c_PAr0n;<PSVAH%UwEoce;UcavYkxrNgjbxbdHy!7dJrGb|N
zZ`g#sr_4A7pEZn_un2y7g#1HucKOBi!S@FTlJ(1TWd<sWNYR2cujiH8O)KjX4$=J}
z!D2UzXm41=q}9wCIK}xfOU8+!pzOu?Yt%nvbi;G#|Jm$MS?EWkfakDMpAgy{HI*n=
z&VZuH*yh!|6fwy*X95eDDa6~zzZai9Kb`VxK;t+GAYBgUcLmGWlO(n`Q?4`A4iY)<
zjy9yv9RlxnVn0w1dDs^$udirGXDCo9l)p(vm9!SR>T586FB2Az4KF}UVahnTtG%R2
z;>_a2TVc{cV!@r^!5!+O(^Y?p^7rrVM0!=G0Q6OReX9_@QT}H)lUcR@J(X*301Yi~
zr*Ew^KpQ&kQ&Vv6jN3%Z5L~XIJkhpegt4n{{QPtVR2M;LDlaF$O2*Nd$3RsTCjs^R
zE%MQiQxAR&&))h=>4#uF*>rts8^9J~D2q8dMO5}W$WtwrDPE3u#Ofbpq1T&<$^QpC
z-Ar|$AKg$=*Ww?E?(!%dqylNI-0)eDa!KVR&v0u=2bOmFM}hVB;V+d1)<rS=#npne
z&eL@D1^wuD#=h9^owF`nr{8Rqc9#!t<tnrbE!&A}Sws2x(ap5_(GA=}csTy@BHVBw
zDv%a=r|esRW_Qlj+t%&u)k`y}iy*8?uRG{hw&!16ZNIwdT2}KYmvi*eM`&AKR$qGb
zL|YfrY+6X<iB<tg<Oaesgg;9f*s80q4<`p2CbD`P(3wF0=)ZkeU;V+*iNR#W1~2mF
ztfQPmP<bKeykqIKes&I1<%UNa%z;R&`6LZn1+$&Px47lrw!febNTt89pjR9!?m0i~
zE@?1xWLF0ooC^*->PfkDgd2056|Q#L+jZxM%vm<XT=km!eMoLDs7Z7S+W!@dJLTG8
z{uiu`^<GELG>M!*ST|ks26}*!U!VMSH(B+&4s(&%W0i(z;GaPbotOjZuUD06m?8@g
zfVrrd2J{A}pXT`+aBf?0P@!;HYz_BS+)4pj)7sfaZe%@@e&{JLFfEs=CTYbg$iM!I
zL#EDet2LW5v{4U{S1@{pF>8k~fceMo&z&F8`~GYG0X6@>9)JA&>*uN+QmACg6smdN
zS+$!8eWp@1e#nEIY|m@LUY8ta*CB%m0rD4!DYfAAqgSzLU5sBQr_6Rzz0IsiV;8*|
zSROgU{Ezys4zgk{qv@<%)T`M8x#-noU_Ta!|Ed4z{;K~d_S~LJlxP3_O93zt@t|d$
zx~JJvEl$Vt%c>>WatnoD42KVr+ndZNr}A`G^lJH<I}^+?4sUPvZE=CU+$Mjt23r@C
zC0c=k8r9WNes+5k*QZtAh6c*i;Uj`J^kd8QELzYxBWwX$cV4=cVsBly)&-TDkOrj9
zD$M^*?3H6~;Vfn`2Jd)=oi3e4p;MDWr|If2H$lzt*xY?kx@}Ug01GM|AV|T4L9z0*
zE2c37jc2sLq?F|+6wP0A%;kjp8Plf0U-(JbOI6=Vwgj^nDXzgRhUB4_)7ub{hAt~8
zKPqNMR1N-fMmFmnWqBb?ZE|8*Mf=}zDc$p4THkKR%$MNAM7Mn%>3bHB9)A2728a2}
z@TX4}MPwKgronrbEx1BbO;Fjg0dHk(LyW=Rl0+o!_`!KOXzu@7;#8Bu<X3RUNn3WS
zuT)>oSz8ney`&H^?a&>dHqbc4ZJ_pq$<_GX3!5OeQ$3F|r@?}TecEM%^mtYVzN0ev
zX!*R4l{zZ24XXFf`}i{sFN2&D?O`!I*~z<`C$Bd~EbCPj*4b;Aon+!f{qT2WwW4Fx
zFK)(}{~l+G%4P94YWIpEq^%lKDAn`u#20N;L!xyJSJXaJD$e)6c-Alea=1r0!PLg=
zL#ek=z$8Iobsgs8u}_JTx!OcsdMY66Lt9q!$=1o{-EyD}H~zsBg{RaHo=hzsMO)P!
ztIhQ=agpZW=JI6=(iq2cw$$Zw&m^`|OoG8tF13@9=EpQIs%=5~IeYHO6Y?P)uSa{U
z2C)F*O{K#hQecJRTPXHH(GsvEB;SDe+>l}P{(&%OWb0fBw|!^3fC2S(Y6>&}jq%Ti
zR6Ew%es4ho7291S&d4RO;~&VQc088ZdYVlhC6S+0H)@w_?Ej7R+<3#TD{6T(O$7-w
zE2~GU8&?u%w7IIO<2wKH2(?}Mle=xQHFX@R%QNFmXaDc3n^d)Rt|*%FH0;a|JukSM
zEU8;Zx!Q$IYv|lld<kQ4qq8IFu&C{V+Q#t2fFtYEnM(hT^z~<)DLCl~=Z~)MrjF_<
z#Lv!YXHx8>7fZPBar)i&{AXfvG{gPTHtLwm5t+~lT`g8>51X(AgL!q39|x2aM-726
zKEp8Z#!POYxlw4Jqj18%co1dMM8y3n%>8SAV0@29!)o1J{1oZt>;rL;{2op!mnn4{
z=NI4lJsap<oUnv`$?CZ+K;xf(jvgKNI8e97_j>J7x2-5EK!pm>{*GPs!GhX3%DeF8
z^&hN0ph=xBO;k%#>=YE!`wT@T3Uz1I)j+=QjcmkN@XZH*dBR|d@D<IFS!laUUVsRE
z3<k2^3X`}spu0+nS?YBdm;SH5JXSQjE}3*S=@dd2<-K6a&MK&q4NwmaB!;!qbx*uT
zViJa^r%<y9E6k~Xl;5K^h(k(^HYAVYEDaamqj}s-LjFspXS3k=1iQKE(Ti*N=&@8M
zi_<U7w5BqS<|ga3@X+>qIN_)Lk`02|cc&YzDn>TkSFU<Xo{A~A6ENM9Y_7SmaDZos
z06U=ubDPq~$Kum3dsEDyaH?IS&y=~9r5y6Atpa`ToD0xyw@1S6F~;la5M-%{R<XnV
zq*zoJ7Volris}g0#jN^av`2N5osCbQJIKLsW^Caw;rc8M(28=H6Zx#7C&TkNeor6U
zjP6zHs|m-Ri8lM>VRg}lrS{8Yqg_SnT~z*mM)me<Gg=Z3+KaMpu89^l+6C5y6b%x{
z)Cz6DiXU+;GUk7V<DzKAaA4s`3exi?dAb#+;sy>>XU4@;MVXBez-Bn}ZMiI!JE;Ll
z%wrBrZ9(-#-%e-Q@fyV6zyElkylbS)7E`a{iLh19vV>KbAKfqvQ4Qs(AHEiTX5uz(
z`Sh-*Cy6IW0J^#5`zD+ZVu=r98=FK<`lgTWvN%ABK$#c4>iOf2A}NW@S1^JOy(K+Z
zZl8_n$aOc`S$5ok$C83N@Az_Zpybvfn}A*)&+?7>-K%Xn+nsLpP0z^@jMJ+qAwww~
z1xUlw93cN)k?(I6tAbqOpmZQ}=k?VD*7Q+OTb^nm4KTA}=C7d~d5SIXr)Xq^36#Rn
zu93YEY@_m^iOeQc3*zhFU%OVZTvTry8PxK9$}3^yE`#f-{{2t7HIsx{+_%fFa*mO9
zraRw3k<zmTEr5EW?%xC&Hap=GP<Ej5?G)IfBzm54>MWPqut~c!ub!Fta(S+OcIy!|
zeUG7W$(l8)@WXJqqu2npD+|bvvNaDmr#3AkFqTYyQ(!c}PRcJ!DlkuiC7ny4Ek}OQ
z+OR9K_@!_;H$Cwx!HF1VJuIYG>2w!~aUCO7!iYRM91h-ZucT%VRm61Kfazw}`2v7k
zwB8{9_(GzYO-L0sq6rj&JZF}}aP}zmZN<f`@`ovG!8lne-9d}WpN5qb#&oJM;ncD-
zD4zc){T$!yb(O!_Skcjq<aJ=nX}58*a$ObV$46&|Y7O*q2)Z?)UQzOvjDs+Q7hN6X
z?06Jxd33m|M7-`K;<El|O1DWIVEy85b^J8FA`$mTkpKD5?~nid=RXYOf@bJlX5szy
z*Es(5*I((y_f)`{6He7nMTY4RnF3}1P+szRA`hm4ePGv7ib7Jtspf<JUR%lTIVHaX
zKQ3W+zHM_6%^ou1h^`NbFO?2@`tD90d~Q4GfHZz?Z$~c=AD|m*5VQ0o<-%}0@jZ{|
z(tT!iacNdS{wCH<aKs53o>DH{510+rlo#6~7rs&lAyILRvdY1%JaEN@x<%ILS6u~Q
ze$8FHcEB67ke49|(L-(Y2qp>T!Gxy)=4GKg=^7t$*p}KMa0yQ}>2&8@!}hZ<W)0=%
zduNHqvW$pow{sa?Zo+8<#hAfMxrR-55)X<KRO^@b$IkOuHKe?^Mbg3{riW5S;snO<
z!x(?V50seom|us}g%cZ`E5#b>{3%Fo!+9t?7<Z`_P1>z6M2BT*y4rgdxJ%ZBF_6V1
z%jxP@ucg9a?*+2WKCmgz8BE#%O0nvKERuFHH?!+V<4&zREYs_^3ST}O07DT~_ZSOb
z-=K61r|mhvK{bT9-hWIjg74$hWZy!*cyDGi4vfyg4iY8L0%1|%ZbP_rVHn2^eeF(#
z8|Qnl@X{4L`AM0OHT+Y1-_=2Wc451Ej(*dR<7+?u7@YeVFOLZ3x(CKQ&|@b$wP4q~
zpX(mD-ur1RxDGWNc}MRiO$xJ&m&F}2!wsTxAj>V6MG~Qww><1n>E;i61K9K!{rk|z
zdiz>7v<UN~RXF(^6d1)tjI-9Qaq(qdz2yyx%#2~iD19Th1~Xn54Ne&*!<b^$X+U(P
z|5Uzt=ic5@>?_~=ESmMNPl528eET>3@@M+%Uw?HS=hiiuuXwkTrvY%8ClusXCtMTC
zVOlOd#6MSsC;4!lK!Ucyd%{Wy!kOzb>589J5v%8?JDwwjST~J*#LG`5b=RS#K<Z<m
z#L3>uFw0>!<2+XGbX$dr2KMCSR|g|ittC9`)*F6WQFg;uQFaUMJRVkJH^(Qm$t8jC
zH9#0IAQj>e?Nx~R{u##BHmeLx`GrJDMq@b*6Q%%BDflJBP_tZThWTFm-ciM;wU=Hg
z6PrEXg{`|1u_}5`l{m|L>p;5Fs4VFW`mT9r#&{<7aifn#104YZSvW)dVFysX#Ke6z
zd~CqE_EP5-t7rXHjCyRQjzX(=G!nJ|=?Byf7MP04npwUt{U(=QYp?luMo0AbC$USA
z%OrLN_G5wgUz)_WhFp^jIXF_!+%f=63p`I}URfxQRk7=J6Nt|fW7z5gx4E`Ta#Kjb
zq;WF6gpR$0JiEn^hDe{f4)iy&+n{YwOmBBa8@}jJk-lbn3}H{>SN@(-h9`|?lCZAh
zJno*wGK07>?*zds8p~Dia}xzhmWqL8D4S6FF;lk|fec7@zzS23<Im`=9-prAigV>(
z6I;R>0h4#OLAgnR%=*Aplns5paB<pU86PH3adadtN9or%O{a?+-@Fd;ge>v)Hs3wh
z3mV9}DgRFUe!yVhdWgcN;6`HgRhaMH_h)h&Ou;w=106oR#y0RrkBlv!Lw{6dHRPQV
zyc{f>as$xi_SWS1Ko+A_oHe0<>obEn3&*8|T#ST!IIAN8wQ{Me_Y+Iz4Cp5TbxV0o
zrN7+k-!tRTjnA+BH~nMW5@3t<)F=E`sNJP(48{a&c)`sSgeBc_-_;lNqkE<Ti>KoC
z9PIi?^5y9g*eSi#S;eyluD7Ibh^J!Nq1>lHkVvf0b_ysHeSCZz!2ImQIDX4|u*P4i
zwd#!1x}!z~eS7bRiE>il)XTe>h3;yrS*QucG1of$REH5}9PDM8yLom*c$MWM4RgH)
zPQThunJ$ksAK?60mvdJS1O{JE=^yd!%{<soxrX?crth>!4{R7h&*KoyJ&0uA!AYsI
zn4f2Kj_gCXL*1)$b!UDy%vwQ_A!)XCgL35ssurnSeme)6Yr?^fXA$OALPvHXGMKg>
zd<vaxV71o50KjAmkUGrss&E$HiFd{Xskf3ART|O)Vqcy=@I82jMC7W#+*dRu7QDkh
zQT7-vt1aY0p&(<Y;0LeO1JKFLug{531JK-J*_9D1UAxmIbA9%9VZfuKs*3KKW^ZV&
z9Vy#2!0r5>*9a*kO{k&`U6Z0>tOLISvb1dr+@Z{PF~Lk^$uQSr{czn500V={QeE2^
z-V!o}d6LQ~>sz3kCy4y~xglRauk=egR9lHY>s#D#B@sNlf;bRgOs=(#7Xj8(4I8x^
z$|3Q=^F8aG_95QM>qsY<bV0zC;`3t#QfTZ`WYPjPI3KJ?sb&xnE@@K_GiHT|1brJj
zTiw{bLZi*6+%ARtMG$2q4^B)Tb!$2OkOhwO6*c^!YAi&JA<mdmJRGFu;x~%dI9(R9
zsAAksWo<idn*KoDOzI`IE;X$vW2G3D)EkH{fe2qm`dLx4AgJO76`tlb6b!yJCX3mZ
z=Ts!`v%Pd3XRdm=b+oNsooiAM1v@2S)jr*2yxeKHf{h0OO@N$I0h!kGN4kE}NiuVH
zD8{8$68H1F(D*WGT%$sG4IM&$K{2RC<czrm>G`|lA3gmV1+0M7^hE44<%)`aUeO@4
zA?^m}=LaQqB~J13?M2F`r|(Zf{CxWL^dy`eQB6*JeEeyDKFWhnsi>OlbnH$(LJb4l
zP7sexV7VR~gt(cVqa})0bv>IH(ediHpQQm1>f`F~)e%l*Db&A;b9fMif;P->Z!8a*
zRp_*)r8Qv#)D>G)wE_J>hp5L-&*MwN`ZR(bKJ@std>yt_W93?Tr?AXZeTt+7tLpFt
z6(k1LxT5_SbXHhLe?7hXPwAhp%c8zMo_gO*JEb^Z!)k%AsN`Ezba|Mp0<+9X{9gC%
zk*J~?$?%$VOC+X>*n;wRp1MY=5$B)=Zb$VecxgE&A;We7uUXJbw<|g&C0b|2=sMDz
z<Zr~(_HxnzqHbRiFm#z658)LAA_sc`QbtM~Apbnt3$dhD4YWIk8$M<9^n;>^+yrfs
z;=bZ75;F@}R#*5mMwtTD>mdpa!J+NoJpNuosRAWrHq{qV#>uAeqM6##@-oD!lb|E>
zqs7Z7Zc+tm*sb03T+<2JFY&7aHd8$xV`b@DOK@e+7L(T`l&A6BwgYu42O8s@0Zll*
zS>*TN0(q&AV5Y-QBA|7cag`=kZ(Ijhr{6dXnc6!lJ6TaTeR0gfIAz&a=cH-h&l9gr
z3yN><`k%A>hD-l+%aiq>Rofhw!g&EDUGyH<iu&z`H(ecOEruE{8&yraQ&J`0OH9MC
z)ntVd0L(g-Rgh^lfBuZ4SKfOyGXU2r{%(}-+xWBDhtklfNfDCrl1{qt$;P4uXeRVe
zPtfw_54pmphX+w0>zkyM!RJ>)a6K8JuQ<8Xx3PFdLsy4+AhHxk-W=b2_P))}Kn^rH
zZU<wS{8xb!t-IOWvkLTkIy2C$nod#N(X`kHdfdOu$%slTiOWf(bHuvKt|&i+nO0DN
z`C0bBhd9n!a0&IGW8aV0ZK`IgKWt8ccgd*(oVL$=umb1reVi2L&4se3LC!&6{P~Px
zkKWdvw5YX5vYRjR5X={m=qLN|8m`^bhZi<aO4L?!5{yV`=g?lu?VCxTcr*ivzp?c+
z{JXnJ<GqIRcekC~v8;)MT29;!xwp+i<x%ILA;7^v^3I5sB=udzxUWb#u)fG(pAK%N
z@}oc0a$vHOLOK-1c?q|GNo#5AFb^vv^A>n^PQ*KBNM)Z?6Cm(wd0CVL#ad=rm=>gd
zu~MSBD7~%a-0)SJXWSjeat+p%&FVSU!ok4!L<b}|CqUEx4s`IIqkZPNUB6D{`nm?k
zWDg_H{9(v(To*pHiYq-EOc9eT=|>H5S0C=UUMQPH)8Rww6tSMxrR711n1Yfz%=a#S
zAID$I0{+??CB9R1R{hLc6$9epU7)#?D{UK4dyv=1dqAfmzRt{ajG#CklceWVo251`
ziS*XAP0zl&n-Y=hYD*+nm+q;Mz*;3rpo=OL>{M|DGyr|17!--fjzUo1mL3bjiQvg`
zb63LY*|k5%PBom53R`oFAr6Q{U36EU&^dInD!mazc3jVU0TVVUy=#UUInJkPE+t+4
z&p>${CedHafB+v}xrMR`v?`Z~BrDMp#kRUV<8%Bcet&&^Q?KIf)upfQ(}y25+X0)x
z`0l+VGf>HDd4j0bFiNpSrOC@pFe+qs>mcRo(%45$@#q6N&on-y<U{K%)gBpfw7_X|
zb@Xcoez^UNX5$$NRri3&1kFFa+TNgw1Q{wge|L7SH-_iYzD$}%LlK@_?lkC$;%bI~
z88mf(bBaq`6V8_uiCCMkzPO1)@K3p}FHN$}l9QiEfF3#rWaBfSB*J#6-lQP{T%8Ke
zhq8mFBxHc<1-uDimHzx~|I5$Q?-d1lp+q`Xy64eemi3-usC@8!S6$Qh_iR8tz$GGG
zoX?hIE>OwStZ93oM7}Rv$ghBTmIk7Q4yG<lF<o)hZ0vy-R81NHcYI{GjZ~;lfZ8>E
zvUIpol<I5=Q!V>dWwEFhm|(I;9*)*v?vJE#n2Yi&dL-!v2tA{g0q7E%I8?;@7M!~=
z>4Sm71)&qeDvzL#_k-7%`YNImzb>k=S%0Yo>MjA&4AVdL+p~J!AQdf0i3mf)*Yx}|
z4SL}8a{2f&>piys^(ticOUjXbjOvG_UWi|aFb^Mpw^pX6%Ssi}Yr#%_K3e+%>$*|J
zqyyUe{T=JxR&9w-vHD`*T#G@23d8HpKCH7!=svw<qtfa<l@5OJDcrXY5q`W<HYwJ0
ze)?hh3g=%@1pADu2Y+2^j5gprB0*d$@60%hk_3p5>7lZPa6{<lq2B4C>P^>j?HA}g
zcc+$Pe4@SEfOCFOH6Ou2zrpL$hn#TQ;?&ElIJc_00ZNOZNvO8KxpYPZ<YX)yeOTHI
zpv$o*$*5j6A~j?G6lOKJFb4o3{}Mv~7rwk>VRVZ0`#?kIkX|d-bQyJUnO|~oP2)fp
z(tv{niJ1|ZNgQvjDajOjI!^+V0&9d4SVAgEv$nTg9VldqZTRVqHHzbvukY5;CMulS
z6!@U9!*HhfbioNKIp~j7l<(`?6qbz1zhV6R(7IFZRw8t9<<DYv9=vBv3vz84XwsV^
zG?BKeBYp3tTsbb@yOVw`bE0X1I<2YVYl@PIn`8&p^TcbBaxx^Z!u<T~6fuTRW4%k%
zjYY-~${l@*On9UhXizl=P;wU1rlAb_>j)%Tw_*IB1m%C0f{s}#dSM@@D($Ilfm0Ox
zH;VWjXeO*4!PUvlq)uL6&$&0|xQfuPtZhes{Co&>cR}Sk@1RIZ3XBrVo8f`iNg=eE
z{*QH-pWVKMI@_P&3Qn;O&C{fdkf7{1&zm`1{2P*XFI3vAB271`UHuOSBE3BrBVK^2
zW)3-ql2d?>F3CEHrJ+*omLN~gu~P+Oj-702<5Ll?mAd-jiKpkL-I)$QZ<G{BQBc7J
z)dF-+gN@br;zJs`e>bTa=(L;|wNCN9%|56r%5>nDNi33lmt7U(`)4PI=fREBGjE@j
zT|c*d0^nD-Pb57qMl3?=*q{)X@$9bGT=*Mp@|Qe(Sx4%%4?dp$xGb?s>=#W#q862W
z!FQ<P=Sb(fz6x_Hm4<V%|I1D(G<}I6PgCojtBw%Re);q=fBHt8;}4mw#q?|1lc?#w
zj=KLoevjtmmy4IL&`FMfCYaM>&mHhImjK52=o*Zkpd<|n<<&S|=rZdDrPbUHPuWty
z&^52<7C@;d*2^}W)gQyvyPniXaK#5wS1>}c4azTiIi;U<m<eb6d8}D`b3N%z5;?Z(
z5#uvqwgRc~mM;7zphs=yWf<%G<;Qd4Ye1Kc{Bep$iLa=c%w$`D4)Drw<2s1*IOMWf
zQQOkIOVLR0()EB?ggx<^>r_i^rm=x4OlU?DM<)|R+rIs2akO6_c|pnrFlk(6H2~DF
zr5rI)o$Elm`0LR;J{J9_%VE>>EGZ-*XpVTl@>{hKn=rHo8NXpY7q;!xx0@0Y>g7Zi
zT)6CmHy<U1cL_3w<_ITVfzXEYp-DScapd7=cWu)RhTJ&~<UbrcNrJ!fl!BxUT!jgs
zKeuXK;S2jyFbf(F9a|FNR6KCvNNw+koYw?2XB@>3Ie|4lOf=gsuXH`I=frkM^GTY%
zJV0yUOMv!mBidu(mz}5S^vV}3rPrW1qwi$-tQZ<3EPArpIHiV@co2hpTS29SY;)`X
zJlX&o$>pARa4F6caQ1P~e&-q+n(Kh$=tCK+ww~|rmDfJ*2<lYQrFEy;!NE(`WU+;H
zNqO<Xm`u-XL0a~0b|w2R-T4cr2%~oxyh~d^bD)mpcGAv5^);NoyE022_8!s;4KdCf
zhupRweA=qya=Jj3f^XBXDj=S~{GgfJx!xRyqVjauWIPpL7Ax5qQj%_Ppu@2HRI=v1
zbSUO|*VSQubcMP`=0L<v+v%!*e2$${n4hjvi{M$XA*=#vzEArmpqHIf>YEg$JQ`~=
z32VAdd1N9rZ`xvyYSmRyE?m->6J{OvYI>Jx8{|Zj0HQ#q!GPn1T+L;TWmf4VsOLc8
zngksxmp?m5FpnL_@t$-6&D)B!MR{l<iwc<}<Hjn+_Xg|R8^+<21P~;@U{_Ecd7g%n
zkDHcFK+__6ew21r^o~<2#&5uXQ)!oKev%X)8wfrZp3YD*N7-dlx=r}>BR+aFyHfor
z91O3Mvhm=2%I!T%LlwV5-K42hTTIj&-NGwr6_IUKSV%E``tUlX_k1Y4Kne~Q=(#OG
z%MNYaWO&3fx}m>p$Q02Mqs%+a7L>F)!~Yfkugn>qo2z6q)FY&y%))W3ku_#Vjs4@)
z@2zUeMo0sSQ}qCsg(V>?Y<y1Wj#u?F3&A{DN|rW3=>**GUrz^w_q9OkQqpV#f_;ut
zJ>bmRU#A~GE$Q2MmM!n?Q`>g#F>Ni<`;hi2*DNZK(i7SUv?fj`Zg_w!z7<kAOs^g-
zXx5Z%$0>#}4u5Vz^HZ<zeqtOref|O|f9$DVl7*QoK<VTUPow(2hzh?BTMoqLvA1n<
zl$nXIdzrq0<e#1)O`ADW^%{V_9sO``c^q?iyDU^TwVAAe4va87dMXyw5KB)sF6h*#
z|AU{Nc*GJQEm5C}H<V8d;V~dN;Tp`n^&ID?e|#IUoNssh+$0@(tyAKHu6^<4?OM~b
zLQEvh8qnXr4?lel^LOP78t*4G!BdzISsj*&aLH7uT`ujk0R6|kISE*DS<tQD*X3Do
zN|{JQ4<^ktGs2eCl>yYGo>>LDbijtkvHH8JI`hm37^KijR-*X$p5OR(DnHhMu9^Mg
z3!4AP-<WQp8$V^B5d2G<)UCrpB~08-FxF{+dE{|FWDT&fkCz2D@~L-ZJ|{6aW)#2h
zi$ea?8mR+?xC%}Gf5)c|Fx~i(t$@<E&*7@r7V>4|M7RS>2aQHy%TFKpnmS@UHX&V#
zj$-+x8ed+B`jGy%TZWbiZ3@Jqw*%5Vn8+`Oo(sH6r*p@ljxz1g=iOMueZiM38=`_k
zNd2aw^i<%qq@?AVaN4!b7`B?e)U8+Xx{{XYX|BolgM|gQ3LGg9o72mb(xyr2{tPEE
zU*h7#uyC@^=<T}EPTZtm#0a7~uHU_g@rX)fx;n^&@?}~_J4w*Shf4lsEn1qtf*%Yz
zBk3yTQZ)rqbjv0pQ61>1oO5&!VgNccOA>-lVyLKdw4&44NDI)Ae)vsx_wU!bwmB9R
z1Lg!nKZKq=^IVBm`pNIDigN9O$6|Ayk?>7}niAE;9d&JeNPA@+vM+$<4{xsEJoa4K
zTCNz~td<O~W<(=A|11_XpoAOR78Ih$#KD|@^l#^NC#{-w6LF40j|iW?Nt$`SS<GB2
ze}Z%t&(5_xx9!*uB%1p4ntp+?#Bm?%iE;t!iXp*EAqfg6!aV|>_JEom(HZdXhCT3Y
zZx0NQoFcO)lwRmiQ|%G9TIoi`3l%IoRJ@eyRkmTNZ-J>+vM=y>aHwQk$_y=HGNc@I
zQG?*R$sj^b30?)~(RDhw$(Sb{s>`QpmW0sYmZ)?G1h$}#F^4I$SFRDK0V*d)BNDJ5
zQ=QbXUhbydV4EFA;5-Z6603W5O*wX^XhBsAP`*b4zedCKYn7q_b#+PSjzhpp3W|F{
z9S7ER`qucRUefgS<B*fP2$zZYkd<EOi53DYh@N{qs6yKk>L$`x7dL8bWn?!7rUHg_
zMrCh|_4600lb*)duYCRGLiOWHGjew{|K=_;awm`Sjw((*N19>?bnkB=EuYij#x`BJ
z_%I*tdogHclAyU(wuL^K2wfHCl8w4`VkpkP1^ZQv81uav@lDbd7!AeOFLDyIZIGaz
zTgUmH{-4J@fb%>@qc8e&Ow5KxPfcjOIS>qZkGa4ZMD97X;H2VcI6Y8z{C#Of;fWF~
zb;i~Y&-*ws_i>tVaz@bxuifo5incNYv=$(qmtK3GJQ?QVA%o4Gv4#!$KTjl&n&+5q
zcfv(E)Iy56q^E?AJp;+}P>kE7ieF#M&o12^(WKLp{&S1+6AC(V&s@mXIgg%3glMY8
z2Oi)VRTjvy-|Hd9-x(v9t)Q$q0X8CzVu|>E8h@&!rT2|SvP*UkSE0c&Ih_)xa`{fG
zu4V1=Bwf0DTDh<k@+rhm3tOCZo`t$O3;in4o6WG<LlrG19l(PyY(<NXNT5O6I?neF
z`O?UerZ?rN<m}#}o06#;B7fAmG0?|`MLH{xaS3;7hGE>YiZVT$PvdKN9b+N=L_&na
zozpH0ucidj=#(zYBcQ=7B7Y4)V|<>K`nhRttN`6@*GkSL!UUUy9ZulodyW(#Q|7wX
zfx626)OVjF1H~uZ`}A+%l_%r2XMUMlK7sjugZLubq<^6JW(Ea@f#DShE{Z?xQQg&n
zrtI^(8hzTK-<KEE4espF*Il!-VhMJs08v1$zeoPbGo=J`nFO{_Nlok7f^vGrhy1g$
zLoSJret5N`^24C-h!F;IIK*6aDVkWWCKbq1M%RKfrj+e_@vaMho_U1E)DuK(lE{TX
z|9av*n-=h*8R+jWruP!FA}jqNxQfb~YLw8UaZrg$PVf8{>m>h@EhcOeR(MNxVfc}f
z?t-r4W8D)&x2dM0HIjZN-yU8q;~T-M+!brwuYcMIY3_%@lG9uGy0Z;yi#&ul`KnuI
zf)5wA8~b11iR+#h^81`TJsU%$l1HVgP*So_`~)EZ1kpEVSQKT80AFN})A|nRmT`tv
zb9h+k8ead@!+x}P{^_U6vq$R06Hu-(Z(%jjhpu6t-uUpQ$)y^xD9U3)!Nj*LfKt7h
zXyetx^f@(}hT#vw%=V!cw0@Hov{+2Nc}0mu)`Id}2tR$ht0KvBgOP|20=eA*Op#t!
zFCJmOUxT^dy=n3lpc%1roubER9RKQ$e|0;DdFPAyvDU?2+P}JPYlM>Ak4s@*(0tgE
z1GPp^CQYq4H-5|H799tWG+*N#Um1f2BWg}Qy-tAzB?V0ByNk=Yqn$+>>99Jz9OTUs
z$qLhUkR?1^PC3)>n{k$7V#D;xV;Cdc<h)?eGDS}*EbaL@^4sH%3UdwSfyP(yyKr?q
zXZIWDCp9YRc$`=0;Zylex~`6NH9WINH>kLAlCp_ddgN6|8?_*~7V_;OOu{r(oFCn<
z^o+-Uyq^;~TNFs$PI#}{82BjUfISLT+;xj`oE8$K=|8pw=AkQB%rE?4#~U}j$<{3C
zc9Nl#qtZ^4JunoV>Nu$$y4{v5VS_yS+)d44MW5b)g0EPOqy(C$LYshA($9xxT?=!N
z2G-#Z?mXz?><)E2UNXyQ&^gNA2R7yd%jdN|9|;Tx3yNwZ`3FHA-AR^*aHV-ACiX)O
zC@D}{=Za51)ZTehf9G?<1~*gszoRtDH)&CQ5HxmEh55k-!!omVnQ3`@hCs3wI>uA>
zz4S*T!II`~gcU>~WA_ukNQ0Yej)&Vb14ZoQ)fmsEGVct7EAltR_t$dZ#ZeAa^?K4z
z+5`H|Rx|;P9=*BruG^e34-Ym{+o3>Ts3)b%@NxJecWltGg95E!n-p5kbDLI4l<rX#
zKSB8cl<(QEW0HIspVKoGNB?v_Br+FtCi_4W4yQycoqdx6Xr*{S>e(Hoq|Nk~>bFl|
ziG3$14)OfG_eX+1O%YU<^_AdgrHGU_zqwAbQYesm*_|II;7qsa-zYs%pa;W_7Z+pX
zvED<dx8c6*9{t5mNKxy?dUl_3q!z>A6HTWI^gvRB9k6S;$Rt;hZzs%MDc-yB%rVxc
z9SW=P>SurdV747P%-{M)ikiYGO!`fVTH>oY?cNA=NXPZ*ElZIz47%7{1J`t>tcjFL
zYAXtvR8Ae)3#EUZpL{Jg2Wk;`lP)KBfnTMbIosyvN%URy!*7Bp|6tPdg+w&a+?Aka
zt@Op`=iDqf@%7ZYJN@{F;ohv7DhEII!<}=Vod#gk1IJ)>1Qd2spgi3HIuwii?6*Wh
z)i@Chs{tZr%3c&V6c0|aMz-pjKE6BgV7~e7^O|PLV><nid03BAQH1c52t;i`>4sv$
zBsf{7vPoq)k)X6Idl-DD6q#lxvGpS5$*9TW7q!LY8Q#gszkZgoOe$#ODs56XKzfjU
zd;hScrU`5GF+Sx>-a3BO$I6oKW0kxuCXnH$XgUa9itv200m6BLyco|v{qN(f#EpyG
z=j9oUS#MmL6rq5mC)ot^1k^4qQ8*6Nm-L-%BQ=S*x>qp8-54KdMfYVOPH)pZ9Euq+
z4Rh9FUIYo%Ec9og_J$z3x~N!Hf1K#~@t-mMZ@#FTIPu&C#@B^GIL(4n==tg;jFQ<?
ztOclZPK$rinuegGi7_p(Aay@6o1kr$bpPm2LJ-=l-G&tGp)Ueq9nUvvdRm<-mK6H^
z6`a36y0|$%#RB)(2hY0=$NAfvLgCS|Xx}H8<SxV*gz2j<;@H**@(Zu*&sF+o7+1$b
zZZ{(0qlnr*%4vt{!iJeF8T<4%?=u1Dnmg-!Mb%alNWD~1s&vkco38uH<iKv4XJ`F(
z!xagI?gwX5*dQfaQnTuKTj+SxuuyG9KM#tf>^vP<AnhM|!}Z2Zy7aZqgN*BnGMqUj
zA!c43=+AMh{S&t?k}W`IePp@za8f#oq%<8V&mR0>S9c*LbsoQhK3Q&w9O3-z+|&O&
zKm8=ZZUSeQ{4SmXRl5})qt<4qCpiGp{o<hsG?HzJ)OWQV`uX|0dyDGr`E49l@ehAK
zjj=CN6P8W74BnA|9s)kryH9VwTT*=G7q!&W&z|nCE;!_y%Z(ZCIWwB+(EB+b0kJ*p
zP;6DsbWA78&6XWXiR^qwAN_1Cu|sWm8dF>XsagA)sjh5y#_AR=Sw&&()*BkO49J-a
z+D2er2ug*#qSeoidvvJi(lxE9n^sgZE`*>di~wcyH7Y@Ef1yGs?eYU!{qR|s__Gt=
zhqR@LC?5PF;G<sN=I-C&Z&+%V^l}rP6sQqOuQS&Tpi@d0c@{O~+skFIaCl(G^uH>-
z&{sP=!e^ctWFH+d_(ZYLR+c+rvBEsvyT8Qv&8a6hiczIV9Yh)T%PF<?cecS_k@J^#
zmS0;@7#xCIC*;+*+YFE{dzT76vTs@3y;%o=tO^_BZ@Of~CPUr5vt~ym$wMjxfF6N0
z$0^pS915tiN`7tQp5{~Dq93t&SEV}7QKq|9lL}Odth8lQ;s%=8YoOvLQ=Izo>Rx{g
zPl)jPzZS{))wFZBARS7VPP3o)Q80~o%7X(fc{T;Gi-V7ax=EnH<|<Z9T+uLNTz^K!
zGNc3L2ZtY*#s4PNDhXHaluHL&Y*NmUAVvSF&U=3PyR-5cz((uuwD-`BO0TMSCWR~J
z2xsU?3#{R4{jV@Na^u8<zayj>Jo95Oa8B=lYy$d|MCxGL1kudGhy8qfWCm)Frm?_1
zK%1N<T<2)ITu@SZc6S$aE4OrgL5T>P@_hKQqfu`8mp5CWwrhHx3RJdaY*U-!mX|C|
z4C4+ZZ7pNir%%6La%e3kg{9otO-O$qBTY;A^z$`-k59plUZ=009;2rm1dv6N*q8gA
zE+}F59K^H36oP-)CG?7YdQR0&VG2k*^#wg;MGf~UkQRe30{$&2;R~Vo^t{sNvtL^k
z<|EL^1FEe5d`n{og;R_U1?W_oz!tQqow=-ARE7E39Yv?{zTNTs-4%|!h6$cMz=?<i
zszq0D=KEdFnI3gbi7<0rQV(5nn!HwNhvMrqii8o2bb_rbrSIxEf1e2_PhoYuyhA5e
zImjG#u%3aG+ufPsyBfsJP)^*rN1wi(E9vQ70*@mjSyWHbO&=WV>6k(^+XwpW6`UWx
z&qQ7`%PhKZpt5rGBuztOg6tU$ZhNIUPR+l1e&Z|42~(e)1&yq-`}1eKW5$gPL`k^F
z227E#KvXcj1x2<}(ozBX9A=7`-B@#SIGV@WJf3^3s04+IQA!8CrtI}}>7l@UaD7QD
z3|LDY^2@I$U7?u241N0BIChlyi-h>pqe1$+x}ZCe$nwrx>h52gV4qs!0`hbU29l!q
z@Jtjy!Ehprtkh6~YJ%B}Yad>=Df)v2>FH|H%u3Lt|CmM88xlcOj@Tmoy@T}mm;43(
z=o4m!evJ}Q$Wu{;R{OEYu_VZ1DW;GjLyS1+66q2!8$VY|>uXAZ(jOeiV6+JM#zLr!
zFDUCe<(iq)T^%Ln!u`3E+$|NwNitamq#x)$@~V#Hl+7UKguaUONSko9Q}xux8|q&e
za)P;(L(8#W=^F)1rK$5EnsCPIdR`xOAdV3%xzl2%uql{+v!F{QC)jQR`av!TM?gQ`
zJDbkVKnEyg^-qol++yxH45J@lCBCVUl*BFJ9*JAIpzlbQ5GF&)@FnH6Ej14fRaf-A
z+l@zT@v2_FpJ>>g%6LA-qdjXnl{mFngvutI@5eYrzTq`pO2Y$duSVE3M-5!akAqnZ
zCuUwBDFpJtHxA@DP+8AIvVmLI)?E#mxwMkNnhI?+JyCuSE#d&`lU##qvtyqKLgb?8
zT!OqOK2P&^DLgUk%9otCMJh18@9H3@UFyAG)ym?coVi~k@onL~CK9YQSXPFU6Jpe)
zom#>8et4R`E}87|$f%cg6X0`^d`jpMmsDXs)^eWNCK&8JDG=)5Dw=85BHK7DVV(>;
z7?m_jP>1<|lSz<An~kU9b!nA6<s_g~1g<`tXf&#LKwb9etHONWtvhJ01vXqCz|AEm
zET|hSsyTs|MOBd~OX`WYAU#x|ehWT#N&TJh9(87y1#7%ysyc>CP%BHSI+Nq&F-TM<
zy1v)_q~_YwKhkf0EQX>^JRW)5)?e~)MA4|>OV3I<T!K0;sO;)6`2{K;t70fPJ(NgP
zTTJ38c7i}<&)SX3^7ZU<>{N&OIa#2^g8dRp%Y-|z6G~%n;&tgV4X)UseBvR@qj)u(
zA60q$$5M`OFW;sJ4+7MR(Mw98Aasyt&qbi|QJC@(sA-Y*1=kr9P6MJ(c98tTL4x^q
z4Q0BUyosdb?hb1ZmrwjccMpi~(7r!JO5(nz4Jj)#4NXXW>+1=|yTRDC(d@(XS4qyV
zQVnLsz#;eUZmy_7yWY8MDYlCfX&p9b3(h>zILyP1--p3Za_QewUSgG3@^aKPnO|YX
z1ydwk3C3_$=e(X7=Q&KgN%24y|1E9Z^egjr$6<z)Q&Gv&m=M>zrL22i(9u~2r~&Fn
z=e<F%Zj9=u|APgc%+sTuqE4H2r53@&g`Fmu$=NMPf5wDyB)aSPwprr1tK+X)$|TT&
zFrc~**0Fa<Lt$fHQ7^Kc`Hb@Y5G4%`rR98y@hKkbrZ!NE$|)BTrAxtGfS)<PoVcF1
za(41$t*$1jIWI%(UsmcswN2~Wl(I2CkPf|qdY&s5bmi#f1+`5;$KIjv@Tlk>uMnX2
zQYEy&u%epJ4H?ag_sWo!U=`&D51ir{H_ov`gxsUQ8sJT&rxcT563ZhVWdx!ObYg1J
zNkaRxjudQYs^mU{n(tV;6mv!r`JCHLp&hu2CD@?6vll80CTGa5&-q9r;Lk4B84yG|
zxg7RU(;iI{2&|<;#$2(Ceuz)0ms3UDjnC0PQDt{{+_LeOisn2&2zO=)2l*m*=13hl
zJZRdt03G)^&$D5U_ctsELY{Z>B(j6~;B`S=XDz=6NRsyaP9@>Tr<d>FUw))p{@0q~
z;U`^Tz{-=MyewZMX^1FAs*(jMIt3|yxdwcPGI`6nI*?pgf@r_ppwhMIQ#z>STZLW9
z2oR=-P^JQT!%;e|=#km&ohqp%+A~snWO_YQ_bq`!cLOqo4M6wYBg-s5T-G7RtOF{f
zsF9)c%wBy{1SeD3xehahrgr1VJXU-s+7^`f#2`FL*y6CcW@Cy1DH(}U-dq8iZ&CM$
zl{9DOGYOCf3Z&SlXGon^^q#f=tz7jOdcM<bq7szFSaaG1j)zi4Z6XwMxvm0DFMnXQ
z5-DTsW&Zpz(9h$jE=DK|LmOi`7W5KEe#s)L^CN0#B8S8L*oXPCDeur!Gv4{Y#NBa2
z2|sK{M>o{Bu)}<tjFD*4jX34Jk;m0k9kf?eHk~<N*lO)0j#BS4UK=t~jX@{W%N@T^
zIZ*V>$y=}iXe_BOFSCJ<hxIA`<nOC(2bfAqnsxFDR-LS#Z}=px!A?~ZLGKjhN5}7^
zv^j^OI~MQNPVk08eIbQVDV*+EA&M)MIvKj@Je^}KX)x}ws{iYkjdujd9SM{U1}p>3
z7t?JsC}A(cbg}Iw70nu3D3=A2&E^+Wfz(V&CKr$fG8W3s#&XytUY~7Pdw4FZ*;xG4
z_iSbTHq}?6tHWI`seyNHo4C#mSHDJKKV7U|@RjA#E2K*;RaXa!WPd<CB5keK5}#<&
zT#`tmQ-}|jUR$gwN1ByNWfRibtL?}0sDCX-tIy63`BzsB#OFhWmkQY)8_C0%)-pD#
zLO&HvC!EJH^Ijd|b2vyzYSj~Q`0X)|0S_-*%r{`1Oc!^Q?rN*aj}#sKh^aS+v-Og0
zel+y^Zz&Ql!+crgK9V*us2k+WJfJGh*OPtsiDOd#>@!dk7vr|C8^=#GBc$?f!P#<*
zOqzfy$HP7R=Po#ZKj4IW`0vmEbdk91L)Ocukr8C`!&4ZZ($0FaPe1<J=WCga(~5oW
zI1*-1HNhw~ju%afC3Rx8o2f0SqDS*vIzYWgUo`z*^omgo9<C?RRyuQ-!*w2ibDR%J
ziJC5_Q<y2Ur*JNv?4PB(95cJVbvdDSrXc(Y?=~LhaimW5>tU(^Wl@QyeX4b-oc?^(
zYHX&DW9Z((!O4cBCG~Pv2)Tr`(Y{BtwVMq#Y0z@uQh+8snv0r&Gcd);m;E`)I?VX+
z;On+diA_(u9Ekg@_-)U#(Jb-qn}A08;3+0sZ~MRF&*3Xo=-d@g_dTd(7!>8bQ4SO>
z9g=u%TF_W-NKKtFNqY41NBWJwPAhVl6iTTQ_#2i@G^Nk)meP|!1)9RV3dX_$kx+`b
z5{i6!_J2NazPeB?AT?P>Yq;_<Y6({)c5=>?Y8WXFiH}8mH)&L2M9|&{?oA5q6v)`=
zPLh0_xsaiDRKIP-**{Y$Om7q{R%Pm|bAgKny~9U`5OooWuyQ`x43cn373k-YY873i
z0)ikgZNq=(FbrRz>Sf<Htw|WlJ|Lg>tC+Nuo3_GUQcv>PBj7r7^)k(TV)V+?pfy~N
z3!t161A|HAPxTe<#uNEpfnMKTHD%PxDB0ugx5jdj2#TErs-@L^&{52^OV^9>E3GnG
z$H79yMiuMH6MRxuHBC4VM{%aNDQ?x2Wh*5{(?pKfAij%YMT1)2`$-Y31*yyZ=JiND
zimPsriZ)^)fTlN!iV^&M@|*X`hpyoQIMk<JT+gGtJAenvA%(l2->ROeS@<wD@$cch
zp=R+oJ%aH&{2kJPVznKJXwNuxPbJ;#K~lR<u?54l8Jh+gWs@3&&XTIaLaWpiK|agt
zwGh*sOViCCD)J!Xe%^Fmc|`D(p5Aoo1N-jjjZ4|{yq;=iKb9a#S($+Z73#Z%?dhu<
z?d#Y-J&jLlcq0B*e}GbyN;{_CSAHdRH6<xJ!A!=wpe~C+Oau<%Jd?WG`Eo)I!V@k3
z$|jZdr=MgiFfXV<GIM+)2l~|OqME#@EkMn4d{(H2KF42v^gn)$?w1K;aA_6Nz6}pL
zfOXr`+{(d-Xv*z-JroA5rjVz0KD(aQV8Xs_sX7dc#>!VcXR?VG5~l#|&z={_I8IG!
zg?$^&@l;gvs^l{-=b5yeDQQCMF-V5guBKD@sV8~?X}_KTO)HAl((p9Q!-DbgifY#{
zV&b2eX{3rCxcKW!51eLs>HQQr)z-6`h^8yxy1&Cj&rZRp1RaX2z8c5hPsA!&lKvd!
z+H8#b-#vbJ+pc|UUN11f?p%OPC;RNhZEv$7cO53)PpA5Ia`}sz<UDdR5Us%VWJR%{
zoKp(J-k_U+?vWoiti7ww;nRI@paK#wAMDsfhGSA-6pD%_oEvvmeu~^;<Q4B&kZ4Lt
z2P!^6ScTo9+ozs%UmMmm{l^&Z<Ia!e53iQVmXTIzrwFR(I2Drgi*|m~K7E->55vJt
zdUc=91t<wEWm2l*`;_avJNWrPSFif@w-*$dk@#5-EDk!Rzc>4|Z<`mR`CvFF6k#6M
zPS;sGKXi4Nzq{#%M|$V}1Z`twl_Jj57K6g;Y4Vx<npe~2>;4ht=OfA;rRVu1nJfW(
z-G+khQFQLea!N~1(e?$oq|f(#%W@uZNk!VZc1Jkf*tS&JgGx@(Q;@}avfbHtIStvu
z{QUrPm-19FpT~XA@Qqu>uHqN$PXjcmy=4rD^hSBZb9%VTRTuPkm%BFU2Q;%BI?XPU
znr;kErw{L5?7oOv^>U)Fh&JiwPlw#873Y$I;x~O0%I5sj>MK;84M?FF%D(%tK!o5U
z5gj3*^7)w^+;Rmfef$&Ym-&$@%rAqS*@5tg^g}S9+w#+)J1=-4xhD(0$|CJU5X&<e
zN3)#)3$5Za39ClkE3is(+HQ6^94W&j{zVmJfl`+W!|oUTfquSPNiNHsP;8dOzgZ(U
zE`qOxnn+@8Xoul&AYiX)7+Na|9{nvSlJwmA#CXfIppeUmb)<L`1Ne+?VF`qXQ1iy)
zZRxd1Zw`newiv1`1p9-Z9D=Jr-#c$@l*6w$bCu(RgxhtbRGuS&4^YIkFP<B}R%RVU
z1|gVJh56o{d&i{(Hp9)=8=@4DMDE$>Cs)8nnVTG}t02!9mvOe-zU&E}`^7X&Yg5cF
zTqrK6z}OcQwe?q!#=RA#8u)Mx>2!_CA&_feAuJihb~QzJj4&$?eja~w>{GmuW7rnC
z8)Bi%vXoHuu@LcMn$0HUE{vw*p4GZlcYyM9Mww!a;lJPzN>wB=k?P@g<#EL?IHxC(
ze)U;@6VG35<H{%T;J-g*H!A==f6xiT>2AX+H8`pI%aH?l{L;0Zo|-=WlZ_t))9K}8
zI6vjHHIyfAh4UVC+Z<8XOv`dlEk!nP`GuyrUZb$I0hbFAzk%9S4P%|2FkFtpb}+6%
z7~%S{AcVAC!kD&JI>p##099_nZQwmmA<*RESd=fO9XA)HP@TZ+6F-=&CWS1E^8M-}
zQS|>r!M=M_tKNHji+=fCyfIWjPTc~1#}u(b@c#g)%OiR%Kz&tCbG$V|WHYbO^cW>w
z&Ib69>=i~fCAuoe&pQU#w%H&Prj9!8Kc)y~`<}1oBv0Q4G}0>;_j|ebz2s5QM@I*v
zS!_K8EAG;Ik{VnEW%WvgGxFunPJNQBx}wrk>;fzDSlm48ho9wP&QX5;7e4$HE~MiF
ztJ##)IQG$b)D3MtxFc2}e!w(ikt3bd7P#ofJyv1fo$%`JNoPw5d4N>;>0W9;DrY~W
z1nn2wlm07eueqCp!UP#2OToXpU2}cs1W9kS0KNX*T@&Y7lD?Lf{owd9;%o?#Q8is1
zWUVH#r@PVIdk{(tMxZF+*>`vRyHg4q+JL_9=G!OBshZStb^h>VmZH=FP|CGsBT%%K
zpAM`icGE)Lwm~(tcAT}D0kscQ%G_)I`lr9w&dDS*a>15j@^M>Fl4m3{-;>#39p|3o
zU}O_IYWV{Ust#9oE3dk!v<09ChdV$OcRx#NeQ#$fYz=s~s@Lp2q;5ytM70&Oa)Vs=
zGW|Q)ttyH~e<A8u7x>0y)hK9`u6;LHA)t4#7F|l9DbRj_xIi0FeJ^9IJ^KzMMT-c<
zi6fx-opOypzeldZu%ifhOT~<CR;|e?&-VP=i&ch2D?5GrhO^c3@h``w{8&*e4G$&E
zJ<X{&@aDuQ|EeW1n%;R|2!#33z}DqQ{_`R+Is~!g<M{7h=58~Ph9&FfT3=L=3@0cc
zao^IfUQw!keVvDH+j7WBDmz7<5X|nu(qV-^o32Sjp1HjY*#)S<T)s08a-=&F{?G!F
z=^+ZxFOh5UZ$>(`?x6nVTafOzn!@x?O;+uf$E_EI*N|1pjN(hGI_p@413_I4=kLmj
z_CLSrf1ZDC%Kl}+${mkj2$xMoB{y%dT+Q95{h42$2b>?}m*2h6brF$7eOdq%;mGz@
zfC6C@tZ)LqC>c<#{UohX-J?|S59@f4Tp##wG}Y1^FQLhp;$(s2<o8xo8#|kH)`1R}
zZ%N(un4@1WQmBI1qi2VSIiWb{k;3u#5$1ad^L^6?hpnW2&Qo|1Sss*k&K<=21NCw0
zp&{}xB6IWBU250J&XebW?jDf--Yh6y(PgYDj4p436vF+=zaIk4xELd!!{2{fdk$Lf
zfS&Ss1@fDBIRx2Yo--yGAe*kb365E1xl0kPf1Ji?k^-B_5V)=Qjmp&WotIQ@s)MXV
z_fu)-paoFLKQ;BxAYWyQ`ba|REzdyndVW@rZjIX3%{yT0#uVzDZ`UJ3=}gFnX;a^)
z=byXZ0;^U$r}rkW>VM9$(tGsR?rSApeM)$LyzP^qM6sqjy8bCh?#aDTSY9R(MY<fa
z?KvU+I7bQ%i1GVLHU8mNEy)}H=tb3Jk>OBl7R8Io$&auWgRgA+K8=qSSZG%LioasK
zA!LBAB&h*G>=y}f0kmkN($jzUK*hx*Wv5bN8w(H`p7*l1XryzXyptMaH>r?UaDJSq
z5)Z?hdBP*GiBU`=O%ony=>iC_$T9rw=cGbXfJ|>d3)GMTyZl4GNjJ4Q{(TzXex|VR
zU;Xy)zi;;@Fg-m~1_sFPXi4eLmrXb;C9-r^`FjdWX_La6Gg#`{tSic09kf}h_cfr|
z1?}HStz^h~QaqLRZ#YgQ4FEf&!3VNuXDiA*2<YX{)Yl5k_y0;egr0j4^&~03VH%LF
zsNfX`ksVUbk-lf7!wZq0hGF;~)0?Ch;dGp&Xa7J<XbGqOH;aF!n^YN;9QcZIx?a#(
zxTd<Gdt&6{`in8XJW7<dzW2}sKrNRCr`&x24Nvf<>L7o22P~8!J@r506Ry*6c>;G*
z0CU-D3JyMOH5Fe?lcn9S;M~!D8e@oNtNm6l-;|Zc6zy9FPKsdt8;(|Mlb`!9z#XfO
z9(AKB0U{EPlUjKcU{FoP8~Rmi@#a{C{IPhm_us$PZ6xc)WeoLII=Xd+b^hNDS9|}H
z-D-MkGHZ)BJ7kd)yG1Xj1!pv#s9CdQM%=_lS99Ve)eJr*Z=n#(DS<+b1cjS6pmBdv
z>1YzjwN6^3ph#qOOsji-k84)u%xkJzppy6?v`7M}b`L4Tv}17V_(j0KuIo91KqE=p
zN8mkY8o5?-x~5k107D&VDU;&<1YL6qHPb6@K`R{49cGJa9_}5}wXNyY-B>iJDl=<M
z5}C+`F|`DKIHO)jTc#M<8McP;Jl3G_NZgvB$9G3@(Wk&T6kC*2S*%;k)z4w3)pQH%
z6&v%XTT=CK-(BE;X&?lmlLpFT%Chw=x>|zX#yZNKn|zg;o<ILBX`h~EKas1aC{ytS
zgCouN6Ii7epe_h?%^S8+a1^BX^fMVVv3y+>ar@FPs^R?TwmAO;TVT#EF$m<q6k#}d
zH0)d+1**eDH0+ccCCw_aQcd`x?jZGyRmim(&0Wr{D(bZ)ND;akPC)Jd9$uY@aR}%o
z#EHLFM97zKQDRNfCF-G$S_0*i7E&Cf>6Hr1&q=iZ-0SLc^Hpaz8C2<@^-|z;o8Bvp
zt5spXcV4GlAUcS0`!<|?tIjYy?s}g>c=mi2IE~AXQVY)L@O<{GP5Ga3l~80^4ux3=
zH;bbcB}>z^ML%h%>irz)lS+5~(>UvVxK<woc`P_MU8R=D<muxZu1v7`_&ygFS{C)2
z)+CWP#@0<L+X@Rvggv6?BFsy{{xX}MFVgh!>o7mMA^g2^7h5jUc0FWoAnJwpX?;Vp
zoy^qv5UQKBT`DMv^Zn87>t?u(1&$l;7`8x-1U`4^jso%0u-r*66xJUpZ|b`SxOGA7
zd0Ek)qzSvw7CS>hLs13H8|HXrxwnY#1C{C;R<$b^aVIa?E@y-F!7PRjMiT_Q<q=LX
zm228?Ry-xHmy>fVK_Du1I(>c38c8KiOQV$+*0Vl(u)Lza*-ywA@{xj=3tLdNCU`#W
zTWEM*I!!wTn`JiVvZU9fEDlpPt%z{Lkn-L<3=qUb1-}Ed2&OkUrEOkPB%uMCeuhP*
z+VPwj>7AlAJ%veR@MjWkhR$r(zg7XvYLKhlQ7YO>yR_sO=~I~E6|0%(u;mdbvbw7v
z{eXrmIiBcdb<8FKI-Do8=0Lc$miK3Lkv^^J6g8mdpWffK+*TBvz*xYq!{630b1qz?
zRQ0Z6<QKjxel<;z4p6wfJ_i8zi}a4ByWT+npYuo3SzI)mbpRtU$gTybi_api7kxhP
z;ftzXSqMF4cV0sad)=PgP=a!MG#BeAQ*IykyPOR79q%Bb+A2t;DaZ0V!wbozJc<9<
zD$4inX-$s)#_-Qu|0bzFhnb(g-!VF?YWvJ4v1oFoLliV$PW`Z)#0jpzeD8iRmQegT
z#{Tg&z2R_Vrd$NXHp6T$EH^#yu)Xk9AFH4|Ya^K3^#=-@P;S&Hl0gcWog$ut`f049
zjx#eFPgKuu@oHzz5N0ZRh~aW(gJF~*)AzYZ)_^oOWJ&BHpQP7uv~jTyV+yHpylE(r
zPP~9lYI>uJx*oSc&~9SdHz4j_vR&vfu~u1;N8MoB3r=yvtT@(}&u<dYncm__3fni~
zJc^kQH-T_XSH44=i(-7~S+|Q@G-z)o2lM&{AZ_vB(-(glk18iOyAJRVOpj=v{>fia
z%U@AANKwzBVV@qH{^z-EGCah3^|AIWEDz@Y5K;u~xK<jFE~C3%rnow5d)1J3)sXbz
z)9dQ^{nWsNsvDs4G6dH^H>5%4$!xv8px=ZflLPkTZ^|ZDk5Z<!!Z8k)J{(Tj7!cR&
z&U+L?D^;AI-QQ#He|dz>zvd%_hTf?BL4Kzic}?&Og`%5IaTI3?TZ75y`OEwK<tr#Z
zS`xf1P!Sp>$e;`X{V^H_Vvl}8>gU>yfsiK%?JZE9v(EWwCe<-F%+h1ipyGqFe5>My
zroMxA7M7fRP1$qmNwb+6ITf5IYZ|8r`a#i8x3%Lf5ibMD{qX3Siy<4xt>gwt4d{FK
z4GyPau`wKXEblIt^=L^Q`GY$n(^i!w(xq{)gUKC|7NqeYaQ;GJ4~eM6geuH80}SH%
zo$t2a-ZbH?yC1~cZhR=|sxXA&H1UKv%ORsoUV%v=pAO52l8<{(HzA*F9sR^`k`|kY
z4^p#w`SN{khu2V|iT7hT5*I#_#MNzFF-s%AUJcTeFwJt5MD~Vnff>cj(wH{t^Mf$*
zWf8Iorh@u86rp@y09`J89pqFZZvg74S8v9V%Zr~iMf<i&J~k)}SIq!5+4UB6oGWJg
zZp!g0Fn^y2;`;2@aD94mDv_q+4jCxe2Nm34X!m;lh7t{?g7dsO9&U;@FY`;nrCpkC
zDa{`y#~&ur^g}hbhV#99m;u<X?8C#<H@5Cm{1jl)5Kyf=0IKu+!Zo0cQAvX)ZvOV^
zqdRi37f_KtLh4Ab?$02->B@r%D@hS)u&gc0Wl>E}GroOSLrFt+#!m{4eoxHtxU_Sw
z17-#=%7Ou_j4h|{`EqI^>(oJh=0b$0LWGE`Cqs{PZxm1c^S0k$Rbw~`Bh&QlajLlY
zNr^N?d{Dt|K^mi7G;r<m)1r#&;Y@ZF>EgNWQ|y|aC#I}sb3!_`8ja1Z#u`fclg}nK
ziXq<cFJGn?g&|!E$(%1hF<iGQqzPzpwGMdYGP^xG^WIj~ZH^L}TB7NmM=6pmE$`%L
zL9VtfIIpwVb>l5%i6ER(Cv49(dM*?o$vaHVb=}h)(D9qz{F(Y*UQs)c9I{6ts`&PX
zNu>7uDofFh^yi)fr+dusTg^n2!UQ-RTI|7+nygeQE&>gIY#x-%B0bd5gG+aR&vZVx
zqNX9IhD^Dyo&9cp<)63Ajraw{oLquJ=N=B><sP*U50q;Hy1@DN$Fv=C<^&dmpj<Qr
zzM@6a>}Mms_RG}m5O(6ds{X7|i!Po8&4;ZghbiRr;5Wg{V4jAbjT=<7G#WFT5Q1Zh
z6?463J!Y@6C;SZZ_aVr1j?iC@{rLPWCc*CK81pZMJGZnvnXpMG$&mpgN)Zwbe2$fe
zP^9gXj(l}Ja}9zDk4LidceLj6Xq}~U2?-Sx<&pv_$>OE$&;pf$yJ+;VHwgIezxe6S
zTaPeG=(JCvjmK2R(J5J<P=|q_(A8l+KB2#=5*4a-CL_qfJ4m^)?4HBSmy;B&IOF&l
z-SMEq^#)mXFGtP{)~n*-GZ%jKHd_kRQRbXE?zr(SkJOEET52<=51(Ru6m!uT@q0Lp
z>~poG4Jf>yUoM1Bt;xMX!_lP=53Bn1Bo#<8N{y4$hV*6prXTmE$;Q=pY(f{eW%O^*
z7E>(F51grG1Ic97VXm*zL(mlMI8pPjQgw1m+#zW8>5?ig(V+O3_j%~I4)e3anY0J`
z#QtW`oV$0HWFBD2q!quKIHSyF1B3>kb)wdtneRCm#V9?z<9a@aQ><xkCptvKnxc~&
zHuL>o93RNv9*xlS(0e`Tq5u_&0+b>0Q*}Wbs^jr`3c9q#QpAk#J{VRc1x{DoPwA;|
z0jk{Lym#C!K*`4U)2EoEC+?dn29yI2^RiN}?CL;2J3V81*-oX$TQ}(x=AqgghJ<9$
zyjl`ioafUrPw!_1WMR0vJ2Dt|7_W^@F-K1DQo9rwYIf{Bk4Ws)w67w4*rGp}B8A=i
z^47I!;aPGZa4F6Yj)x?E8{&Zpa|3Ao(EH{B6b7lmC6>M4uIN;Of*8%@Szr71M9d%e
z&6l^Q|Jw1N#Bc>UuNtJy%o*Hq4P+yhV6O_do8R~N2sM6%U}8yK*pVs;nx89D`Ed9t
z%&#pgDrG;zTa0V5%;dtZxZS^<6*OEr`RI>H&pOQ;q?2J}3)14q$3GMBO+9gFXn5(!
z-Eg_7@UpJwKfu49BqpmHbt#L-Tp=F{_oMX6zN+gVf9f;~l|DRb5q!L{Kx#xkJJ`3J
zsbO$iXYj914bJv)VUXxT==Dl<J+G63fDnkaQ-S%>&6Kz7Kfw-rd_~3|LVElzUa8Ae
z4k*PIi);Cc8pbXpA#7B1t3W3wPHnZodg}A*>Q==RWita;2(1Uh%KMHhVtXipU}(4w
zxI3{2pL|m;)5#|fUSY}N1%gy7O3sS#VKQf{uIhhLY*-8mBkiyhbAca!#~&*}?UJ_d
z-&WX%qrJa6@3_3LD4$q3W@)}_8Dyj9yR{WF01a8+VD0akKPBV~+Xk>?(CgEDnTse(
z5bdl1aYqUaOLZdVUk9JZU#QOJZfyvVVilxb6%@$5U|W|X4T1)Xnvnh%gxu$CKn_Zn
zGI(6lY3I7h1+Q%rv^4s#P28QL8mzO@XK?f`XjThwTqW6P($oxH%S}swULUgRFKt(%
zpJf*ZEnK$hT#lIF;foB@U_{G}@cR0lNpi%Og~#o`GH`%R02wF)Pgxz<lJYK!F!LPL
z`PZMNyvdfx<-!k@)G>YHzB2u6QTJDn(RG72j5kpL_3&u-p&926w`VwRqUgqB1@1m(
z-XZGG7L;GEBsosHuQi<bi!Rp~hLymXetI3nus9ROFSdKJRR;xscHtD?t~@t1dOlcH
z%xQJy4@hFmUx#hVn;IPmwTA$uN?B|J)~E<6{GwE=zP4xK%RJ6?L$D50g;Wf3FuV^e
z%;L&=fduPJ*RTcZ_o}=0?WfGu4d?+VNSE>N&eiISnab)78?|FsC<1M{)1tbZyv(K!
zf=0~@h*Lbz%2@Nk3NS~IrUfc#7k5?fCQ{ye9qx3}iPxQiSf@PZH~I4NSHMnA$Sv0Z
zgzDn+q;<3})^g7kOG2(eLAQZvRhKw*KPyPpai(1L*Glh-k4&%Y1e&=QfaWizwu8|}
zE2`#qwE-zci(@d8Npp56{Njq{0*1wisxX*4lgHm)&O(q4-yVF~IJMGW$Dej>wZxlO
zYHkKV01{1A{KS353#dN$X~4V*>A26F`BGVc2F&4p?s2I)mK9FQTz1a2$fmM>`qS;N
z=v@l<=k!vUF!eHig<sFlKYsnauTy+ZO4Q_$p<(AC__ycJsa>Aa8qD|ZIYt$K4vxfR
zTv22{kN#$gf;iSym3@zHkYu*R1<-&AK1I@8Rh-N$d2^lS;m8M0e{<ndQp>{jDZI*)
z5syHTs|c@~fG#G2^o#hW-(sBp#J&mVULP(f(b!Ix>BiS7t?G+I&uJ8-C6sMkDloIa
zp-<~(i?Yhs=8TEtMMCP3L142SfVoKi8{03kzwZF}o~KXtVGhs3-_K9u^WW)T@`?0R
z<@onN@4wq2&B^Jz>kLzgf<nU{rgY{pyXpI~T2*Mrg{-99vYv0JKmLBZp;9~JOc6A?
zRn{^!5vPi9&*vu43Cz#W_bRi~<q41d2Xt1Ymxr=E8e?(B^+RCxJq1u>XT|3oq`!*=
zrI5r9c6=IG=l}JzPFZTwrv=&3C(fgr9!&Wj?C$l;r>(MBrvwP|kVI{5K_{tD7jv)D
zITzhO>8B{toBAzfJ@KdeXTRVx;nW8*|6qARD%3;bkh@a2%mJ8QCDMybD_Ys0@9#D1
zzIacZCnx)w=JynQa~OFJ-2fKnQKO2I{d1h@8hsj{evbZis`h!<&sSs+Y+XQXwv$a@
z)8EQ-pu8&K{FFq0)Np=4Y5&bA^7M&s1B*xFK|pGnxQI5vOJOs#j`)XXdN%>$)bN(9
zVg%LaY{Ib5k^`58cQp!Up4<p@x}#9u^Bbfr&r26a1Jn+dF2kj%lQ!YiEV-M#t9gq`
z`E@c1x>7Ib{T|?a=S%u+{6|k_T|+UneK@@5TJ1dyc+%<DgB5&>@)*z2C@ksnR6qXn
zo0M<%(1vPfi^^RZ>Vy1&M;ej>W>3zyY@70ui7+|5ui~tWeLsvrw@nv&6ip$YAIQ{e
zSy0ViCNCFn_X^C<uK)I?zL9<O+c+(whZd*8qT0O$wR<=IW9$U)k)Gt!gJ40kF5db)
za#UZ=x|Q&q^~_yVP~!(}$P8XFpppZXlPFUKCMHZHIy?jA$m!~%ft>+!kZ7hV>(YPA
z<u>XJR1%3sU}$MQ^)ShCMLat7OiG|6b4h^-#^r$Yhx%o|hu3)+%Eb5`5aU1T=jVT-
zd;LE5(9<iQUwc;ur1CK!orV4d34@0LgQe>(<vnyG^AlQM)6Z_$QCa??6UAJBBxM%R
zJw(qwzUCiaO|oG%m^a!`HnbEV<q$|Il#LiV&0nQJa?7Ht0nD?Ic6YkS-LxT?vUx>)
zXa!TBo^w$OoIcDqQyu3pz9gLe85!k{kc0$Mh%@0`vi4v>t(iw+>OkG7(_a@9dD^6G
z@g`}QqQQHH=gDS!*OPu!1?4*mIi^^~`ZHe_TSgfVjXWU-3nTHKl^u*IpH5u$b(D98
zb@dfn4-7z~^3<0e%>`VlJ5iX|fOo3fgytEuR!K`uhEq<Xn8)ayjtj?Opck`7RhYH5
za~xhC@@4H?wKt_fRooAr39ZjK_0kxVDziJ!fp)Vc+Dj`9rNtJ_3le5AR2;=&T4C}D
zST81ttLk#*^&IloCuKALc(QH{>XROu0IMs#PC*t(*U}Toi1d9k(l}2L|N1(!XL^?)
zu<wUpz>#4nP3OgeQqSWiHITV@A9IQMAjf+yQHHZDM9nV~3||HhR=4f>HEKGyWJ{LJ
z<6SdMMY0@W8ZLC4g_btL2BuIAILU7a%2vU`UyvVnR=5w)<D`0c4Q3O~WQ%mZ@Tbay
zQAbY`y`6)F<cZ>UXz_-URHW&epvHM>D-sXlF)Z02m8HnK(L14Nt~?n`zy?q^94DD1
zsLo8QN+~D}me!#HtBZa=SB@Lqi#c%KFOc?oa$NAk4bY7AR+Vu5qIg52n@|(^tgozn
z`uRge8?BAuALFDAGR5~UMEyO)V514o`-(SeTG^edbJpC2W?V}0(7R*R660xIr_|o&
zFsfx)<1+MKQ@6aZs+00Tx~qbg1G7_dCZv#$w;18tT!*>qgZ=N@znIp{6PxSM=7LkV
z;k0CgtuemqSTQ%<T8qqnP$Ap*RdG&be05sO-vX6YptF2AaXInr3!V|6E_G*VCncFW
zRQ8SSIdNlw>S|DPu#dSD1kfpcE-qD+$H9a8zxw|ZD#MjTl|IAy8-iV@5$bdy#O(#J
z|DN`bITZN|V>XUIRn~VY2@?)r^&0j=nyU)-cW0uh$=Gku>Za?83@l_yqo8#tiU<{e
zJqH{h;pE%!GKyizGD2T>i19gHgd4oR?SRP?09q0b*4!7}r7RrGbPKrItqvpnBrH{h
zn|8ym17i2_#bq1U7retkTe7ZW7easxH-j7WRG4f*d&J?Hp4rE}yWiFAccWHn=clFi
z1zO08GPDq%3i1wGb{|Z@?G~+Z^}P`CK#Cr0N?SxxLwDr6k$@(&6dPF5MM?Kq9WA{L
z@w|Kbc6lqzsuN@ghxr*%!cFVT;2J-vj@00OCb$k3W^>Z2cm*h@VEx0jyQNH+!_6Sk
z^|3DPVav8ImRE7SAWopP+OoQpI~V1R?dq!PW}tc{+*i{7HkFJ2#b&_f09uzfgP4;p
z4)=W*u6=WE73+Lz*$k#k6#QQ3<+G+VnDaJm2|oA<z#u2F<%KMP3}0DZ&QICiyRPMC
zcr<xmg0OV8XjLq*Ao&a=4K=(U-T2z~udiwe8C1G*dtXEmTfnOEDocPd{|h!{8Q~<v
zG{m$Z4kY6%n*f_rf9jMu#_KOd78&=Xl8I8blA>5&N_yA^wzG<R!}{(JK8N#&+_l|i
zm{=r^>QFAOq{UT$>+|i98!vUR?^PjyaAR?^wr8GGP#P>{RyRtQ3(!iS4S8{ATXjO4
zTdYL1dO-W$(KmOmSkKqjEg9rAYS`z5@GKIRJ!v;U8bEO;aF;F^Os(|^vJ|wy#?g3K
zZFRx8K$9qN`Mjvp??cvi4$Au{w|y;KJ?cdsDFdwK(wXDDT1O(p2+*ZoW3pMC?hGmG
zzq&I`+Urm8toI2oM92Uc*4%O)YwH+ZH667UxPCS{J{-Tkwzf<hf@Z>C@4z`%39SnZ
zZE%tviVJJ|2vg-c_|7#JTHaxed!bi@Pf-KB64*oRp_sp-H6%(c^mVvz<G`2o$Nn?X
zl(6UVg%lq9tOvEOE#{c#EI4hE*~aSu<469(X@ayfzuc%`J0L(14Q70OdcwcPr|;&w
z_%7V|Z3A>us}X)b4bB!6#J<y>AF;6Nd>e8i0j245H8WDkCSw(|P`;VIyyChhcj8=2
zMNp_PpiVFQeWX_S>0q@W9mb2vZ*y`6Ngx!K(>>rBaR5-w^7S=!XPS`inDxfR?Io}o
z;cN9T%!$jCMyj+83<sJ5vBk|PkXEVa>Nt-d+IYOAqN%P#2D9)X0Q-)W6tnni9$v9o
z15mag%_3{haF<OHA+dv>r-Cv~G_;IbO()B#Bb_~X5<oD3(+4~wH3m|v?YkDV-FQA~
z6Cb6)WH_&buf}6kS;D);D<2#w#oko0TcN%mLXC?LQG9Atf;44e+3#^$CzCWmBUi-`
zv*%q;oGVOQHLU6LbFn=K)b8<XlS@p`kL0$t)&hJp;G99ER>#HE3F_Qh3Qz}=q7^EA
z;y8^jBHk8xSilZ8g<DbY^YG4~{x5A)B@#$M>}w8`^IWX&vL|l!P<7AM8(`SiT{t2P
zE|Ye`fPOnTu4ll-N`nbmXRmtji~J?F*>_$MF*C%xc7`t}@6_q3Q&$JtLV2+FWb~2A
z+JJynVIt@O>ogIF0d^&tuzr3%41~_YA=522{hR{$;j%iwly50il+*G`%knM04J`bK
zWN+H`!O-KZ>*?;V^IqzUJL~U&oMSM;dWbV*g@`7cngrB;#pTZLzdRTEzkz6j>GQ)J
zvmg#~&05>f-rWtm)r(Um-mSUi|D}O2nIb@t?+wqh!EIk-KbODz@;tpC<NWcGqUiBi
zWRs6!QLs3Ep+s?=%0126Wkf&@0mF8RF`oE}1}E}$`Uj4`yCJGff*^))rcBj)US}zA
zT9d~^F5rwC@7_o%24wzoiH5lj0;WJ(;&t-;+UN6is8hfNeFO6S@qX&IxBXKjd4=Uq
z%~4=d+G7@PQy_ur^oHu5kTR5<AZ{dSt2A7q4}F*?5`pG?tY24$)~4vmh4eDv&Sz(x
zOX^xVi+|LwdJr>ATmE#(Dal~MN<vr_JaJY<DS!$;4FR?QrQ33rRE)#J`xI`7DUb<r
zI_#2|c!G&mI<5da-SU|=_I~<wrrY!Bdv32hbN8V$j&K<_OL44Hg236OT`Dwh9|yJ4
z#w^K{q!s52`o4?wJ#FrU(2Xyc2zyHB#%)KG-MQ$uIg0XN;+7WF6fOA_?fc~<r7H@(
zm|7%~!@Tc{sH$I83Mp<@@eL)wtHwTblJX8!2BY8XyK8=uWZs}HFk>bP#&L9wC&{!;
zsbU<YC&@z1{4xrlbjg!CMay#5tkJF;kW51X4oe!y7H?fo!`D+y2fhlE+U{vdf8_%2
zpV2DJ+HaQpN0f#DoV9F0c^YD;iM1-u-wg`I@#>`iEK;XZ$|uuj=Z|~XKJM9FxPtO`
zmr0BkmLB1I?xFo0vnlTML+l^+nuIol{Rbb&ZEpGQzAs)Rk;}O?kaUiGYelc!u6cGr
zMmXOasM3oq(y9DJd56qM-_Ma|(!F{ah8H)+=ON^>LyAIw4k;#kiNjPcr|RjsdxG4(
zqjG?kHi1CBglQ}KUhvfovx8$5GBW#b*M!tWZ60ju0XrHH@pSdGYIy+_#|i<|={ESt
zNM+szQch11ZPWBS`scS7T>txHwI`EnI=jrmF9nj?uoP!Nc>!wHD*dSLfv&Dkem%F(
z>5@-}OQoopzxKY@+Y42YH)3t8cF~Q)^B){u;{8Yu@+As;YGno$Nb`dBZ9oy<O@^{_
z`+PPVkI}-_>xTIA)>odbD1V~TmLP&c%h^wdoAvYJpSY@WUPp~5Eev?gY&{K6KXp!+
znqZ!aJKcfF2X3%Q;W`O&0vo9JTujxHsgueyigUQYJ2UR=kTrEh1#ioN%C0FAO<ig~
zu!CQu^+-gCkx+s8S<%s;vE>cU(1bG`eG9>)^zhpwm~@fKufQDM-Xh3L&fk!E$@Os*
za?*`2vWor+lqdFJxre^1<E%%fkM&&BYThbUx}|#9k*Fh}$Ve7>+Ajc&xV(lt6KvpJ
z!<GP*JebJ)J)Blg<Mb^+L#UR>uj&#{-6r3Iu!wpNyGH|SHbF=M)Sf!Z)gDb3_mVBK
z-|;1&S~HkwA;o^iuoVq_MP;txHbCvBURWQ-tmql%B5u6@h)&c9c>5R)A(i6P^ZFB{
zJ%eom`rkNA5K@2!v86d+Y1VAibM;x#_aK|NuI1BDc*27V0im>3Z=<I+O;<;uGaMY#
z^W?)m(BS>zNYRG#bV)CFLuTJjmu<x*Gsg3RNBHEMfY-B%Gn<Y{yC~-uKir;#Mt%Mp
zTI8VfVZagwX)GoYYP+0$SH-wHN&oMcwNqv-a&I8^A&7gpTuvG2l;x>;9;}Wu`@2AA
z+|uRbr{m*yd!v^xsfnIU7I^|wOm2T|)0r~!nYyvKV>|_Fo8Kpc@9zes`Ga3<0@5E>
zcX6`PqrK&rL}rtQEW6XumsYYPr5`SR*EQ^bZc0JM!Sg=D7X#?rU<??lhg7=bmp(@t
ze4pp6nt-0R?ZG&>cS%vLYzVke1CO8tPf0*Mg`ZpfNHx4pcU7DPVp6A}Miuz+w-4Vw
zbPZ>}dzpEUi`1yyj<H=Gs-KWL)JxOzGeEO(J@mW^2~odE8@Yz_&zd{P`Px^wO)&(j
zTyf&v_PbN0SG!27spxIlBnemX!KU0U7{`Hz&M(82KDY1c_x*eK|F9q9n11>{gzx`+
z#Vcbb6@jRS7>b)s0%)+yWiL=IF-=H!;(N!I)DK5oox17+x}rLzPfR1xPpa6N0!=b`
zLUdJ>>1z51ikRb5>-LnpSE$S=?Yuo@3R?&7q1wwAQ<g6$NSe7hat879VSmO)FP8^?
zo;p26td{SvHQ{;g*<vd2o~K*MQpV()a7Kt{SOS*)1>D16?M9<u_Z+FIbLWeL)JwAQ
z61L|{YRV!>p7yNZ{HVsw_S%yq3ms%*NCjK>iY=*ONvKv?E~^3PCraZ_d{FX>LN8{{
zmeZRQQr2Q1o4f#TK#;#&^IWYNaWmaZb&#LkQA{^!dhW2^PE~#s-=ZM8yDZhces^0O
zB{4<#_2t~X--qH?S8-0|W{arFK+w??JCsQ%Oox6NLai)mJopjPUp6}Y*c_&CayLm+
z7-IHe@KUGjf#r7T3UF#U_9h?ukGWTW2=sS4_TZ5W3o)m<Lq*tY*iUQfSTF(x>0l+a
z(ikrvNEfE7btVr@*Ozo|ie{dxZMyIK)3dZp!sS^9Ou`i-Bp%wm%bI3&Y2Q_GP7?$n
zOZ~J3_u}mR6kHS+D#He%S!q+SzK9ZC`t5Y~A^jD|XRrb^J@sMpIK9lrK4ivw2wTIZ
zeXP??4Qs-bH9Q?$ycCb`l}yR<5{>#P5ve1M)0_~oHvGhT?#Gb`kfU}F@2K#nhkqI+
z%RNS|;o!*^l$i^YGgfe29prmck?eBR44j%q$PB7GPB5HV-)Xw^14;BZz&r{|&Zy3(
zI29juvtYTx5s~*oQ|y`k6P9SyP#SU_>7KjjhOE=KJfVt$5^pRf&e?jhM>PFE7_vR7
zgIqZq@sUk7#OY?eY<)HlIH5-7sl&`eQf13#XnydkPzkAAzWPcX9-m%_5ama80jOcO
zlfVep;Cn%C^6VyM%FQ+lj-s4XUDWhEx#(S7T|3FPb6m=fJGIC9s%Co*O`M(d0u3+^
zTCr+gCG~ppn5^LZjdbMaCLkF?#a&i6do#%Z4Z?sFW_d`t+^i+pB$Aw{;r#4OW)KZ0
zhySYjAaC9Z;_6|DK!BY$fmQTCC9prbzQb%&4d#6CMSkl8{Zof925W^)_gq+TqKsWT
z_zQK|3}kkg{=w9cJ(q8_zRzq|8XNxqYkRXL$#E=M@Yy~iMLI0Q-FzW-fMAr2i;rA?
zntJm0{{O!i=mD1l8cBfWaU-*0Yw2z+5X)Kgs2*ZO_vSYI>k!-{(swsqr9hO@Zg#io
zta_7xQmaYui&V2Qj1-hQdO>T8iBjL^=jTD0Y;ou#$H~Y1`84rKH8w!dY<2nEfVG)m
zQd?T0Jm2DD!k$Os-IHstBIiqi(jGtm1=exKp<#6|gPq7(<-qVyJ-SF`9*zy57&^bR
zs`&!2ZAce_UjMh`M~PPoMPJjabf<zw#I|hU86WA7noGley>uHFlxCfhcq&L4?*Ky=
zYFhpPYu5#3O8tyjvb~Tsoc~K8|C>Sn_rDv5>eBy1ZJd8_ZcP8rKmD6?eXo@F-YP1_
zu4N8PIV*Z39Dq*`oq5g96W8+{zdZQ|){g&W4DR>&#OL)#C(Qt`-;_^RKmnBVpn}ab
zgUS{4Ny-s_z=O&f^YPDn`qQL}^Jmz0Ix~%fc#ayl2Du4$mjG1cC1d2LY!<m7REJQ%
zFVD}<*O!-<+wbR>=jq4o`RVz0`ZfLM_wVPY-z_<GG1fX+$);Euym!qNvb|wXfX%{g
z=sea|^_vC_E<H+z=k0Gzn<YlR15z^8J5ijju;l%s(<$+j`qBV+A{+U2=h^=09R-B_
zpKTMQb8hHtb=hqNZ}e9Z`D;MxybmF(oZS8J^lv;Z<WCjU6jULN28YiqF;xnrtzj`$
z!^gk)XFr&K2S;W{k;G`->o5}|A@f<rRMxanw!t?@yH%xC4XoN@i<?SiiR>1jw-A>x
z(b(YA-oV&fCUH|V9k8PO;UA&qBqgXUlHPQnVr8=VbT@a*`6X#!1({-L%_BgF`s?|s
zf|6g#U>bmKDMrjVEzHF*__(^NM5{3$nz^FqHeCdC=|E|bTaO&*=LS#`V95iR{Y{Wq
zfX^I>_y)TkZ+^$$E01<nj<pN9Z7)L_9Gb*Jpmb&Dqa@S$d4{1B{qE}<zz|a>uw!Wb
z_4)ZlxIgjRvJ3GmMI@M7aBleRUV8Przsml1H$n_@=l^{Q=a&Efv#`bpq2+7R5fOff
z6Ne;oheaO$j`MwEbxVS@nC;;tcTlNd8It80QkXQ)YZ#HnjnSGYl>&b<`<p4vns{Si
z5Tw_f?>bXNo9>b&plU$38%yzs;MUfm)d)Bp<hAhKJAOObRn|%N%2l8r7wRC;4IW+Q
zu-{^yuXcgNfbJ|~RHGrJpy%6nt{1GgWF22p5XE(-jveE?pJWQR*|a)Dy&?P1=3JSJ
zBG3}|sNbxRE@6hs)ayp1@h%g_0n_nUeu~~sfBZ>L0gDQri!E@bZ!~4`X*H*EXay%!
z0k#Ij?%p6co1Z9*7DHkN<Q%QZ8<IVcj>kolDq!O#DCU&mbrc`x3^Vy_42vP=D7Jv-
zn^l?7#j?7xceM7Y=C~Lw$88&lnu{g+OH>wBo^!GQ)XhNCV~P3I7NE<z<-4`l<SJp(
z7en<X>YIL~)J#{Z>EL$awJH|WEp1TKT_(TkR-A4{Zvyct4`IoZJ9G**C31`a7Jjiq
zIR`t)Ec<kflYYKn>^BaMOrw4yCN6rRuIjca<@Ve%qGE?e2;9oqmv;rXS&y9I`q8hO
z#NDw;S{%SF>HR>R(DmN15);SiFvv)b>Mc0A&p{l!iHDEAZrh<QixW}}mlYbE;CGYH
z{qKgW^b$-z_G-glAh|J{RY@VSf@C!JV)Cwtqo@~>Z5a-4e9-YU%`Jg(+4q}e`*TBq
zv)85|Yd~oI@g&0)oiMdTQHLnMY7M&(Hz-fqw%qsh@hy_zbfg5&*d33fg3tY>2CSEt
z1mz`trxY#Q@iG;hhaK-GLfu2g+6*dxL?t$=A4%htRYj>o{uw8%$TK4UlWipEH>YQ=
zrXmyBu%%#%m+msq9H7vN$~DF+&`-Kh5t{b?YK$3PVtb9LDN2PhVY>L?2X1I|VA-_3
zxL1uBCEu!u4`lSnnsIoK@7oVRwcH@dKFX(chFI~k5xiK=m&^Jp;?lIX_U3LWFP^8a
zZ|NVz(_s==3U^i9&^+mYWhJK$$8x`nb=nIq*Be`~Qj08IFt?}YLC3Xv^>GZ?HE&-C
zM1ZG_jFc^~WJOZC6yR;!hB;+e*4}JdQ+uwiW7?)3Tn~$pE+vIVGxmX(eEUKMGQABg
zTjC%O$m9bTPhAe<+mP;C1TpC%(=2QCiWka!&RSd675}U8p17wL(D8w=fSAReq?SPE
zDCDa>mn%#w>WH@60<~%$`=Dlo$Rtw<&n|5kOMit_5ZCbMNFU;r=R6Pw$0@-E*;Gje
zG(EPm1PU|3`aQ69kb2^xNQ*}|#<#Q0JP@`W4U&Mx1-)-}{);lc8{Ij#Kz_4>Kk2Nm
zp}sdP=+ty>^|YT)RA8lN_}foyvcoHiBrW0^XRHR=wpD{gBA;|JJ#~CeBfU~AY1N~Z
zK?BsIi!`@Q;@LjVY|`i;RtWP;w?ewQO}jRvFHp28bl_bWUQ7rtLwcnX+LZyCm2F9u
zC)QCqS+d*3>BLwHI<=yK241OCVu{(KdWgc>2N?$%ae$BtmLZ<G#}rs|REYoE%~<2A
zaPhMm(8QCmmL%1+XT-=LI7^14>O~9C@2`&?wbIq}Mv9f<IhM@KIF+!zrST%Z3pCHS
z5`3~gZ9scjb#rp&JsNN1rrsS;)poK{^y&mn=9iT~u50DwtXFzsUBgJ^{$ba7DYFC*
z+aQSB@HH(efgZBTWUg?G^80Q{3n5UE{=)dvJ&Vjbgfbio1NezK$`(i+X~fYCR3p-U
zvRUbWtj93t+UQqEVx1ahDC<M<s-jD(z&@+t{J7)}m5Yw6)&C7yNE8G*8g+JoZPLKy
z!a(uNM6N2(&kGkN?wit9-8`Qup<D{%9V;CsHfE@^u`K@3wc*?@u&^suoo)YZ!W4?(
z%x%*AyZ_dIT>h#5DBGk{A_@YDB+oT=O9rm2mV-}>57ZZHcbM50j}i2lQ^@;>c#TGR
z%EMJD-I#FtVLOqfbs!s(ib(_H6lf{u?93tQ`;t?l9w3xVXNKloJYwdKBUK!!9s|+(
z+FsHV{L;^~GO=1@d0NC|E>0@nD~0L7`3ef1=Fzeni5e~6-$Rr%9@5`VYeZz`BPN==
zwEVgcLhuFEd_gB(aUJVLx+nqFaJR1`Ho0kMlY7~R6pactHyXGSm?Lr86fd=`sqtof
zBLAMW_hjt>Gc~2i_DeFTjQ!?F2T~gFB`Qe2KEJ<Gy4+t8B|i^$Hcz_cir<`i<2G;t
z^m40FLE}Dqp({G*J}YQ{)-hiy2FDv^f7=?|X)<?6;j?0k%L5TLjcU}F_VUO7iO+w`
zpFc59Vql}UW!xUrXwzGQ!8LSh6V4C}KUo3hSrbr^fqso*$_HEEA_o1-i{b4p-%^=k
z+`Z9$_?<Nww$uBGTeRgw5lq%K2q^{)e1<fXGDabjwHkJ6y#jR?)$5jWi3g?BIEaKS
zj?pDbkzUz6_w*B#af(7tx1wo&(B_!FLwXk&P+0(VSpu?p?28hqFK^R0$v~e5I$ix}
zNh4aqFf;q|g=D~_WK>0ZUnrZA`Y1ikK>G7M(h#ViCsP98$+UDn@?3GnWgR2ZUpPkj
zNlNdqE8Vj&{*mI6QN6|+C6we#YFPM}PE-2&x&`Wgzjh}HBa8q1@Q=`Y;h#8E-q9dF
zBd^E7Vhz7?b&VKx^jlW9Uy0HGJXV=J=mf<61%wuPY@IGPJ5OkebkK0LYB*W*%(2pr
zUH+=?u0wZNFg05}^nfcsf#ieBfX(KH^nXP#oR%sj+b};rzrR1Tk_ZhuQ#&WpFJUnG
z0Z1?Q?KXtxUphXU75R0<sYm_a9q(dEl542%VXlaG(Sem!b$r&=u6_Eki{7mqQ^H!#
z&Yt@Ivv(%GzDEDS3U&!FrA22ZyN5c?@l=Ij7@B43b(=2*)L%(ZUwg%uG!!p@nAk`S
zD_Wb)j@w>Y$5an$lcmVdq<b|Ou|n#Lr(cLuNaIeuyt^xv+ZLoqZlMT=!6(Zf{{ko6
z!M*wMiefq4+U=pfqO;PX0P9`TG(oYVZs|Qu;!7uy#_j3H)8BsfbkvGOInria&Ql8V
z^t7hV63-*=(%Nx~+0Cub<kOgEbsZCAc|aX3lO`lFXF>-9`-R;XWD)wM|A|!o(tPlb
zm_B#PTcY~)Q$!_+<^u;RleQ_lqNH3D1RN$aF_%+?NmPBmcB1Yd=#a`gDcA(kr+D9Y
zX`Awwv=oa9l61A}KtCI<sPpv+GJ27;q!Qgc!A`!U%#T0$@+Uy!^x+>8Gb3STO-m{j
zjPi;lHFvi@g47Dms(|5!167NFVL)0cI|<dw^!+tk(L&rPWY3Q}%me9fy_Y{Xjb>)0
z3kUvbx+zMajMN)krb;yctvHU}fBH5VJD6#tj~>8_@r=g<pk_|=Z5Ra^8i0au<P3F3
z0PVFNK-cg5NfDALQXn5Owk$2_g0#zu46REVP7Pza#x+gHse`3EOa}|7esM>}DAH^0
z;#YvurQhFIB7e>|K+741Sx#eZ`8ygGaX@ECe672*;8Y2o8RrdJVp88si%1VBJU;@j
z^Qvw|>x9c+6(wYi!Vnd{zT2(wJv1PLD?c51>!53<OQf?>m~*3~dJ9rTtb%ziW~({&
z5StSM;FutXTvWO&7p75AqPX+rXh>B<nm+!0BI3yH#Z9L3k4W)kzcfkZ=$;>@gqa<t
zslQW&`Ei-w%2;Ue#Gf1{il8uQ0yXs%LNi88NO|%+W|*sd=-XFu&UZfJ#8^Iq;#~N(
zdFLNk{ERjV;BIM4KA!Bu>vfT@^Oq{juZ@s2mLq)eWHoZ+7YO2=+jOS5CTQRSOeuIW
zIeMyr{A`i5dhF0k+@jjzt5b%@pz`*EfEFyN8`O|~U-<mvH65Ls$_FT1hxh*7yW!oR
z<wDIIZq;RnOjWdam0#y0<b_^C`qe<$^ymEv@|YR4*EC$Q9dAJ>mW}539<=Bx(2t7_
zI>e$0&5r-`qg)6_Qeh?=Q8~rW6MIC{f(w#;Ke3{o=$Z3jGB`;7&EIuOtZ%c>K!A^T
z9y&4n)9dbTm%=x#ww_;?ymVo+2w^A$h-aPX<^T#jXc7(aCDNq^*}?lKC_?%<BmMgP
z+CoZSSkmjM(}@D}HBax+Ld&?;TkHy@7`0P^q#0v_<jI2!#~LPDtS>2W%BxFCBOK$f
zP9|rjM?b_qj(-jTY@1EDv}36N2$@mGMDP-nF48-RUWva{5PsLK5)<6sJz_zoU{8Vj
z)M9p*$4?3A{Dx9Gf<&|hX<EzoDIR_M3ytzw$WWydC7`wHc$ri)&Apu3gDF!(`StzL
z(0Gie?I*L6p`~mDVZL(rP^ahKX5pne)I75-rUyQ-W*KaX(jo2qBLn+1zJ&s0ND)-g
ze=3nGnct=cq<KIqPi>9A%pLOnkL~+As;+MHdqBFJ?p>^`)cb3yNEx(gP5V26Wgh37
zNBX4W(5^`VUwWf>iM&t-6H;EXC-Nc4k8zm5kw-U*qnO>Lf6`^|5TLyOitq8Pmm|}M
zF~m7=i@nnXH1|jR+#jhOCPJ!5@$dNe#^B~>G$BQ2mzlf-Q@)~E4Y&=d8t#X*2>K20
zlzKm>3pd{Q<nWQ!Uu4mf`P-YKl~1a0G^c-zlB*xY+1Dxm{!dIdzUf1MPE=yku6?=j
zIZhZqy-q@_Rh&J1+y1US=GeiNnker|GG3HH>3&W_!!)tUV5V399_H7@^tZl?&(GJ}
z&~;Z5vNVIFiJZZ!EoE(0GGey$>n62wn;t0ia*6`EQN<R;aPbQ2=8AqD$rkrC=jP0V
z<r0HH4}~&STu>!aJvk<wMSB0^&jSNw|2sP6!MiuDDC0yL&NK9C)Pk&{OfWU*Ft@Up
zbL5OEePjFtaR%ReIOciIhD}iC!gJoHxuREv8so4rn$`bg8grKa*P4QS$?bcCDS}d=
zD9En^o#p>M3G{d-0_Q>Ib@opF-!_u+G54HyDC}O!HMJ}#B_SaMrB$G5O@Bc^=mGQQ
z+Tf_fC?*n;A}BRQP%1&+279Xp^Q&$~q#lw#AmuVe7k=qHIUyZal*Zq63(`wJiqs$U
zn@^eLP5ma_le${MY57}U2BcWf&;;|m{-bQRZid#8LrGUL`*uKG2%9i|Knbb^>NOn~
zKh_~4P2@*Po~cKky#P*~rm?68<$u&2)0QqiJRflJHtU?#H|WPDo?cPiOxExE0HJbQ
zyrD(tjPXV-t3W^2?2?$@O-naxZ(sYHxZO<^)J2tNT^`w`g9lM2%Qk6&R4Xb$v@Gdy
zE8Jiz>+B|-EI%}gde)|})bFn8EK>R-q&pwqKjMfyA12Gf5});Mx=RHl*%VBj8~m!9
z`gi8M!}J1EbC_;wGuBW}V{PL!$@|}#D#)3AHvI6AC}$>QE2Lr$@~cRR>h3{uPnnqf
z^CJD9KR*3O`m9=;Uj0*LvN^M-3tuQ3QgG50K(*ujJ1GML`hC2ld$YHj50fi3uGIZl
zdlm|?)W1`jEFCSoOZlR@VDqF-^XHGli7vBiBWvpv;&&;AP1}{bJ_N2A>4F~P=PJ<L
z8H+pfjF0Lllh74XCyA1Y_pH@e;Ph^C(rCkZ?t-$dYJniX<J|AT3~u`HYnY!KFw@B#
z-qql}(><&PpQz*%XEyP$UC19dso$E-08tCJ1S&;Q#kiZbyr_ezTS*N4%3>nI-_fu+
z$eTjTbVgy0xhyA(@?ezt3W8<}%;QnFhdJbH_tqVtCbJ$}fc~lfc*r~@kYg5<o@wS`
zdFcy9Q_AaUe263c+F4Tm!`~C2?!?V1*Xa1=p<jggQDj66X3-2)iV~*!dTyHf#pOv2
zh@MWz_y0>LNr@ER2^t+^AD)kmc|EGywcvCo9X_;cj7J<<<n=|z-rhx`IbEut{M_81
zA^j`hM*wT|wFrGf$9CkpN<I2}uy$C9Qw377<v2ffa87$NJsP9m<S4{A&OE8Lx6?|<
za#hL`h0Cl++Y{O~F7PkU$m6;4Bt_Gb=IXsM&13emRT0F(!c`b*i-r5oKaCHm?1J5Z
zHN8|{Tb$Rh;_Rf<G@Xpo8-d<?Nolh&y=KkZLMJreQpAO*;oWDw#!-(m#M3<ok0~b*
z-%@a*p=87YeGb%jVblhG80b&qC(t!0ehN_FQ+yt)Pzucv6ehAupx^g9;1V}o?@z?f
zb|Zc!HMn$_c0}Aznmzg)s85g6D_+`$t@^-{x>l1H(q2V9g1z}^gS&Y_ozDqL!->M3
zn(m`{RCsZA*?sdv+oy-X5-}fD$Jm6ftmNC=YOEjphV?XO+c^2iQyWJM(+BQ95?qmr
znp}Epx}aj@m+98@(D{}FqCE#rHqubIrLYw(>f_Kb?@)o_I6pRUeq2g$@7y;LwJ81G
z-;Q4?y*GlL_8vJ+8_y<OoL#-9<cVEd&W|!>_L*PcA^zh>7EdpqS6{xZSORU}bdW&K
z4=iiq+OSkD$*+D5=Enokb7&6pnRWLzP&tPAEkFZBO<WV8_%S@-<~Zf`0w$?lib{En
zGy%1CsgXJwYi|L{;`=!M9OE4BzYk8opB{1;ZRaqAqae3j1p2PXN6b{nZ%4WvM!VsT
z0M#f0hZocaFdr4vYu6utl;5BNxe_f%vyAam?6dh)AMb;yFsO$_x5r+NB5N<0Ba=ww
zeEj|0#ZO}EHN3R6ZtuZ2L$vk*m?-Q8NrJUwgK<O|cxy}tBD=RNYRk}AJgZ@z+xNb=
z=@B()uLfNpP4CsTo(@C!rUmH%P~sTDH)}P2ty?}j)WBglO013fKowHZ!fI+d{Cqi8
zA5A=4GnuGb8l#tiv|-qnUm?XMC2L4OKR8k|>I6CVH(|K`a%$VJV&rXvJ*2)5kTHM+
zq+>xo2kM2+^G!`lx?ReRKM~oV`0g+qvneUzG(BH%UX7v4wBdTXYzc4lGwV^kUvK_=
z^LBhlolW6^>d^;XR#c|<b6(MfT2#G85jFSgw{s|h=+j4@802XwA)^SA5z9kI$!E<L
z)Lj~2-ifX(Kd!^@665dF<E&&dC`t1(r6eQ<v3z&4D(?_0y8`m#^YeiKO11$7rwl$i
zwhsmO%@-jl1cTBa>p&j~^-Mcohs?V}Kl+Q#NSP{#hv1H-HRVXZfb`>Y`it*Ry6uTt
z()GO`UTr^gR_fR1;rbfX>x;krzRu*_$raU>avN5!Xk~Nbo|<>IH+(ugA@gMiN5TQ5
zwxCc2ar5W^tY1X>_nDQ=#a$h$emRZ9O`vC*bd-!y1<<l}OG6F~K%+OpvvDy>m);&P
z=8wG*=zFI~&%h;MNkTmnF7er(z+dVhF)?&2&bk4+tY2OIr}RL^0y*aHa65I`^I?s2
zsgs~a)`0YaVTL^q?DOkcs#l;ff>_jtU8JM7S_{%p0tN9QKll&6pK~CJxhB&=4~!FD
z6X@Ar(kne$Q`A5GS2|1fzT9~D?OUepM~xM7C|QwU!d>hnQtZ@#bWhtkj!9D=)MGJo
z+M-^OzM@ntXbuJ0*|^-uslmJtN8{c4o}U%WV&qwA!5Z`$-5yY<F4?R7G|>B*#=EJJ
zuF*mTEj&Ycoitn-AVE!kp7tqmHry7}uN739Wp<-6O*f=8&fdsP<&7t!t5n2ojSt+{
zaxotcZD)-fpRH<Hd<=OUqq?Xcmrecg`B=s1@6vnSCPEGtjK-X3xui^6)hjhGX$9uj
zRMRt5&a>;=>CvM<>FO%H`RS?B#oSOR#)L^SUABPD{9LB1n|^Q7KV+zB!Owkn+YGn{
z4T}I<QjE80%TtnI)@;3zVA8Oz_q^;pqmx45==3^iZWj5c7O50uP|vr3<*$@obhD3v
z&PcD2+IXF_QRjZJ0j6l5#;;ReYKzZbl+2*7nBtT=&g>}ewjyUYIg9^!WKfuz<8(=p
zzCTCmSA}v^P3k}^cWO^b>1kUk%11Shl<m?lJk2X{(u$gMI2Du94TFq9L*K%rtoLD|
z>U2k0;%oUmpa@)ZyYXiWy17Sfn_Q`)M9;xKOR-=-0kzXij)$`gO=0c;{o+8w5a5As
zM{AGi)(>F^sPJXCakE9@(t<)yS)`E9^Uin(cV8Z(?6vj$IMjd5*Ht*}?yPe^JYAzS
z%5Dvt!{b2x>_5;1w8!quv$swD1FCJPRZ&jj32fK_s>TGW1*nZ-a!D>k>REN~!FSZ4
zFY3|E@v3<Ai#0{+4B^iPs6PFWp%kTGY1E&dbMgLvu}f{bnoI}z|7DkouIuhzQE{mQ
z-8uI?TipKi*PrN*n8I_QKF{Btbbwid%;uc54il3@Iu_4o{2N-DH0&U4y37PrVU|yy
zkjiyV3TcQc(2t9geRR~j?T1&l#3p@O&|Ck|a3flIn(~rBHc5)7dL>kW5%E^2yH}J0
zYVw|Xq-K^f6Gb9$60;d@3+vd;2{vo#*0Fx=iWG-jj*j<Il?`2dI#l3l>OMf9-m3kD
zg>DS>$4@Nm9*57LxAO@YBZ2%v+O>uSCkdwFYf5jFxYSUlzdn*x%<hQS`{6NJ*ZY_M
ztMt^&lLdc|uzL2M>g+pskM+MhpMNnUu=bz%5kM>*+#xlh+h&(aP~yCY?NH$N6sMX<
zs+~b$tF}o$FKF0x$G!yHBg554N@R8fq&2xqT8Wh!dQjPF!5ZDp+msVNcQ=jMW~D)M
zqXF`2f-2_4s(Lf8s>?RyEl8bL&Za#XsIEhf)HdccAF=-X-9;rr+tE*VD!Mul|JAOz
z{>{30it*+uQX*`>%z2Z=Pfy2vh=vpzn%{<|@`8~ht;i0PTbHHq^4DP22Os+TmtQs;
z&l#UizXJ8d@XyZ(5?<S}O<IRa7Mhp-hC$&B?&UACB6EfcfhZ_|o1W=I3FxFOy2{k#
z5tw$ZBEB3ef%f-OF?GE);zvQE8viMdRk)aG$QcKY%!wLS^|Byx!;Vh*DP4<YBsYbp
zR7f+fcu-4|3KXVjIqM()bb8v>FePmZ5M<JuTKMCxq53Ss@!bIRHXIpx#(OpOH67O7
zmr!%&hkQR}DrtQ?eyq|No8_OqGGkq;0&~0pVnt!~0l`|TOJfwG6TRV2|7iGNm$TIQ
z7Mz7U;*8cVDIos6ol-5Gr=J8Y3Ji#*;v&qlq?T0764d7lxz1&fA0<eFhwd&weq4-s
zeNF$jjO~;$|6|4<Z^k*}{>S0>(|d|)?1qhk@%M5eD4zeHPn8x=Wb+T9yIxCCY}KM?
zPF@Q7a%x?hXG~g$a|QBLgUm|tunPKXmeV@Gkh+KYUBDds5`(*I@)Un@`cUO&w4&f&
z!TUuOVU;ZJDnAQr@2=;fz_hQY{%ES24WFQ9r@%ufzJCfK?W|r3lu~9w{4r1<^*#>i
z7$S+nfwIzu81!(}op`lYUgd)omGt3LNM&qq%k-}hfTstbYlF%o(^X+5hG6bvRo$)=
zX23zBI;CRQ)e5SjQKPn1J$tHb+n52;zmP7Cw6fk6tE$%pfDU7jHKa#X-!Qh(&Nvmw
zMS2hHXiuNND5^-qyJ$8$)YWl*PTu3Afv>)6q@S(+=_&uSBm$4lgi%GPl7KOK55gF)
zTA)_68RES*!!}TrfvROHLQ|;<qoqb@cBx9vo3UnWO=jP}gC$K!v5vpa)clXsiv`mX
zfYWDstx)>a<xXW<C*w|)mvT+NHnINEfAH@g|L0E#f3AQ0E&LJT_9ox#zv*voA%qqy
zrH-xYbg4?EN!d=BA^?;rRaq$VP!;kdY<IX%KR+t3YPzjnU;FJA`Zj|?EM$H~b8yN*
z9#-)k!0XgHOc7umY9x`kzC20~A1c9eKtlqt0cF7Aty0{6T#W7tRiN1wY$s<8ZUPNj
zU5ZiGDtCvjC2|ozkr+2K==*wkk9pp-lA3A_n9nmmwu?a{QlXc0)zHkT?8t+fLxon$
z914&!JiP?9)xGT104H06JLPRD4{j}CSuCJ>5@W;=r}t~HOWe{KN(frOZd9vAH8D?O
zVjyajV8szw&lRm<WuPE&8~DSecsa=WZ0z2!WwTiDw%UN)K4GCT$s*}xJO#Zo{5eR^
zDmttAjWwg3RKwx=%srO4$^6>ut%lllFiW;bss|cX-Ud8#K`5A?g9k5ypA!GF?0!H;
zo!o%fnlgDdcmvWC+pwe`={jbTq5Y4eK~eY>%k<Rx^m4h?W_8QtIhY|dE(Z<nkB_Xv
zy|)40pVLM7>+N?;QGt4!s_ov+MxcAEI*D*^O!DS)C8&3W8F^4U`3?xh4#>u3^>y8j
zvS(9+=wKjmVh2bjd#4d(K~j!w0L&lHiK`X)g{^{2-`_D6R#!Gx#&>q@xZ&FL5fq9+
z0k>+WS=N$(x^Bq@HQlK#Kx@Y--Zd^nyNINNw_>^mbkVd1R=r?VG(6FQl?>kZet74C
zJ8fe!kM=><91TIsPn4E0Fd8e@bfOSoRe_=ci^?^A99e2VR&2|C_Vf_I$Vdk`*B8&L
zR;$aE+*j(DFW0P;VS3}cIP~4?e}?#<^mhLz4F7RxZ-saw))v!v0!UzFT^O|FJJv9(
zHA!m6;nJ|MmxIhTo2`YWkVQ->!N>qbQBd_k!e)`b0r<mA{-!HHb}cwZtB~husN1E7
z-{0+n)w^0xvpt#aq?4sV0al(P<#c*CS(R7!X}SZ#`|?J);l19x)Q31{qGC>5oJpHi
zbStFuOJ$I_kWow>=^t%VI%t?iLi&xHc#1;Yb=eNTy)103jP4Em0F_uFof20cCSS0Y
zHMMGtsd}&~Tx2@(Zi`^kfRw61u2&eEOW~g^Z`H6M6`<GPE~^7w>va05Pc_+VZe%OE
z0jZ^^3gsX{Ln{2_7jM7JHn1&7kLeZ7c@;;Ri@}DvK>>za#;%YqYucg3-kn3b%>s%2
zI#+F5Qz{0fWHo5e*7!?h(5~Ql3TerHedMt3v&8v1;rzJhbX#ii?6iAM+cc!jiZ{#2
zc*D#G?bOZZX>x#e8}=&{`5~1Okdp0XxlfItA%*Gdce!hEw4yY@u*CjytGW6czrJiM
zbC1uH+xYsE^O6Kl)ft+k@*L`Np^}O;IO?^J|1o(+fTmiw*oFPzU$s=NH7M%Tyh@QZ
z4$ZPtjigs<TvcVrFQ$(4<1c4alJiwDIE!YmF#PnV6;?~6s{HuwS8&c}D&$psLv|gm
zC<4V0lu})=ojHqiR7QgYrtdm(*X28vk0`+DY*H3I26{M?dV6qsHV@OFM3oXnRDsxt
z(i$l_T*xM*2cq2WQC<bzs15_k9${4|Pd}%vz^9(NYc-tvc}igDPsjd51wzxJquP%t
z8-a#%>Ln)B6wJ43NI(9%wCEi6*{nIFCC~h9SFC8B_^X2a`1-$b@^d{Ed`hIJ=_l)U
zk!q1A=~|KYtM3jo^^lvxA-+CvM|tvJN^m>4Lt1w4!+OybuW5QpKC>_60nV?B8D#%I
zXs9Z`7p5F$P->^MLV5`r6H1|Es$$^VOxy~o(_<>krM{wHYXkUx9__x46^fT3nun5U
zs;5I0FD<S%1-Z@^@2So_zy`FJ*Ws5{6T;DSX?0gU*sp-iLArh+(6859VCq+dZnQ2=
zXsQeQ_4S=H>HgZ2o{zJ%aqTH9iG7akl==&BS23rq<a&K)^$b6<>|56NY0gOX^zV<#
zs?*b&wgJq`QxF&w&;7B9yW(n<@Zc+9W`rMnZ7*xm()BuT2b?(35B+AiItskVEw{kA
z0_0Iy2j5cY66?~r%8`}4d^YU^#<g0Xwzv$~Q>)+sCj`2`Fj<-2gB~R`wtM-=Vi4oF
zMZ?BD?b%=(BNvBGfasj<jV`DG1yqup9>c*JR0~FM4yVkY-fqNN@_|7sN5>1jK=`!?
zPGTArk`=qXXb}7u=w%%F>|5o~8zl%mR4M`}q1F`Z$>e7PDd>1}sc+QJ%b;%S9`LdH
zgLG|FOM##j9OkYFuM$w}%R4|_?y|lOpl|qbrJrBkwtsAQt0u8Tu|c$?f`ucJ4M}<y
zuPj#{O|NV&u2)yI(yogAMsdAPw9QDtwZudaMgsgM&hglrCY)D#Ld9P-zxiM4t4Rp>
zrpQI}K+wr`$V%D|HJHbdwZ)3Y^XbaOU59{DdIGc;{5WN(V+VkiREid;pCqc$rx&mn
zvoyZg%C8=^>B@AQw7ed!(M2P$5KN`Eg_f5F$d?{u8`J;c_0`oM9-OUDuatsXZUE)R
z6o*US9WK)Zc6TS}$5^b491Oe5tWv}_EvGRfbIok~`ZlQR5#CL(8Pz73>u7S>C9K*O
zd@EO$E(|Sd1K7)vXyW+0NAzk8b%9_TkEoTi!_#}`7Us&RMf-zQjjSkmvay!OEfJxR
z{L|=jx!`0g3i(7lS3m#y_j)xgrk4Q(Hs^Yh|I$U(s!;cocC?GH-c=1{i~hXw_}H>e
z_d6M1O5g26g-~r!4?~SyS}%&TVd{8K84%b-2vlU{pFf}zJRhjsU8y7ez8qTL#f{J3
zHO(*k9wbXl-0~{0m+MU`NUi1&;%2`hhFOr-nTPD9X6Y}OokIR6@K7Zood>J}6MDQy
z`gMo&K90UNgsD4CYVH^BLb~%YXRpgOwM(BGS|!0Wt*LOx?QXd-6%?i|iN7F-ed(M_
zZ{5iYYNmr{i=j|<R?_M^TO!2^RRg-SrsWIuz|fk9m!KqFZ@!$xb5FX?XsFgO9$!!K
z!Ivr5*3w;ykQ!pt1^dgh7azKsWZG0m`Zcyx1E>8|C+u1Ul|+wubxoSN-96ZRpt{ir
zs{!mqm%(UXRsCzoP%~85E3p5dR#ZIs2`byC#*bX3I@B$zp=GqvKRCEXf?AeX3ZZ1u
z_@Y^1@{Dc$^M7B|Eq3XjNdHMc%rw0c@E67z+nvt)hzV|};A1cr4owaA`Pow%Ceg<_
z(v<eQ-vj!+gN)J*o9^7Fm5L}ul~B{N1|_P3s_gwLG|j!L2Bg){^jrsMVbi1&WiSq2
zFh4ve*9U3>P`9E26U>h}2pvb)vZ3@(tQskVhJGsXKBX%jsDe)6v;24y(8C}4p8MTm
zZ%>MUr1ypCDz=_MFfJ|W(W>lycnI<X9QElYeWjbpMSHJ7f7-lf)F*@FWAGg`vpH54
zM+K6BpvlSxr~^IiG48*pkKFsDtRmogOK{o84_sG8x{fBR8<2ipf-mS4lbJYl|0d${
z?X*5+PE4QyqcmPB&C3#8o-oMK{j!dfHtO%eUxy%D%Trxz{tRnD-Sxg40xN_RrOO<J
zMx=*h;19Y;(_;!@1x$hoZsxGYYCUPYw_tVr-7n*l=(Bd^sa2glXVMK7EHo>4e8Tk}
zL)KO5E_IZ6^nKqCli1H@EA6xs;-sLfpsSRgeIr&#iwZg@3N1+E(8c~bkW7i&-UF-D
zq-g*&&s92*J$t=6QEn-oj3<gbGLC_!Z~x}s38dfQQ=E#VMBy$8w|kmV;ndW$pmW17
z=q`JAt#E$a<NP4QPB#`$)34WnyH%w8o>Yn5Zu-1WNyhZ22bID&cslUriQsZ-<0d7W
z!NbKwz@HvVyqEnho95J1f?8@Ppe8_~EQS26!3Usj8lehG<+&tE^>RsK7wB!c-qQNL
zWNG(F(fwt-0_5(8pC&!@);uK~aUlJmpi$u!$0_)NhH)sVx}Ky(KE$Q&%<Pfkb|qT6
zRxdQte(<EJ!|wb7YP$2?IQL(NT7KvoAD@c!7uK#KXwk-@%7WBVxnA=~s$5iQ@&(m|
zG|B`?w|n$fW~o`tRCQ^Krnt}BVolfeD9lgQf!4T5{g0ATFcP6nD!YO`QGg9l&&Fl?
z{qbD|8*l%Z6=ebvYSGn}RhAy3BU-zR(}S2O&_|P@SXM6lCT;GY)mL?|7`-x~Ipu{o
z%c;TaK9EA;9GXlx$Il<KKt%)2{~IQlv+6_Ir=FPUnja~K6pCP^@ZW;;I&1uHHTK-e
zBc+O(MB1alf$1B{LzM!)j?+?whX-o?<1>ZtMf%Mi=)N6dHA_WO(k0~$CWi$7{^sJB
zqPY`PKdA%7MWt!&qQ)zwhyFReP3hFTHy1*mUa8QZy{N`MuOmNk_zV|XDkAH?jVvF9
znPlCyK>a4WoQD0{_kECOotq~I1zZCiW|WH<z~1dyeQ7{^ftu=;qRwN56w4$vG_I-+
z1K-8k7G*S_Ilm1nd&ExOo=t3ss)&+ebIqqi*FO|*faCFs%*wwlP))(`iIJCo&qI~m
zGzZ4=+QPn9>_Sy!KIWUC#!9vSF~&d2lGNifaG~P8Gm&ty*#weQ197T)ur9;CI>EHC
z+nV2-NPS8BOz{<o|0v1N6f?wzh~X<Z-z`@FhHO&M+Q+^qq|asQAC><mS~VEe1`+@v
z@a1LVTIuSt+4Z50`TXeb;n=@hXo)EN>vX-LqsHF@%mA_wTa(GxnftXLc0{&%^~gm)
zYuXD3Lr>mG6GE&@sxT(1Z-UFNi|dlSoK-6tBSA86Pj7+a>6KvBKY9YPntpw+E)ImV
z3e3+<v2$5PlgpP!kGhan6<k>{WVuS$0`{^*RMQCM`w!$Q<x)`h7*8kZ2VW#u3F)i>
z6@<&LKoim;0*vu*T&<L~qK78QDHj42<)Y40)VWekleBfHv*AZHX@?H>&mXZi96$YF
z&2#^nN<fnN6eOfP#5QKMZ*SPD>9z~dcL{wQY_LOon}rmJJU#gHqWYjvjBAScwNu6E
z=M`1e3GyFpx&LdrKMykf6iMqE3Ibr)%P!QS_8Uy$P4zg^E$*CYQY%Iw0OLRDcicuP
zAx$roo{yAYx<A1AIpO?X;Y19GzNt1b<<r-<!C0@N*C?{Y8x^Vjy0Ru#lz(+k`OjGk
zIw`$Jjm?yof5G$$9Tn88OIl&4hM^DP6hlES2|y046o@pTxTkmsXvmx@T<Spa`QzB?
zb!}3W4Fif}jN&0x3MCD{TGb*6a3*uTny^-W$R9jZ39*2xTCu8u=T+V4N11Vc?BYz{
z|KCAFRZmJ1s!n@T$8}|3b*RdLAK|vSNo4CQQ}AbDPiD;?R^@ViwYsL?F!{VCjMo<x
z64uCRRIwWd70(WOjDZSd6f6i-h^iP)$(p|~{C!2ms)`cy$=8%ep`#LQb1g`<%y}SP
zNjU&SDg+10gA|g~8?&k!PLr<pV2aVS-ozswg>CNq;!ZV?|I|FKl0_A85~{5t#gPhg
zgwMov<9HuL86Rn==Zz|%{D@HxsHyM=fXk`|iBqL~3tAF=d)t%Fc~Go;8vBPk8HRln
zpZzPLDxX`GEl|e>n)q8~%aNfXvfW#XSh!b(ki4UsMOF2s{`J#}c5xk>##N&FNRyf<
zlsNg`W0az(s}xwZ7^l1%uzIf?_!RxV@YAVdHOhTCNNA8NVKF@=B&ShnYOQ$EDLn#-
znp#v>YX9}wtjPCO8dazos$_8jjCdIMzo?>J#gPgOsu4Y@vYbGA+AzCbrku4u!R`k^
zNfcAcBi3nz5(75G^R8mWu>D2=aF<pE2yLy%v@W#l1Do3`FeZ1!Gr-?fBlDzk1K>-4
z-BPBDKO%tS)dfQlQW~fO1bz>wj@7Jem`9Zx);8T)SBPg$i~0gjoo-HG!=tFj)lG3r
zdJmwqUu@k%SC#Z3i2Gf|dpVk*Q9~a?^yi0D$hU%Y|5BiBi;4vYIz}y_5$Mtcdfy<_
zKClb&Y^uuns`o@z+=HsZ=v&hSwO_2&T3l+aZ=6n(MEwt@>x&YrlB>2wo=>T6*2|x3
zmV(ASTKx0J4yz%x`{6X*(z*m8peIq?j-%u;Ko))lU8#Osh3U|PrARrJ`zxcAh9Yb7
zQ4U2jSgBR560OUJ?$x4D@rvB~?3d5VKiLMxQFP75K7}eqy+R)Ks$WqR>VJI5uw>K#
z7b*LLnyTg&07ALqZPi&)rHb=@N#p$=9-%Fem@+Z{FivgD%hYk<Pg+#hgf$-iaNY+0
z(4wlb1e5|6o~@`NUXg+Xs_HNgK7QPxR%z2lQ^YW;(J-wl5*4mY8{FtqTF03_{`(Mb
zX7I+u%u<YZujuMLnWj1=MD=?bs41f=IINThpC<h`fPONd-|(u_J!xeC+ikds;n(1<
z*YN7ZAl#E1-dC}dQYBv{Yz)v7TMfGZD5{hB-`<+$5XH2useo6>?S@CHALSHR=jyDG
z+N7KTrH_=TXd(G74MpkK&eKU%pvCNQzwS?^TdF@#roOk(ULGjTpHYk_kcIXL^J@nt
zQLCIA;)@wxxO7&jQ+H^nxFw&EB`F}EkQJ&>G-02=!0Jq9D}Gj%6j*NxghE?p|F2Vz
z<q3Z{#wvHys`NtnbV(H%wN`iAG+aJOG4TlI$EEkVcIMbu_hmG7)=qPBdWS3lI_kpj
z0rg`qrM{l1rVrZe=+gfnFs1NBLPZTK`nLG?McXIj%)bR{PlFWG^%?4-^bJs{?r+KN
zFI|}F3gF#ot=AbfsVCKdes0lJ;)+uJh?6#zQ!M5Gx+tb-UQEe4zi3a^aDHE6k1C}(
zh^GEL1{88%X*rcm5g!kvbRk0!Ml+oTq<>eNt}6)Ys!P`dWJ!w?#n;rjSv$Fo6cS(a
zO+P8A^#`3a*ND`BO0QCKqKXQ2fz(V6r7cKNMVlg%ev?sgT9z=7ej-kNdZaLy!nY~s
zr@3)_HiJ6C`EmJWUS+)6Rgl;4@yGZi><bYf8HFTm;rUn~rMZ9MKvS-*Xc3#nxe~F6
zd)-$j+s5V*^LYsqt@K|_?ysrJ?THqozqIzH+vj!t3Y=A;n9OrDqSF*n@wv2eNtMVB
zA19Zzjrm>gOy?mQ5NV<A-I2`?Ls}$z(4&?1QU}`5nC#=(+cYn!&P1>D@>^TnQGWfD
zN`0vV{T#`OrN{5_tM6L|Jx8l=(Sy#xH)&waJ{Kk;eGO*H^<<uto!e8lYT>M@!iP#^
zK&YSID9I}lg%UZgS+q|ZkX|YoX$(U+d+Sd+dEn1dnMeAKMIGaPl^N<SPy_hsV4e+s
zrk_Fh;rz5m%4RxKXhXWCxSZ3d^*kSZleN8$Vd7r63~%>0{qr+!REb*J^;urG5B~NP
z1q-6l<J0Ntw|yvz{2XfVcBD@<Y||J|MkftQIiqGaJV{f~=ey#8;zHzUpK2<e)Pa80
zrKkFMnv+gQv$KQIWTUqa6;X70_1mDjT^%oaOFMtQD=E?Ebwr#~aCa(dyc{a5V{*FJ
z09Esx`L#jj*R_%-z@9<(v~$IpYS!te+8H9L3Q8LCK9oxt`N^lBQ#w>gpVRx)mCCLY
zRQ8&R&;qsEmJI!$$K*UM>CijSJ&P=gYo-TJp_I$w9<??7J;)S2J*@|5-)b}dn?3|x
z!SXQ>B1p*;uk@5a#n^>v`S^W`_F+EK$8mbv{g~vJfg*+Wz0#QXD`t}t)>+mmoqj>h
zh!(7o(-3k$k4BU!UwH@1?~xL}pCuBh)LS4e0#qX}QsvpCW1#l|YEe1EXz~yrNumXH
zBSH#JyA*!$x`Zja5t2+u>Ol9C=-ZM6JP=CLKoTyh0Ff7^i!;Rui9zPBSrb%)bqhrF
zSTE_rU9-{V)u)ICF0aKkRRn%8pUMU4W0+GbD*RRAY<^HZnL1hTU_)4}X;>F}gKdHu
z#qjUH+DHAgko@ftBTb!<BM@|^oa1cdLf>CWJC6I+fF9(0{Z9!=t|wmy4_ZpmHUn0_
zyi?gVp#^JvidwyyE-KP=)1>JZUfo-KPQP0F?98c-#7sy^p_GDFI*(8-S4<%T8Wc~D
z5t59j8{YeV8ARA(%{-z*qv=4UohqT6!?lMCsG{pM_Ni&Y`mC*RSV<A13-R4uUo@#@
zxmon%G*)u<0J$gSsL)H}rjDR|aj0@^@l}yxbnbn+I^*VK*Q0^I$B1WeX~SC1MC9V=
zIpf!(%)=qh@5}9`anIkz0W>M&opsWx+@nj@yxuKKTI48#xO3XdJ+Q&#w7<Tn-xvS3
zlq9p^*rba4`0=L=0HM5e0d(La>;O&Qe=>JyK<c`{**wwxoQphb(ZDZ9ufKoso_3jr
zR{#7TmtTJPbIYBw_hkq%K4|gI;wjl@f5@zpZ8wn$II5#6{n>zY=f0oN+WrCMQIg6-
zfkHsr$*9$mT3aN2(p+Y%i36ZF0b2hRAm36X{hn`ux2lonsGK6*z9xYNTRM=WV^K&e
zK0jUmQZ_B>^Ae==9b<d%fBw)WNdne{An%bNil^W68PxRO6|?Adpm`D{hx`nq^d#rl
z=kcx+MtMac4Q}2+_7X0xe5V#f6p(5|IEMP)4E0WlW-g++Kk=Wz-52}IChrknDnNTw
zLo|&;bwxm>ZD#n_)|3uaxD(2ZB<lq_1qo3;QlJ>!oSj|irD4{C6)518M#etJDAT`1
zfy&Q77x_H=Y)@UOBh=iHbUIzFC0}X<)l6OvO;97N?Ni3zAKFj0RZ=afdx7C$6c1Ap
z(s>G1bkO-h_p|}(MU7cru)ButEmd?`+WA0vSQyex>P<P_=Tz~cPJ_q|P=BGdKR^0I
zd*ZkMP-XeiES_Xd=#_JOSaXh=Zn94Tb_`WS5_huE+9{--$0q!i8i6a{RI$TEOWz;r
zDI%S&!hlW`BL3hiWUsv1My<-{gu9K}rbI$Fl}xvkg$?CCQfOzTjOs5S#S%ZJnUX@k
zF1-N0*P*CTxNy<)94hsu6dAf7p(LKX%WRY*sqEDp>gM6MqXdDSBkGO}C^3c(vy@_0
z!(!(}IclDmu4AnW`LD=m;%HUHd{a4)P#sy0GXYxMN^Z%e;pBrl`Z30juifqh47`1|
z_zSwlAA4F;QN+_ilD4BOn!OOGTQ;h-xslQW^|<A8?~^MxuT(S6_L`zXrEpoHM8(Ny
z>37;L*FQNqwJxhZ*Pc53{9*2-=&9o?*ZfYe*|K(>8ojZqP-$J$bMoh<beS0rrjZ((
z=#ZZ8X??q5WWLa!r>jc(Hl$}MN!xRXy0o8l<TvA?@Rr3rm*WpbcUz=X5O%EsrSBh>
zI%NoD?f3P`Jw3HN|07aurl9)&e5y_uk;QWa8Ikg0l`qOF!e-UvHo$vR=s$4s!NVE?
z$q{K;w9|Z1`X`?RG%0E@_wH3feoHA~dUWC=RccDBu%yoP<y4P%tew(zKF^}pk$zv|
zjg?Qrln=yewpyJ=))4x1UD^X?g>(t4J5o*pGpUaB>(cc%RQ$t?PA!8S{rIO-pMQB-
zEwNHjsrIH@!Ce$y$SG6VrgaTBGeo4<G9}g+^L*|)!udRVk*o@9U0b?>2~TX-vPn6L
z<y&^KrZd;DsvnooEuZ@~3VbhrV<~2RWT?**b6$9Y%1%EgCxwxm)bjbQE4F6us<cGW
z*7{3<s!PkCe%V~prJ;ffP}4^3Vn3(<VScLud$mqmC5$X@8t_h0t983e>aqht3(!?v
za!cdmkuo*96atJ-aD7Gb@k4R-%gNdIqq<A4!wrQLlk@*DF>8F4J}dTFKVy>`swoMu
z>!6=p-2dS&1jw?aHL#mRig+ZRDjVldF)%0K?Ng{`1aZHt`clQ2kotl@^EdlHHhum+
z7@Ib?-G!Ph6I2^i$UA|T9{cq9`+#4eoDr7`YqbM(d-D%^DTq}H;Y^M-WG4#Q1l6Te
z7$()JQ9iW?hid43+_ZAjtt48Lu79}BdfrjtHWVpL5M%GR3iN|TRQ;ry)*pU<shS0J
z4U9dYoT!VrxuEWMfVtGMKA`riZJU*!rg|lOQdQAb33}ZC%+t~}q~Dk67)?WH@tz9!
zh)149Pn||4ky>#cbpaJF_3cR4H&vW30;p-+Tcyf&`>{Yqxxu^LMU{H`{;g1ZUi1AM
z3NP6pHGO;ot318JYTbg>P5SwS_3IF;(e61sU9av2Yd1IT1J&4A+b|@6#ET~{3YUQT
zuF%d6MP&_cjx<D3Ths4#Uv0?+z_TG9NO*)(L3%{+cadmTmrsB#V%0@Fw*vKJWPinY
zH`u!`Me1yu_oT&ie@d)-uL`1CDEgJ@zkBw}szco<{q(Cj-?M`h(^#5BQk8N6&QLFL
zqUxSsfHOE_EG~dZsaCxE{-Rmt1tuQ>yMO_hDreQF{~75h^3qNE`3+EMAc)4%@roLf
zOnCo$V_#$65>yc7e2+cJK>Om-lB1VG9W%WIeo-WCc(g*Bc~;5<V4CO)TD2UVNi2H@
z<fUb0>lE+@L<w>>fEl_JkEdN*IJ?WPXj|Gm0?>f<GUriL|LX^t^Q`y>bf7<Er1#*c
z0%BGuY3XuN3$D6HA|Nhj*VJx|f84b#T6rLtNrx0kZ#V@Qk5e#9ASm<O+H$NU{L)C;
zr?G7zWizoB?-n?qLam^(n00h^ZGr0g%&gKn{;k8GV^-Z;OwyukDLB$1U?l;yZah<m
zK))~Zptt{P*HqehB%hhGRW#7SulBoGwaV<eCajJK7EY6I(%{69d3B5o{@9gjs1^`l
zq+<BoCyKsd39dBlfII?p4D~AoSGZHndu&Yvq;8f^Ax&zG)s^d-!Yi>lShQO!P`@q?
zvc&ycwo9G#h!^_8edkh&rg0UR>1c(aO6fvzS@Ej1hC_uyEnoxa7YDkE1Y<wyC(JyC
zcyn=^vMfsGQYrWl3<*48_)nMpKw0=bsG#z-T3gZYkJ=1~bMDWB`pKvU33&aD+CCG~
zq7vkFjsmxFe#|)cMQMJIi<$zgKZk3&v7|W)biGovq>lU30M^}^8p)Lh3;IcvRoa-Z
zZrAo9K60kABc)4wHRS@j@cSUJORVCuh)M(2<FWsKd|A)RsNiM=9R#8;9>#R#u@Ysl
zVyPL_vat22>LFcXG=!m~snE0F&sB;`ttjg&GEdgkR`ppvcN;!`>apiaaM=e8(+8if
zRws%@mBpe4^7ySIJ#4b}Bka>=kH}gO1Sn6<z!U=>VD%WT(|-l)1EV0Bx`;AhuPtK1
zg1S^f?Xrgcsdeo)iMB32q)eODNfE0^sFRsuQC0B-7#ggu<7^TB2oD#oHM#-d1nc5z
zRTrn1t5r;cfOV`P#b_>2F|DTvpwBgc?5TCuk=}Bhij)KfL@S*xzQ1N#ZzEJ9FI13z
zKu610n(QZ{mD!g>0w|aOMl+TTzBSU7@c|%ZYImt&oE`wr$)EW*>91bV9+GKUU;PPB
z8oSH_q{P}mmAH3>MIrb<S}n(4QL?%Z^>c<gm8-xA7wSr?&sg85_v`!9)9|l-U9)V|
zVI^?RtkSJ2MS_a-nr~FuU5sB&zZhMqYikP0Uvw=U{luJTmR{zcb+jUVi`}avl6}{u
z#|laS#j*mdU7}S*M5`vOth+h^G}G_j>6bIyHO0m0NcsgLMFr1Ja?N)EJvcOkt4PP`
z>iE}Gip)Y@C4a&cv0!$Y^A@BmI5s*=^{Qi_OYd^(0yX&kNYzqXaTr=lPipUN5d%tl
zakgC3EKYw6v;F|+_(&xfM8I^XrH}<{%I&j&l;>5T1ey~_<HY@*(mo<C4ugp9J)Z3b
zj<E{-s8F{Sm`Ub~T8u6PK?15(pyS6skbKkkBzw^!Ob>0s^xsPNLCa#7hQ*YKW?Hh`
zdll#R#hYKZNKJ~cceKuyt8!&UW4_WrM!QhfkX-0HeSJiKZUgDp9a3?(hOhl_vqM#x
zawSbzbb>+_E3xn+>pkD4iWz7moSq!Xs_Xf+8&UPk18eOgY!K4?6E@J?iY@D^g}^s@
z?^25vu=QGXd`O}U&Y8Ui6O4;}qU&$j9i=HuX<ky1?^8hypkU$flm4s+RPe-Ua$P?7
z&{gO`{O(myCDPRkCCugVkH74Jg!i=aQ*KuZ-+MCr<uFwN(_Qkhuc2fx`)NeAOi;x7
zyPKwYCETFMO%3{L{DM+^2&i)=d#5%a{kXhR`+V`Eu}n;Fr(3OBf+>>!fC3Ce^L4qX
z%Vny9o95M87FA7cNi}yU!wVXK@%kLq%iFdcQNvty=Ta7x{^N^BfA#&bO?4~ObC#q=
zwH2uy^izGhR$(*Xw}Ez~>@P$U()6hfJ@oo-gnX><mjV<C<2*3y*~d35j8Z{TA*H@c
zi|Uk&bLW>xc5;YP4*t4%MIG3Y<nAvxHBfxSESM;=Q^hTmJFBK?<r>&elH9uVJxLbC
zW2=wz^&j+raIzc`?V$oF#0vJ}PTOU#58nd&GCa}I4}VdlbjtrFvMTh}e8;zKF}Uvh
z=%H9&;L8(nD*N_zZLs_D<oJ>WpK)dJHRT<E&W?lY4IAOYGz!AsVgSHC!D6^7K^1*r
zO1eVoM#-8Mq@+*LA8P-eZ&pwP$P*nDwMt)%*_B|m%#D?{Roy5~X6r8u*h{$coxQb)
zVW;x4nw2SqEY$vWr%4K?;U2i)sq)yXU0z5kT)*G^U{g>v#1sjvkQ&!2tp^z}aBM{}
z3IM2TDXJNrla;S8;x3Po|JQ=`Yr*=ae)YFuc#hB5v|<p?FVD2Bx5-Oy+5xbS<;yy1
z0w{CuO*xZnLi$&0BCfIbfIBJS&>?+ox?evI)c_Ya;Mn*5sgQ8)TOb>RRayE^FvSW7
z=KIl2mI@7^FaQ2)e_3K!%+Kx9;cd`5#R3f*<XV9X!$_Sj+qrJpS+XgkE_-!xFTvdC
z#xm3Jdi-}@ST|l}XOGw{xZ*6ug~D1!4F%dVbwx|C<)z*H{BdBSfc|uiJ?Ae5f-3@^
z6{MoPY=MN;E&WS3_Aeg;TX_(gh2eDQ1Nzs#ATQ}Mchz}5uwe(>A0|I9sQ0f!RO-|d
z%RE&rbpp~)g*{aXb$n&*NvwY_a+%4&3f+J{6t!zDSsdGWl@qRpOY3s`o8Mm)UWE?R
zfe;8PVwVKfF2yS^Cn2jk)Zl~n(GinzankAMKZZ3Wq-zxeCs!3Y$}X_ITI5^G=DphZ
z5cKyCK%b%kjiQF_?WufwE?hezh9{)xq_EFlaKqiyxT*q$Pko3J%_?qZ2B*2<Q;}6G
zENZ~)wb=(d5!oBS<^%7kAZ$p+rs%T9yM|zWkTEZ-gk6a>7DBchY0(Yve(VQVfs*-C
z7KR#3{P_hS7fOJm%8+w#D|fq4vj>c6g8FywDzS*|!uku13$9@(wWW68SZ7Vo7Wl$-
zlrtrWX93f2NI&_PoIU=T4ghOHs8|Z4ix@jyS<9QMRhHapN4g_qkX?XJKdD3pPy)lY
zU5ZyYaitJhC)NV0?p2sjT**@PD~|Qg%rvLbFjx~h+^XDBov_ZyfFKn^-!)-%PB@n?
zE7{X^{~Oo+FyH?!o+AUy3=0p*4e-GW!39~UY<FId11v!KeSk7Mt2p=1KW-caXg^MJ
z3#TvE6jnt(1yr12X^K*tpQM3~SMxLdx45WR_v}=R7&i@RHQn&~J9Df182YrVYu;j`
z@pFevB^`d~SOm$Gc4Z-6P1zOhs12l_3@Ono;{DKy&woHkE8g~0w#P+Hq2adkdxs1=
zVf7lPst8mI0b?ydTtiRbcu_w;w<g0<e2Sd4Pcf?T=H=FJTPm*%!0s*pm?}ZRRFWn*
z->oTZT~chS45k~PfciB;{r)7gy6<mV+KZRJFl7IVR>ku(aaaBD^e*3r{=GqTPs@!2
z8=@NtW`O9P=u0-BDJv{p?yw%ODFHnqEREOdV<Xp4+k>li;L`QLX%{XCS(Qmu0ZQbR
zf@2%+D7G=&^D9cDYH2sNDzya^aX}mj?yj)%`yT`MU@re`Q6F+FT5{@y79#=3smoeL
zg2xk;QOBZ=_4AUpf8VE*^v<3t%KWcGFyRNa3h-=I$`&0sfqb)C&a(xn4Xm3g&;#q5
zQ6v^xymw_bQBA9G1;9re!VoE5<spDo>%w)0TEY39Lh3&eUca4<jc(<x%~UhJs}2@m
zrQnJzRg66~13+D?;Y`V2m}i0eG3$?IY2_guQ;Y+o({E7?G;$v*s%p3bTA)hje!&Rv
zX}=wCdW3O=s?rT$C2Rrs*XKw@)?02{R60uiTDAO#7Xa1R!3D%v5Z-)#q>2at_7%`v
z+J8(Jq>HnbV<I*v1HTdNs{eIJ2Qk|LfTTeyh=ZiG+HxVQ394O^e_T}cMcr^M>!(uH
zq#6;<%NXaRT0p}%pwp%cI$$BWO^lE(?SOMwbGnKxVSZ>`g?UiqDguom8TeM^GDd>*
zcJl1vF-#B_!w?QP>gj=iMIIE@3YYHG1jQ{-{bliera!cxoU~v%ISlri)uG>S;XU~0
zAz1sa-bavWdo{dLn;2n9&Otse5I?^t3i%o{suVoqpYMzne@;iK7q@GS7E!+N@hQfq
z{zZhh&_BIBr6>07s(z2_7-XUK1R2fr?yNjXWFh#gWymAc4Qn+Cd(OnZtoxxyj&h1j
zPY<Ka;fI8P23dL+rimrkj>Jje_R@gXJiqmCh6{c4?40y+SDv);QX&CKfiwgvI3caj
zCe{fyNEKC-6K%3kq+PPmaO=(8;GFgCff7{oaRU|22gl>2E{rldBi7FYtSPATBCESa
zWJrsOx?j?~cuw?nrKJZCtsdg8eaMqYo6D;+->-Rj_Xbl1O{_m)Ph)7>8#1+U{qjZ3
z1Wz@$Jnv?d(+AYgufx@Sr$*_MjMm0?WXUPnPnSn}I~A%YA%rF|jr)qZ?>)^V^sADY
zE1El6(KA$E)Foh!MHBs%$`NjW7f5`t5`|DiC=IP5vk#S?_pS+Q-1vdm)aT#1^kX}{
zZ?V%z8y0Mjf}H5eE`%iQ_bSfMmM&C0+lkKR?#S+RuN_)E<yPf7rD?B@mVX5G;{Ym-
z)!UPKPVW+E2*ZmRl<Hr{;s3r}_hqZQ2hTSRqi1h;%dF5;20??!KGjvFwAs4D;q>#9
zi8;PER5g(>d@WhjuBb(sj>6_T(z{yJ*yRs0zqG+d5gt(~BK|~2qN<krKCOX{WdHL(
zH-vRgm!dN<O+imhSO?aG^aqQoNL2p>)>y{$BvBN9;GB*|B&2GMCk=SMRb7$biA<-g
zBZaDVx|_P5>6BH-v{p3vy@E*F2Q+v^xlxN)P~}*#j<YuHi;J~CUEdoUW*AFPm#XLD
z|0U2I2g3TP3F-66qM2-yD1((6RDW>0s$x-#844@2sCE@8y{dQL9Bc_l77O(h)cRG`
zqD}o>fs%$^gcU_9;ESlrDQcM4fgXH*J4?QtA4?t0Ng?*rY3S({P>c5;{)8RbzEzMO
z?NoHqwzO$Xv+xtRUEX8vgYVT0btI*wKaMX|9qR{LE0@fU7)CXve$VoKy=y}omsS&X
zD&Tj#dgA4(AOVe%2E>>8)c@&8>M?`XC`NQpDB)|XimP>GsyD3c1(S00kL8An@kbn=
zk)3S1JW9zmU?o?pCI9zVk=}*s=l@Lc%1cZ!5NIchNJ%_o(uyX|uYod6PhQ%gK+5kc
z6f4L}yACwP*0+>UzyPY5M(vw&Z0HpB)b1@0R^Z_(X(@R_Az%&Y(~~hzPd9l@v1&@G
zUpi~tbOmgOtEx29wtFm3i_!(|00h%4v@Rup{phUmIm&(=VU@RM>HbiiKSkN;+fM>u
zdnNv<7cqv`VNX_>#L69pDfRBx7Zj|hrmtTgQaOL3#z8u8{XzH+C2bNLKrg4?k7hLu
zSjDRr{UFRppEDuUJRp2q4ge!<$SA@4wHQuK_vbQ5uSr!IT%;OO7FsV5U*;-?e~WKX
zKR}y9k2j>`3O&9euL@o9{cWzWwE*?$Hi>Q;`3OJ${^QjTkxE0ObVVhwP*l#PX!Lig
zFuyM$<@(F8M+3#9HCD~0S_%{NqL`;Ew10&jt*WX>Q?4SS{(VD5Uk~EvXG%>IpBSP;
z3Sf#nrI4r<Sx*_{-WtmE>c9JbmLU2&e7Ug{jB}qt6AaOppTA81^>p-WP}3vzJF=4X
zPc4e&LPfAy^*0^;l89btl<M?CWj*x@&a_dFLHc{u*9Ojf4pr0TpMpLOvGQVqrVuGb
z84EL24NytSen0l}<7I_`TFgXlj;Ku);c!sER?z>zBJ1KE2;2rjkoMCkQ(A!ynUY)D
z4R8Klt9q$spDqYbwuMfoyCZD}QSj0OpZ5@IocCw}JMyU7Woo)+{XgopfKOwK8zC@7
z9caT*Yq|w$ApO6X?d$rnH59qx?)kZd5XcGwUFmd%bu1KAg;Z(24rC_$9m4#`+_e5U
zba_NnPvbdAP`yUw3MOk(FtE$2;!j-@Qt#3~B&i=q%4eTV07g;8`8sIpP3{8q9O&4d
z%)kAv|G4~9|53Il^Noao>9oY1qe9Pod5#};x{=VX@V;fW@OPUx(zIY>fC5jA*gwFL
zq0*F3nP~@3H!t|8LXDJqe;vMX74eq9W1M+t!yotOu8Jnxa#Bq9rPNu&N71jK`q@EA
z2SlY|59&9ET38X~!81X8jx71E-`Bk<Eoy`g1p6fL5g?c2s5r$oLS9rozM>*`{3?rz
z*?C{y>3aU1LR@wIecLvrnVi1WZoLQHzO=3~?H9qXB0lGx`+d5*v}{)@`bil>VD9O0
zt@brq79HBYY-y6_-j5}FB5y(OWsj07<Wnh^CRNI&Gp2i}JY1ocu^m*O*QCs?AIx-_
z^zge7)>!Ql`iJaL^5A_?N&A{q$qo5_f-i46Q5-MMm(d~Q38aRN(1J9c*3TTZ-aYa@
zoBkK{2G%J-^<zb~!1}#`by26de?zTr@F?2atXY+a1k_R@l7;^EphB2jn3iQdu1->D
z)t_c|WpNTn)1(L$#cJ-QqEZ~XSk#HENhvPXd*EX-slMh|$>kEIi9L`1&gcE8P^Tpg
zX%BS0D~JlJkk1sxO~TZb8w(=3ex8u?`o2kJc2lao9Wb6N6s5_c&aiw+39e3~Ns=%-
z6d$hVeu5H;e&yLvU~(uz*Ed171-C{CwXXW+YqhqdAD28QIm`t~S8M$>$x>$lU4hvy
z$P*S8MM{+UzZ(@wnie+3^7z*r)oi0PL4b5(dXx=6Q$_A?y39hXF6ngsb^3=&6>jUw
zI=!e_Kv`l6T{xLeRv8Qmepeaj=aKQ>@q&Io@>VI^=1+()P*FH2_^C=Ui(7CC>QYwL
zb2TizC!yy~U;kr#Ees7y!1sxN{-Q!H{b>Bx02N>TKT~M+ANF&jo8D$vHMY@Eny{Ff
z6RTBS2Ii=sl8dP)R(1QKO8+Ge3DY|fphd1v%c{v(d3RZ}eyX}u_bIL_AJ^YL9y_SW
zR?i=JzM`!Tdv|_Q6<9S@|GFlu@X)(WxcZiKU$qdXkqat{t6j(DdX?n5go4yl(xO&B
zKN1(8Qd0~@x#?iJ;pdlCegSeJjSSU_*Gd*u6y>wlQv=%czh(o{mJk=pg8K_tA@>X6
zjI?8w;`4naRvj_{p!)r5F(6)tJFbFlBY-%N`!$H(=_YmyP&t+Ro-bYcrw(xrsn;*+
zL<{ya#&j{BQcz3>AJYepUV)+#kYKX=U#Kjd)38r>gQ?{&j-=G&j@JX3QgwpdiD9J1
zw`q~9lCTHllaAMPV${5*cR$Cfaq~=_=BbWJRSE-APz|1PiPiC3)p{nSI79tfEotV|
zX#ML||29Jm?=SrTA@QN7ww2dYy0z_0F%5T$t>Mlb03xSG*|!*(QnGvj!epbg|M90_
zRmb=okGemP9REp&M3VKl?p@&Zpi-F@ePJl3R5ri|IerXr{wLd%GL}8nt<R2kjYjnW
zRh9HsNnY+(h1#pgn$+%6UDJ=ROB9DfZx4kl(|E4~@dxCpB3M<DwI+-(nW|P<(T}<>
zK|G(VW^o6Ha+L~c6Lgx#fQ91gc&t#ktRu~j|ND9V-@(MfB%uZmJ!5fYaj5z(uEPK6
z%EzBbm8zToIj*d&zrWobn}--hz{@JRwu^#--cqf|I#lX%yrN`bNF{(%8$h^rn_^@E
z9@ILy!s^Rc03wQ&MI|i?<w8oY71q^e_ebADwKn$jD#SGfaIH=4x6=S{K+&o8lIDAb
zS+M$P!av6VAI$oj20`dL;2=LPzi&}oe~0H^-SGVT`8qtCm-pN7a>dM~<zIh_0mOKw
zY({C-{r#VbKPa!K=&xJp5U4_xP)t&PAJvZ!oAR@h=%Qs(MfF+NNv9o)kH3pmkM8fE
z4OqJ+?(fxhz2bZZ0OD5&#8J|cCjh#*sJ!L%zg>(Yr}lNZ{rk;_;P|4F+o?)ssW_dI
z7T)VsqXf~IsyH=@P*q@(xJ#V$zK7coS9O(`uP=AnmuS`^=^#$-`!VcW&@Q$#^ipLv
z?Hbqyk;l_2WK+98JyTG7SA4c#=^|Kl9k--7URB)wgemlUFV?AGb^QLLGo`a7rCyCX
zMEv{XKCPex;p#F4)RDuCKgg;V2bFA6AM#M0<zN?q52I9;QeUus>|nhN|Ix#L(vSF`
z|6Kp``k(Nh5Ow-B{dm#oPcMV`&nl{R)j{;!4wT^32pU%`I@k%Sb5lRi2tjsl$IJTp
zG3ve4<-JD1pS~r<x}RASd147=3%kZ##(^opYS;>wd-vx%CeYb9aP+58S>;cs+taQ0
z?EJ1i0&zVB;QL2aQ?fA)MF1UBDOaW|O2Fx_Je0`l=c*z``$$<mKUEk7_s}EYN;Ila
z%@QO+m#?Y{&X3D}*}oF0HrVJILp1=S`DJCwUKRQ(YP?kskrLEfcbru{FaT^(0)Top
z1iww-AHPhg)a+2+0QB(N-)Yb^D!ZnmBuIn=gvsS7E#h8dh*|?!xt!8T5BBcaZ(@9-
z^^ZD*hptmG_6wA5c6l_A_$FPqA3=5fu;HEEwb?u2i6TE@&h}TvUEGqJcr}u!D<CQA
zA0?cbQ_i)gEYK3kroYRhL~AWwqK>Md2+olfaddE5dAJS~`}0<AOTXq#J~Ec#$d$y?
zY^wIWKY>%VRMN8aRJiQE^fS<pOaB|95AUc@5zqfMqtjJ(a_L5Uf0QH`K*o1H_(&yE
zR-B;R#5E!P^OTKzC-#^BqwtH`mvbufD8K7R162)~F8z*8PpLw83i^1|ov47!y0b-u
zAAcO1&LnEM`lZ()ZB;JlrV4=^bbG1gTg8bbg=AZosdUZ_X-(r=+wF8}LSFQs{mFEi
zeDBKC6#S1+9)2+<Qio8pnDej6HD}1?1Ba=pY*5qK@b_iw%Ci%hC;rLlrHXak>+k=<
z6#dwrR5Hb+>0iSXKa}Niu4oA7o_`JJ#yiS(?u{nt*23eh(;nR2mn~^1mQ-fd;VMqN
zQFL{_<?*?^o^L;Ncgfvp3%9&<o2HD9bCaB0-~Ht8U4c2T={s)FtGmzj#3K@Eo37am
zt7X|KZB%;sKMrxyUtSW>+n}$p$VZwOiqo;uyZtmsJ&Y>Zs^onnz-+%ly1--Hvx@is
zf}(%kubN1T3O292>{E&pP|&a3<+EX3F@@$|4#WP_L4PLGfmRDM;(Q0}VfCY|XA4$5
z`R27R$~^IB?^Py#L4BBlRX7mvS73#eCK7xT*4xsccx!X}5)YYtFY8(i)6<m&>)fxg
zmb!!Uo091(A4uF3RytJn6}xh&g&{G6s@NDM-qlfoj{VpIn(nOs!KZ%}8RYX7Vav<d
zUr{Rmv~5T{F~R+A;xCB-jE0ITf@`mfs3pu7?C!_UShpMq?mUW4nWxY0nJGVoT(&Q$
zKfbtJDY}yKa{TEjO2ln7aeVg$b+&61Q*lb{@hKpw28s&m{{>CO7bR5^|DgE#vj+Dv
zy;sAOy!1aVsg69wv<pItw1qC#)fL|#tv@L)b)56LI`XA`P=c~B&t0Gv<Fqo1ROF!b
z04Dd)bTira-@(2QvkIkrMA`Mu@{fJ}z`as1M4#iUD$pFYjO33MKP^7&l@o}e0G}MA
z*HwUG>{pUF1-U$be0>}O{h&dhXUa|NIa%MGng6lyhl?NPLm2ntZQo*;7IQU3QK+pa
z-Ba<pp-^-5R_VqVfV+M2HFeDHQ)Y1ra!c3)N+sdNkvNzAlo%WltQ{Tzd5ifSIGk9T
zlB;gzlp%4a3QdA~1yX+rW{FtcxTuvCC50-oLUGGg-&b{?L};v0mu^`LTwP31Vq%FG
zg7&dauEae(0G_$1yXE|!_RHEZrJi@|3aKQTl50b%S*;@K)!1j;H>Iw&sWgeZON3Y4
zFhh6hVwTHFb|a&5M*6*tG-1^lk{Z-C{Up+Lnl|kun=J7^df6sa91PgqdlE9FMXW;H
z&rV!wOZo-(bOU{Y|4D}l*S>J>>LR04ZrlN`QH9#VgGi@)z!gZ#T9D0x`E{UKFWcb;
z5Fw|jVTuFPw6dq7s=TJC0FeuY5f;7m;#3v&6=}0&UY4jig8Gr}fTt%ncqobzDHX&&
zH``JU&+0D4*40wr+CQBG<s<Y64PGZj)kBz6UR8do0q(lfKBD>LdIi}BuvJ(k0CS7%
z{ih|xtC~@MktZdPeSH-u@%~@KEroyAYj+*&@YWr$KPN-VDOT0vnd&wuHB@17S@8+d
zADmF7i<v3})d{S*)Rr^{EB>74vwF-$o$Y}VtdgU@V-wi3{2%{_m-gPeDwQ>h+n}mK
znr@?4eacIF9IHw1DrhQQI$E+=Q+=^^wWe89_Yh_}R_0$ytgkOqdhLE%2immEVQX64
zRmNs#zbepNvhsNK^9W@E>Pj_@JE0P5TkMlPnWcHs{wdbf-G6-Osn&4j3TupKjMQaQ
z&#LgtSyohb#frK~>EjqB`6Y<qSfS`IxhFHMMY5vCWTgfa0~K!$qEF$k-c(ygRBQ*x
zB3Ln_2VouS$3?%U2!IUg`^B9aL5iAYSWkxlTb9IkSV#U(4y-U)QOEjC6}I%W^l`Ib
z8Gm-aZrK8`?ND?vGQrwjCX44Iz(ef%Uqc_JoMklm>f)Rm*u!eOZWGvMD_oGlp|?#l
z7PXblwDqv7tM%x*-rMIct6FfuVNgp&;VMp~11TRq`1+?E>UZ2z8Pwu6&}xEO0s2LX
zI!7=cx@5P!Uic0u3Yb=pwp4YnUzdEUCbgB=$60q}*sHauN3`qrRyMo6c1<g*F`P0A
zDkrFhIO}noVaY$A)vn)Af#vI3ytLWuH$iP!RvU`y#a|KG8^s`n4aBpxyw<vcN(oFl
zUrjq;*{S`h3idg>pFOD7<N`(u)D2y=VdeL8{vEh8d}2s_fmGjdZgzl9RpB@qC-rwm
z>W`FK?v%?J(BipreyU#8aE_^q-Co%7e{fn7;@+3GK)UpWrRL<I7NloLmR;NAGBf&R
zM_+f$=~t3+h049FJ(%AFa~ucAkJX#ZLkug}^0AT7m2t_kriIb>c%!;YWj!g$B6iRp
zH`9TjvO80QhONtAK4eSCu<GKe>ay2g9qRY@rJvYV&3<q7&hWn34&MvKHIQc*s5o?p
z_Y1?pE*pC<gCKW7>_dI5wO||v8(6#z{kT_1#?`UHu<qmYf<d01PQLf;3+>D!79q>7
zC@A96O{%+f%oiR0jib}jUTS-O2j5HYGpI$h?dh=DOT*$`Vpus_{d`q7O+pw}b+@cy
zl;1ht2bbF5?ZYM^h4TvAw#ME=HJ9ps#Tn}d*H1D*TTOWPR~#*tRkdZl6~?B<J;;w4
z<aaJ=Ux)WBWaM&>=%$9({O7%>Ky1#mZD_@CEk*{F-&|@kEZkYi43Zq;zc0eGr)zs<
zPoJwhzc}{&?fo6AR1?yuoQ)lA*m1|cMn-Oq52)^M*n})=S{PcvGvqa^+8u$M8t`$?
zsQ%Gz@s{d3gX%b2C``nb3FgNpO#FK3d;7=wKg09?#OFB=v<DsdGKASOEWcOZV?fJ&
z>c&g;a5?F8&FXtSw<;}n>{%AxmJlb~sRdgspQ&`_YCsVUC{Nn8{gy(|b`it1u$s6`
z9LLFZ;uNtOXiY09@%4irJJ)|v6?jV|oTDwenncD0x3P*9_y5!UQu+R`)L%E`7ni+S
zu$uB-#Y<JgnUGq++X9o8VbM8!u+&njgz#l84wekLHdx0B{Usl=2w!&K+4yl@C*)4i
zXj#@E3#4OQRY94THQE@QwYNMhEGcn0x~oicS%|Beu~h}LX;H8ifz<^{s2?TN&&#j;
z;nVYPUl!Y?Bf@CO!c<~+0JjjeXX~nX{hbb$vX}a@erCY<`u$6vl_3tAM~^Y{OJQaP
z+A&?`e8<W#jbiRCZr%)Uf8O41H*5fx_P~{Gf)KVg!xUo49dV0Ija!D4kB#Ts0MS5o
zVexbYY7X?dETsR6CG@?-;j(Q39K9L!|B3%sO15{Qbhx*y6IzmzrgD%|RZ`}}A2=4|
z*`FQen2T!xVwW^MTw7ekg_OX?*lfyCuWN2;UCvv<6l51gj5j%IUG2lnPuLABdlf&_
zx|DarM3w?WL^FjT3zBcH#{wu{1-KGm7PcSkQYXBoeQ?d!*X$=KHZ|EM`gkeFA!NaG
zO8FmAU8GgZOzs`ZyJgI9(~tT1ub1tnZ~A(>{=QzH<<(!W*Qe_y%8}tB!@g*k!boP9
z`^F1K(XBA<KxFULhD~7FpjVx4g4=EQeT~C#w36CxVBFGRV5bN&%(DoC(xbfjrh~!f
zhN(es{-uO<I!Eu|kX-fEov**9IFDd^voku*QLDwn9)sjm)NX)$xfLzAD{cVquRnu2
z)-~Z}-=kXS(p|((RKl9suA|B!0r{A}|MzEcn#N`QSM<L$9OH*uieawdX5;(jl1@9p
z97)`%eW63oaV|QCDl@X|<>+>r(7yS;q-4ZL>hED*(>Z%lkT?v!59!tCE2{DJEj#^p
zpZ)H`Ym1}gbULT*c6D8F*I!pX8*=siuyeLnLQJyw@^A~p{J5t($`x)VBg{$PK(CQ*
zu4Q_hf_#rAMS)MZ!8}<vyoZ6Lttc8KH@*fl6IsRSOPtRO$}cbkKgaSK678?g^f%e*
z;tL!0Go7Q`Z`EI)%n&+d2KCgx4n_^P>-F{a1Z(!|%w-tOO#YXsU{d0vBEaAHpyuF%
ze%>upmM6?5UXoaJK00Ptd3;r>vR<Yf*MQB!YN(fnD#EY)AwTXvWGVChn+BL)eNlrr
zd=~Y#N*Pi*2&o0BbAeyEmSv={b95`&dvTu9=6X`^edll}0-FDGiF5^Wyk}Im3DQHT
z0_<NBsI#q~`txJwM2z^}+z|f#m13bM+4l!`;U;Zn3fm2@I7=nTmJZGR-F5ufKDA%$
zeYxE3ynQ2nxn75tJV^KQa<q<H{8?~&&=(<VuXJapXZmnIepW6P5nt059OVeFgGnbA
zDfh``R9$&ZJIa4R@p7>ADwfggerkI2Hg0J;6>;#<zrUv|=H>V8cDttEyx+Rcd*8+Q
zespGpZqoH+Z=PXsMCPFSOUrZO#x_WpW>G`;`asLMxTPnB7nuTIiMgqce{e#ob{6wK
zhPbeRJj!@|9bSgp)PJT{>ES|_`IuWu-<G^_Et<CiW6=e^=SdX{i32?yXnv1r!$*Ep
z3P&uLo}qtIoM&JAe_n-EzhA|#l*TO*B*i1Lg&>7u8{cxo@clKPh|ir5`TX|FsJH$}
z+(J|;xF}pOrgJx?Onkc<@nkli+trSf>)6$#51BH(bVZY32fqQ2j>Ysj`TH_IcC)k8
z(`&@c%Tq|tUEo8nJ9&ZF$*$Kbf;__JCqtXeBW((_hEYdYqw`7@f;ak`c^0DSWQZbu
z+0bMwu`WtdV-6!O^;jz(lDH(Un1X#EbCGwUYx?ei#<aV@&#>K#Tfg!a{oF{fT;KO-
z>f_}8n+^s8ICVA-!-!SY$}Jm|CWtWiWbQ$3ZO@%cn^w`zgkg<<RsTPmYHT~PSlJV@
zT$w=zi$Fqy<;)^$&yTv#b&A6;J=Q74HG6v-`hLHbvO4PJ19+dNu5tPj=QBh3Mdhil
zr4~Rdx<nE*GVn~dU+jl&u2n=lG$OC*tzSph1OPg~P+SO>f6TD?G4~fK{r~TV@HUt?
zlV3PN;j1?k#D=_l4GR0KRid~xg$lMOuWD1g9AR0ckP9WtdH9@vKOz1kkm29-?7BSf
z=Ugs3&cARTRggOX*uxR{ZVK}{eIBlpr614sD$*}EtL^mTDyQEa?WVA|o6udRDMV7D
z_{o=yF$H+(i~UX?O^9^AjT?wryR>{sSzK<o*V2h4Id9AD9E|L?5FP-WW0ciAmz#A+
z)K#^(Ip}pYMx(x&-U$yHw{w9<r8_V`4q(RpAL)gAa&{8N-;SKuYU5^4&TolJQ;-TZ
zKQ$J(^g&gQzap2JR0`L@_!tjzwI#=Xmq@X50E;BW0xXzGS*rA_O6+)Y?-r9P3R!6Q
z7UPTEoHg6pm(rKXQrJv7oglV0O2vUhu^mU*kwYd2x$(X8rTq0dQ}un?j6Uli<Xsu1
zlhLW`_37w~ufk_u(Zt|pitQmXTI@<)>Y#52gY%i11e+sMn&V~I^uj$orO@jr=hUtU
zU8O6AU!-$=3WrGveZ7$PC_gq2P9|X;O3MCz>3;n_L@74{>}`!Hc6>ae5<_rmYz^&H
zvGczCn%HeGqgm@2D)IE;$4Fc8yy;%Njm555-UTX2B6hoe!V^rQjNShI_uuy{(;06m
zT-Y#~-;{4#)tA=R@+)cSWl>X@wkS*ID4V3Ew|fN((2t9Hd3hNd?`XgenMeG(--egl
SR<m<j|NjGGId~&?aR>m?V3JD!

literal 113194
zcmV(vK<d9AiwFq8wbe=h17&zIE^}dR0JQy0bL~2^EsC}2e^KGH=8`n^K>{E?NUCng
zig)#1<5q;ja=RMt*iHNYH;j=0$prWTaMpQq?<HxieNOGM6F?#}kvYcy_Vxeh^}l;F
z=%I7%Agmj#XtZnoU;pp_U)OYP+qBI~^BI5o{{8;?`uX|&#ozR!|NMUce)&K6`TFvI
z^3_b!Wc;sjiXS_UlNpu&gApI+e`@~M{~Z25{@*A5FaO)_zrK@Z{r4N$$o_vV5a{2D
zfBDbX&-b_g;vs$0pZP!Zf9ikNgn#jZfPSRkjds$b8%Gf#Xw`iEfBpBi`PcuVZ~xE#
zJAM1#Ti|Q{_YS`!klnwv8YF=<!VI#tja!T)@pAwvv}`)pHzkbzKcktBKf?gaDHSbc
z159s<0?w`z|CVYnL#MjIwu3eOinD7u&fepMIgbJU{CxlZh85j|h^6(v?q+;5DNetA
z3OGgQ{{26{W!v?gx2kFf58Qg|s<!F81tVU;@Vc%Moob3`zrXkJdZ7J?E#R;R&5+r(
zYW>B~55lj%?3`Z-y&jTI&WD74@~}$hBtrD-G`&yZz`Wz9clR<WZ&~%nsPvHR{QK8a
z;{UblDIpAHEB!W~pW|ov7<|$A?J&}xY5h42Vm4FVDfz#dfA`J|V(6q8l#_!K9kY>J
zY~(K4C|^czEHk#=muO?mYNOqMJF}2${7INC1O%tH`ToWE-5?l~AI8zzOBSN@ss1g!
z3-O*#4#v5m*L=srVtU^37M{d3MVQf4cYLw)4}dtdX0)SGtxzgIp}L6Fc~3HQgAlzp
za6j})uAW42kLQ09UDp&*?ie(FoQFFM(pu+ib}~hA&-p9EUml%zdj6N}3`40`l+q&<
zqHRhL{XaeP%UJk+TyC%#WnZ=K&YRPCSEtNi6<>9`9$#VMmqk-V`LmC6z4XSXewr@d
zAJ+bxPLcIry#uiR|CIrCeA)fRb-F3zJiX}rM*?&Pw7fn4?N7U3a}U#-HLP?1DLcOE
zz2BRzDMIw_kOPVq^fv{tDdncO!I{gqTs-Z5TY6I(e~4nJ5P`BoBwNvl)_ai>%%5Gq
znC*bawr7j@2Aj0=M%63`l>O|x>8DEHZFaGR{VB~TllyHK!0NvJzt_xuF@PwVDSDeR
z`S%a=R~x=xb>BQ}!DZ~vhd(2|!ueg1num}UKL1@vvF)xc>+j)}+`Ye#0|Kh<k7~_x
zYi{N}T~@k8ynBY*)Bd(I%$rEdy@`)!oNUNaKgX8(nZvtr;@|k{j^q7Ly8l(&{~}mR
zMsu~nTwRDAdVfr<|GO35O|nbGN2J;=&e_|~xaNE{$#Tq8qr(M9=hDb!|9AKI%$?X8
z=Zx6i*rE5~gEl;LP=dQX?Y)U>iYODe=htzf7<n+GzvB=ui?=uXHeL-K=48pos!I0W
zPj<|lhQqWTrf$v=;tvv-^Rf^0gYEopy3yD?S2~Xzmb(rv$6p|H<uP;i+&TNu$A>}9
zDZ+36`CV|14Cbh#bw`i*&!1`k7U=>`wR4n3_wUXhB8{*g!C1#~I)9>9oIZRM{vs*B
z{Q6yS@^~S9A<sB=!{$m>HJ6C;fui3bgX_H;N6f`6>Vt<l1eZR25k9@!2l+!F@n`-|
znsHR#ba(&jt?kC~_C|kj^LMj+e!9K92$QY64*>Xa_y3yoxU-sr^uew1P=#gbyiS%L
zX{Yx~hn6IeSo_8X6WVSz=NJzE$Eba((7IM;K(#4HIbCc0iMPV%Gl+5pgPE+n57QhR
zQwgPapX-wMnLo!#^2ukm@~SHycrs`2&FS5f&*NRs;xXNGoPwg;GR)uIJJHgyAL!2U
zKU|J(y*Z`#^TMg2_e*b;V;fX+*W}&4Xl+x3c~~BC_i!IMe>?V9T~jcq89E#zf%}{L
zCd2AD(%)4$^M@$E0?MDw?LE$JN{`Re;6|ZG>9UoV$w~52_Vy&Hf9oDMy!ZIVhvZJS
z>(U#3k|dU1H8aYuJzJs}^}AX6;5nx?e0&{-{Tn_yQDJt0+75}glY{KBSGjXC{4n)!
zPNq3q^Z=AhIxGh&@uj1}Kf7->>G@%bp@>hrTc6hUD2?$;-t_qvMNPJZ@)u{`p)er)
zx7TshYOaR1?MpW)JKVFpyDwMpMC)%QF=&5tb{?#4c}i<|>fN_78Y$t-&eAZ4bU5>I
zBaC@&Q}m%e8Zq#p5=Y-hcZif$pteizc~o4^G2$P1i`xBTKPf$FWykSv9yS%jJHJCs
zH(2aUALk3yLymy<CRnrv=-CI6iOnrq4xGY8?@fuii;wIP#2p~@|JlMj{Tier|9FQq
z8vo7z<qW=Ad@dKUJW_ujdH3W`yoPDn#biUotQYTObyJ3U=<t_4OP|a23<^fQ$M+}6
zYVX0^2I%7NQMa!E+ECiT*4kUbFL$`~MQ{mM0}{CV&flP&k^|*)mO}c7(|aeoEPV6U
z`I!6*7|*QfvtPeKZj+#}sS}f#+7Lmt5x8h`oHBLy{n1%ci0z$s``Y_6?8}JL%!#kx
zQK*%BfZsskzajkesQm3jH6uz!{gU6G-`?Kj?Iv$;Z};~3c9-7s81wutr5vVKuPYww
z^*jH5vHbhJk&O2?R3jiTumm&FPi`WQ;OY7H+-fa-EIDbdUUXX7@TS|X5X?ca+UC5v
z^GA1(J`V1M_l%MqVV8u!e#cqYoUGs%4Ca5oW<STDtEDL<lNuMIkAZ!tKlJSpP7^})
z3pm$LK<tpI_pWt5|Go4=MAx>Y*9U*e=yvE8i_~4u98It3j`;?TGmKwp95imxh99#)
z$kYTvM*3h~sX;j(tE)ai>YdGOPeWl)1=42ZklZw`_ckC7<0SJFT)??(PpSv{{b_kh
z;aKHJ`gB4Fq&@{}vgjgt8D0`~dZ50vJ!#ny1WDoDp+}lzya>+Nx*OWAMUX%H6<(1b
z_c`YN{+#V851$}V=op;#>6#T=tkI<Y(v}rik1CgrgUw%b&acAwj%WS<z4nXV4py$I
zxoCO(Yh9@Ap91_|4-!8@!}2HN^fB!`Kvbn@64fK0m-oH(4%BjU>NEzTu(F8rdlUci
z54}ZP+@ZX`0$=^LjU?g#sEvX81)zJb{yF@E%k?A5&U<QIFrANtJIPkwaZewfo=PCM
zb;D`%+u?q>im;@-T|e~FdgrCNK{-%7?%HcWX-|fYIjnmCPf?dwp1%S3^WGu#@!quc
zX^!24>w}ax1aUQRT~<7D>#?6dm&@DX4ambZs3%yDn8hT5cdsb_8{wnCGSJ_hSD?7^
zH~KoZn<G`McXj{vIl7^c1kWm_TWcP831<gNQ5C6-^6*tJR6Yj#x^<-GJdLV{h&~5n
z4Nl;)?B23=4%KysWj9R`qrX9wvdVt?)zRID)}NpMW0*dN|Dpd0kljE3xm|WqE*?^V
zrv~9ONum9B*{!iMfUX@8)Y?)5(~YQdpxW^)=<UVnunWv`$R20_+n}@zWymPPPv2;T
z`}v~RG=%fv32@CY5R3fj`g`9B@%-HSjriO?-_4-zH)K5nG9Qy=6n@W1>gyVmzi~A8
zjU6vu`NR>IdD%!`O=$iZmo^~$#{Vqw2XCJg8a;3_C=B)yrt@dSB?maUyakml%P4o#
zZ4(2_;2sBa*@e>!m8QlPsjLs54uSTP>X;XRj_lwr&IX0`!Ca0q!V#t_n>7wQkK^Qb
zx6=f?46{%Ynkd%1dUwxt0qfHrAFVsXJ)%yxC&Kp{n4gEAWy4;DDLNa9mdMO~9Nd|%
zV?b>dUE*!B1arrpbLYb_TXSR2^EylHYVz<<KG5ikWS*N}*lqKMu8}gvug3c0Y{&VE
zb}YDDF30S`u6RyycunbizH1$7Ry?p2JfX1Rk)jOqCt39=eqrCo&p}^LDDZshy*1uW
z2gO^Ogy>j~w!$o+EEEBQiSu%0xl|p?l|HgS`M38TighBOjJiNoJ~77?RDtviYFK)G
zHLHjlw4--(gN6jhO)4|D<RhR;n0S1|-Zb&_upT0}Emu}F2Rd}Wcw_ed+V0zKEmjJl
zR%>cHNA#3Z0QrkrNBMK9QEc6zy-$9;bK-#-huI6jw3lIy%NKhFv*12sjmw|+Ru8h%
zaP$ZER6dbFAY27>O~u)Rk2wWe4nw5-cjx@l`(28M-Y&@`glUXzR2w#^q10lF^DD)P
zuixnF={z_+-dmrLnZJ{1$2b266p8qZXz+QFKfizSkVK|k9=kmWlvYZ5{*gZYb?E<H
z&CBklzJqy1Q!*{w6g5lC<}}Q#o!K|8E>}JXsCQGE8m&!hIZo$ugox9JO>f@gHf5M!
zP5<$%<@?R2s^+Oz>Z$Gfr&jgqW=yMlt+mPDrfXsngUcQd=o)S{aZ>CW#TzS1OEoyR
z!H9RC^v&;p&rdVvPt%oMqG;}qeO=12>)zuuY6+hFx(h=%zgL`JdpK|U=8RFdY1C#?
z>h`AS!N)MSzh99R2Nl`4@YFD6JXW8nJH>!XcHjHx?+vx&vOPoIJKn>YANMo$%%D}>
ztO}3lRv{=s<%^d(Wm4)?)HqXv^h1xK7l}RwKfj0H;ve{Zx@i!u4DXRu`t8~Kvm{wK
z${U##^#@ZWsQ@#%?h}7cuJz`>y<Z-GSuBcn6z@|7Ye7qJ27B5noWYJ5$k8VJ=;(Lh
zXXAHoX3s)XLhPOJIkL5(kCPYUrQqeGo}&FSNLJvEPejp>?1AHYw{=M6MUO<e!{Y%v
zMl6?C66akvMUdbAx<~h!Pu=zB=P)PoNvf15OBI`wns@ub2Xv?$4k)^<j$X1Pnc}A|
z#82lqe~^dph-7W*S<SqEbygG(1<KprFZs}8H7LtIwG07N874hx`HNGHF0`4>Sv|*^
z8ML-RQV+p>c1rTXF$ap**ias+0J`~D_7zi7xmzmdAt8M7calF$+Th6u(;iDv&O`OP
zJOohqO<Mz$8p#{9?m5hazA&dm@d&S%n5*Tv1H_ggjs-gU={x_#PPw0NYCc{5q<OnP
z|9SrV_V@n!I<;EAK0mjvy$?5Kb?dL{{&u6dugywzU5oY@;oKm7avXEvh{WW}+%*-(
zoBfc>tLzj0{UbF?>^JY7wQzM*4H1axWjd53cz1sAm!hdy<AY>#KI_BOZ;D`5ZJUUU
z@gj(aR?CBV#h(RQG?dGxVke$HU`-<xN*;R>L}|$<o7xV}u%}C<*NVIHULI-k`B@30
zeI|MzpJR+`TJ-FLkNafq^8P0;(h8M61?xjcq`d9XyyYVh$4fVgdm<U<=1Ne%zjr)Q
z`1JHu8u@@H!kCw#iaT23`c^ZG@{=S&zwRkYdWFHtv*8iUuMFcdAO3P5UvBSNpLQQB
zOfp*39#(EJ;iU%Iw?3KX*+zt_m&1fnucAR|m+}y9*{f|BWu9!mZCNN6zcp&Ln&$gN
z?-OJ+7ccj^V&@(&Lnu2{A@u=>@bQ~JJ8w>eVUp}rWs6_V;xWuF5YTo0@V9WLYF++5
zc@LCx*m(}~bk@qmSR|h$DeuxaHR)r@!5T9t%Mv4x)6OqgmI_UM6;5~{ON-7ternPb
zXIH)@<7x6Cl5vC63?vIXjSJCS0U$1e=C5;3k59w+HjUJ^_qlx82Px}~iMfmp(|~-@
zd3cOw8z-u8=Kg(N@ZeBe+^Y|z?KFFgpi?RU<w&^`6)t+G0?w1+V>!i(;C|mn8WVT5
z<DHg^-dod0F5d6*${j}<)?QR19kyGNe{!TEMcSu5iEA8(ZUu#TC&bCTSl_c^h!U3E
zD1-SGV15JU&kPf93QNB6{CZQcq`4XIL!Y1D<!Bs+eCGg8wrMRiy8O-_LbU@1bG(HI
zc*+&4=GnWyQE`c$8T9KrOc5Pg^XNpGa%1@pdC9u`eB251$yB*1;`9z`9v7@fT0vjp
zuTkxx3Y2a%9vE0HoFfez)l~uYxvv1l#XGX#8-GVUcu1^CIk!A3%%m(g2J6aOWLi1L
z>5r8Dm`1DClsF&I+L*M$O@+)zQk37x-bXV$eAaaMLn=3K(=yPn?}cb3yui=d+xrZY
z(q|>_ezWBj(Q+U?`zya+ou$*ocL59gG%BL9Zz=#clX;P5t}ca$di{ire0A<^eXq0L
zV>y8t=k*2%KgXa?IeG}MvFp+BJU$>(!aQj{Tv9=}gW2KEUk`&DRLfwR4#|9H|5yKi
zbb_#IMehxJPX9LyZq0B{`xVm-yLi^!^4P|);&ss*)h|6dANVDJyQ`gdtn4Y_qj>6v
z+w?2jtGLd7zM9)zPZ~8)f8lrj5PM5LJ0A#7AJ2YUh&O*5JbxJN_9Lr!8~BZpf{rhE
zh^$%^0eqCh&zS#DCiegf8-m?;lus9|yB@OL58j-{o>$E3ZHycM^;{3qr~SwU62KPs
zY}Wy>3tK`)&rm?S_S@0?+ZY=T?(|0=0!4E(z8_q+CtG5VHfQk&mi36CNcDzp+e3iZ
z2lU5&_o_+bNTE6GPW_gf`z(XHKXR0Agp5gGy%wxmsRY0)@J3t$MVLR$ed?ySQM*vq
z8|cNpfO9t}XtSUIl}L&7-)~#p^616-E!)k}i)lmjQUv;Q{5U>_9D6i6U;JG!n;@mh
zO&`5lk5*&EG)KC`grE;|bx61EL}OAg@0Xz9;x<iLek{kiaCCX^`i9h{2I#~_O!v(@
z=0JDjtbFkW-kL#~WQluky%4Nf$GB9F%*CIk5=`$~@ApGjHN`r{<$PfFQ@xL0j1LHS
zhGlvE2SZ9eZrzDe@MD6Lqk1=U$nxg@!sj+2j^7OgP9E<=(uzg{L5<de-&hGyOx1|~
z5rbyIAc92wh(T!@K>7IE$E{lNH!{m>zP$>y=_1ZAe(Lq?n*3K04!!@pUT?7u*_HNp
znO}8{44XYUCO|Q@AWRidZoTz1*^vfeC#9w#ul<yP?l5L8hu*Oo71*=hh%u`KlP*!m
zUY&M7S0_wQC>K|}9Zy`)ZbX!e!1#3nmoEL`<Kx4a{TOfuZ#uBl?eLz=GnD?^`zS^6
z^^bFZDl{9l^txNJ=|Sl)(jbi9Z<|dcam53^1HX|#qlrIKy=E1E$7@wKH1HKM9!~<$
z=Y7|fRWm>?@`(7b#3nF%?w!H}LmB0KmQD)**B{0)0c8BOB+*ISEB-#}bn2WiO;?7Q
zpLzBnf(HDdySnP3prY{DX#DM|I(3Ls&U_~qa3-J76E}d0F5jJ1yey8MPmoCa)L*nm
zKryG^n#X`*z%!03y$;>@yva_>qd!}oFuMm6oMCUZ3vY}iOzwE2?z4!L42pQk5vQNi
z{N2oF9c~Gl{pp8S`d1O>Z}X4W`^$a(%s;=pJo}$tUhYpXFIhgCb`$RKnPGUgrx+Bh
z2R%>j9PfOyx(N?F1&lw$A+iq8;5+pdDhllEJ2ggKZY$d17+upP4it=|)KXf5lzJcT
z<I}v#X1n$eJnf<yNhBhj1^>ZnQGca!59-1cdI{y?L2V#9@bRbfrhG#bs>jyo(e(G*
z^(Qk*l)-G}f3>LhyCbC9P=HsDbmN%j^AEm`2Pr30Zp>xz!H3CCy$o{>Wu!g>9fpfl
z>8fjq`pW#F3e-XC@Rj7`-xS=pC`y-L9zR1yYT-wCS$KrkVa&Tz{)=EU1hQo@NssOm
zshTp_B0;5_HeDT6BQAy#CeeZ`kZ;o@B$;WBN+6dPDSX~JJU#i4$4u7dB!T6+>3oHP
zbXzE#DaEXr;0zOObvREymygpf4f1-H_(&?Ka^J#R)Ct0`NWEuijBWdp{~w&CaP0eV
zePTR=G3iUr_>P_wYSJv?JTT}8@MchE^ti5*9OVSV%%1#tM|SH&)EP!(Nl(v?`z9px
z>p<!>nvFd(a!9j(=DR@S(zPu;@n~kxWP?hInM=m}-4L*?xiU7xDD_TrR8^jwk(bUP
zp5zmPpyzsn6D`98<zn%3l`?9Fyp*lH<v{WH)as@PlR^@l>igU~ymLcc_x3xqpes_r
z&%Do;-ojwpIn2=SJS>w}2d_|<heLPhAsJGB)BAWj7Dh`z5s*@E*-8|}_v<dws<~hk
z#f5fM9y1*~Onk15iVkSdJ%Pyy2<RicZN<=eyMBm|fmp_0Ci(PVInLfDr{3+CMnFDd
zEuM9a;?bz+8@m+5(bDPAByk85zYF9z<^_r7@cuWRbpQ9?Nz1?b{?|u2FMs~z?HuvU
z_p#b2cM9TL{!+H^{4i;Sg~zEsS#qMa^+>znV|vknzHa_X>HK6xkUuyt;N}Ta4De~b
z-lxevejlXd0ZB4_MWYxcq+j1js=PLk)0fzAI8O4q-ho5;PN2)Iw>eDy;!OPfrrDtU
z&QVfJ<a*RGul(oLTlMDC`M4FP>B*d%wA<#Ofyfe5w{7bKXFeTFNZZ+<LeWspzl!$!
zYX0$l_PM`5<*eF;RCyt?gelP%H7LV^dp;`Eqi>U*!AdCCBX2^BnWh<Vt*5Sk`Ec&V
z{}*>xhnn_?R@bw{r(3!(cc+>nPP&+e(O#+Im5U^Mq>CG*I*{K9P><*uq~UFGh=*NQ
zVJ#QM!)@=6d@Ba<ciEOLKF^hWN>8=$BFueqIaJ6xJ6SdOW0g}5RZO%kZE^noXdNhW
z7?Wj+l-!ei3@LqLP-US_ln*mjk@###z?VP|vFRei)5l)JB2IsYYV)o!C^yiko!?vI
z(4)#jl~D==@;=iFo?R+EdDqk<!_z&pCu`U4MQDI8gWNDdtg-z#D*rp@f6CePD{7AM
z8ARet`M5-Du|cZB9u+joTw%_mqkgCU?f31WuW2gZFjlY2Zn$@9Loixl(8fBbEQJgd
zZE=B|e&k{QRiHaWtIRoZpyZrf=z}f+b$=%DEZ)Tsp6bPp57-}lIH3LC<$+%8$!A_b
z=SJiO#odV*&!=wAB1F<LV46Nuq+QsxXPF0jk5hdgC^hS$3%UX*a`$nFm50#!d_GN3
zn=dKqDwCKBmYnn_f!d>ATc|pdvd?xjYj2BIGxl+Hf_0B7p4rhvBlHQ1K))L|5;Z3c
zW(0E%EVbWhfUKYcj;G$}@DSEpZqY8imGJgmgEJQIkS$pi6J`rm!Dl5L{@7lomYd7>
z-aYzLk7g%*1l0eJhD)ggbkB><q=Hk}wf45ZNsoDp!;6TI@38^=jjt37ND`a~n*IN7
zok6!ByTJ87-TTX1bca-ZJ@ve!+|K=YC7AO>I8G4_2~Q>+6y7;Yb(%Su$jd&?o`$Xa
zDxByN8kdsY&II%IHto6WgkEyI)OLw)O5!_1qfTX*yH$JInt~3#>8A~+WW^gpDC4;n
zw)A)%+n*e!3=Izt!eq9DiSy*M__(m)wq6&VU-1r^k_G+4IYfG}nJH@i`-<UVVAKxi
zA#Z(nFL)h?1EnCet;9LSvqRqoVy1aRVF#bdD6?TGsmhY{fbi4X)89#&FS^ZA<h3ex
zf->>|%*n<$Mf#5^H0BHcoAI(_<;-j?q+fcU&GkKNZj*g-!J9D9T!OiH`E3IxTCbHx
zpweC1o$HtjCL4To=P)tqQl!hMun(yMKhZJL?-?m)rMa8<!+<fD@h|#X&G@+=Eyw$4
zN8V*m<-_h-6p9~Iwy!oRUF?VLVh<AL^(c(={@}A$Ixgjf90{e762$bCZcK<DhQa>r
z@AvWE<wcw-COR6n_twODZ#O9Qin6akdE!0syaXLLpCVta*sqOPRJ|K|ne0z)P|T7+
zkD}6`>_TT2g`|@ir}px7v`6G=_i%!-<_(pxDxeOr{6}g|F&(F!70-!bs70fj5iR#N
zwWwOBuzFDY`TvZHwWk#Na09qtWapGqs?)h-Qu#=26b;rSB+iV6DxT{@h*yKR8EK`@
zKUe+fp0?Av3;5%r!An?I1+|%2R$K+NM%yV@3s_;YniJCb(B@XL0h%{A<6Q8D`U7Hs
zw_TuRy5k)!q8zGR;`^ZPRDkOD2|-y-5!(gkp)(b-It+^X7_Zku;`Dj+1ZgTOYOJ~6
z>4YM=fs-;mpputh##j`Oo<!Vim>4E*{S~A##0gS+;?K^|>jQ=G9a8&c!8N<+vDQ?x
zFXQ}bmg{}QJ+xwG;2$8*qQNz%J<cHWzSvVI39_bhR7M$Rfx0<#vLR~vj1iPG-KWe0
zr<%%$Mj5;Vn<FX?Ous~G!Pi|Z#Lee0k3Bub_*PC^(azeJ1@AW%SesEAW2cBuTvPKf
zJ<XSJ=0MT9Y{K{#2;RWsvk~ws8WiQbP-g@IX*cwSCrb%X3@eg2z*V4ZYoY-!B+HL<
zJ4oWjRYXX}<`%$TqL*h+rD)+xD8FZ~G_vXvDrCd`-rtArY3#=Tnx3*yG##%vl7ne6
zR_7xU)B7Fjc=1m6AS;=3c}w&aCqO|}^g|kxB>oXvZdI(07~5()JpGyEi`CuhDeq(_
zw@JvJ)XM6cxQcYE=Sbvi)rmZ_iEZ`sy6?3TJ<{B^5ysm^8m>(ts*oDvjL+H~=0D?e
z*D%jhj^mDOQH3zxj@HT=-I<NQk7_HYIQ>5T^j?04v*5(0l`T1m{?}qr!%N{Vmcq+0
z{dxK@<HvkCosO+nM@(}YfzpGr{;s#|l@2)7a+unngPOwp+J(9Q(Gm7wbJm&~`7dg8
z!3K4M!eE`{eHn(|Oj^*WV%eUHA2m}9LVc*$qjqnmp5msryaBT=SC*s$gGbLcK`Fuf
z5OZbS6I%hVganYk8OSgIrk)a8K6T!lZwQzT8Fs^26LG3ovUdklzXGY9kMh7$<y4B*
z&O;$RW-2?~lnuHL2cXs*!uU?TLbvs;oy2{5{;=7UswlQ#hT5&)n^=8|3yQ@gTmiRN
zcJ5;@z&!3{j@ffJ<yTXcz*4SI$9$7nCF&^iL^>DfA2O8RD@x)e`18`YG_iuUe#ISX
zKVBh(0I1^*@o084^*dv~Vc^>bdZ5$-)wJagONJ*Qn56D#lKRx3-+RyVYrL#c(>0rl
zPWjn;o~N~fT8Mlkm+VrTMx<!PD+4_i>7MBQ>#sMo-7h&7*>RcB4O@K2D>`a<`2Jab
z?ZW(NBKPr*8(lv8x%{cbsLmu$j(}2BVw<u($wSG5XXFE*mF>5w0Niqg+BOTHaPG@E
z!_4=!Ih#s#7Vq!T-{gq)Z}m?cC<^;?=XD7uJ3!+=tpp*Ewo{mGkkk96tA0_Ha6YR?
zOOE6(b(YDA^d!|wPss(4>1eVgxvEGvO%LAV<n(49sN$I+wq{_P`czpaH$7Kr?lH6`
z<q6X78R@QX8tE`SyAHO^XZ^EB2D{@$H=<GN_7waa^|5p4yTYDNZK#0G4`B=51Kk6Y
z!rU3P2sJUh&7&EM<Zjm8DOewt2@y$o*^kI_N2-WVgLt{#DY%uUTfLH1+?rh^!!FtK
z``d45{i(;bJQ6s@6Mpf_1@Vw8Nf12vdvPFP>veZ(9*0`GIg2pAkIAs(&(_#$6tuhQ
zLF?}1HCxfeA&Vl)I+nj{Phj&gO68bxq+#VnTaC0KhW`1W=VL459?o*^IWy8(AA{;%
z11&pAhCPLV{n2#^(BA`-{>H#*Rk-5!6LF@|$@h759zB(^rd>^1cB=SY-}Ye~th@)}
zMD?b_<!ldBwP$Pu&JLjZ|J69ZQ~~wBf4hc$Jd3Q!1H>1ej&P(P`}$9S@`uLlpjFjl
zC^YzN)ZpW+6w)Qv=uGpOqx{Xc>JCmsrx56&0dR~AoL-}OGO{a}y)L}t^8pSEFI-bH
z=3-CY^>9d$cCCVsEChVsGVLbeIC~ScEo)$IWqFqFhl2~Mign)Ny6^PhpeVv==To4N
zc@?JKp7BGHe0B;~l#eAxOFv$3xu3a6{eka=&zv3advSSb+`FhU&_f!Uxmt$0<K+e&
z9VD($)G^9mtCmkMalRa?J<%h03Fir4PIt6LtGJzWy&HCzco-!+mfRJP8l%h$agg4}
zaS)>!htA)x1A3iN(;f1gsiOUQPwU9;nG_>dG-6kW6rTZ=2BUxs!dAl=D$jP@S^V<g
zTCNzlqSQP%i5e|Mj9*`ki>x!#Q4#lnK8>y5>OM{+_1G3QID;pej-1qvcPd{>I|_Fz
zP$qB(#}w(@H#mP&oL^LkZl1^CuHJ6M+Bx_yc+oLFE44$I3bmMWnDYB6Q@GnJJ;!Pp
zx^M@l&CpxERg-rdM5sO?Et(dk=WOYBVyVlI;nY|F)uYxM7rg$0`ze>@Car49IQMvy
zlAlMnAt|V;lx7llvq3R1vZSQA?MZl`0;cQdI+;ebgsNmVyoc_Zvx0&e7e2i-46?(;
z&oVv5DQV#Ki6O59^7rRN9r$y!eLrr`R29!luEw@_eZ?7R$m`GYz~?}}8Z!dU>!)>u
zPNJtEBAvfqCQjN>7-C3y-y4YrdLu7Dzm9-@H6U*s@y77<yfhPKB!s6y>A{)LHLun!
zngG?Yx>K~MH>ty$=pq*9FH$*;!vC$uxmoj(G#}k=3ha2$W?O<%)p8A1u#ynlWC`TK
z;Kh92XA!wezA0v+I77WRzQ6T*!*KGV2|9``iclq#hixVLhW9%U={1i@`$r_3)>Y@8
zUQsOqp1yz*IG6smAv}akB}Ln+Dile_bw*QI+OvBUf_Ln~Zv)qS5hupiS{hSb{romO
zd6Z|A%c87Do>TllB!x&c%0O}C^)%=Ye?aj!rUdxK^w22FoY;dVttD|3wCad%g}PEp
z``@%xIEnb)8ihGp>;tu_>t2<N+lOfx%%B(-egfa%4?fTpL(2{k(fL364)Uwtq%bf;
zIefv1whTIPbK5HB{JOp!6q`X-oC>M;!4c|<Wjmev94xKea!Xa{7E*$VY7z#zeell0
zp%kbN1-plUBDjntI{2Npe6FHWl_M97VuyrZH+>uOwldOL3la$RbTG7Vd0rt`DqWpU
z&o4#d;6;(Xfbl2Duf+p=MBQ_FKq<zTKEx#rHpO1#2&kUlJe(xj!!piv-AA9|PLUP~
zjK@BTou0G4hPrW@TzoCJXDHGK&L&l3PY_NxiR!n^reyyUhM^tJ#e<c%fgPb{<>w&H
zK-p+TcpSG?D)ErC3A(>~HE1qnpt|Jc1Dn)G#ATCygR(13dCo(g|6SV@4T2Jr$eoAE
z{V4BjupL!DYj947Od}RShw^`2&V%MBFQrhY!RrTWwoQ=|SzB9$bmkJ8qO=WqY$u0C
z!NLD%{0*NG92b;oV=0-WA;G+@dv@qWn_Wvd^d*R;2d^M%difn`s|~qsP>wU6dP_m(
z_t!bl+xYbM@%+}^NFzp2H=~%k=`XQP4A$n;=d(Iw*1)<ugxsQUSqzE|0Vt6ODYs$T
z?-V0OYL74acsq5ESajZ?g03d01+ljdCAjiZYv@6~i1RDg1?`rkEfl=*DEo0aTMKlT
z0vRQ#LNJtEz>>&YY*Plg*`eu-+JK^G&cA(uwr5qdvkd1VTq1}I2X&-MpaN~SakeKP
zQ<B#^Z}zne*=geMhP{rbsR$LDboev5=h<;iJE8IANqK5u)dSi?F_&hUvrqWCuy^rz
zTtpep(n=FPhKIPwl`)3zSE(^}HFlT9mpornGbL*gsLzAmtaSm}5c|J_$i_7PkQe65
z53#ZBs)Nb~+8iuzU`?QE5v0MkNPjj+x1MR{oR|+))@nPmPRteLok@V1L{#<|0>H#)
z({=MEm2%G5=%*jjfTpw!<LJZ~|C?rT&AXA5of{S|zwHK2QQ3Rv-|zR|^6!^3xmAEb
zMWVgs{E9~Mv737RD<?fZi)SBbPH$-Z8qM8IJ$MZHTwIpe#_LAp#CyxUM5;r*o@J@l
z>GHVsWV5X1R{;8zttR4(x3{s?dK`((PJqVjK8Z058^-7}i`-QTH7ySwYrU<f#G6yi
z?-)jeN)g7($)o`q-*?{u={M<rHvjm?_yRHwdRc$Ar20m|+c3O<x8BArnGps?`|3!{
zB(|WJp<{*DYj0^hW3Jm!FWZo$=0k7)vITdF2tI<ly-nwzv~aKoVBLqhG3~MsbrW<)
zk9`VRtQiIw*!3uWScS&%M_o9$7Yi(CkOhTuhx%`8@Q}{k3h#|}I?O~AVXk2hEBu9y
z?tyUn0633!%j`e|RO6P0#+U(f=2EWmIE`4WX~>#ol;b!>Rz*wcLQum_>S_;JGX-fG
z$a&A<r_Af^)vf&Lh{Rf~Ns3yeR@S<7&k+$iIv*j=;REm9@J;_101`1DGQ%8g`aJ;a
zrhjmmeE2!%q1fr-Ai=L*rdcZhv{>GqfyIUGdH!<fe<F^D63A1@^4V;co0|4S^%lRY
zS{9nxKGNV7asD<AvU%g@XhLV`xP6;@?4g@R(NU@eOD75GMWCWFhJe2K755lR2u}pI
z!^io6AUY6lkt>~-=wS1Ht{XI;tcXo*bND2v@3sK(EBLu?15SqGqd%eD>onvepQ-p;
zx1$5T<agl1@<*uCnx*zBQXSH}bEMxP&<mAEibzAiOjC*&_4Tob(I%6rtB^6-)7G|o
zCWX#61<!Rz=`$R5i(J>%UXWVhV=Y9`teG`4X-B#J1)#}?W8SpiRP50`9q7!YG1Ahy
z+N6OmMc<wFk=u0hH=S`q&Xl!WF^>>DWa^J=9LxE!AhAfg<rRpVn^Tjz*>@-b`)%6}
zKD8f+$Z2}n1^S>6G=s``xgiVvhES-+w9WW225G{liRKnH-vhf~tILomLy`WkYq`JT
zq)xiiBoS1F^z>t_>$6F)x(U!!Q+e61J&9IFixLBB2kDjKR;Bn6p`(gblU+~n$lSO0
zRB8~|sg@ohxJi=?+QciMe5?vBt^ShDy+EbHp3m_i5rlAx`Tw3h<Y^VBYU?aI??)Sa
z6UF94`Y;+dweEho#-jpXC)h~Pj$thiqY|KfOQTCQK>wM1#)-a76aDP7?T>NnuG(`)
z32@z`{JGKT@Frh|8KTm$qYe=?I7X6!Wc=cC_du9Y_Kpd2b%r>vS(4+Su5a?aq_nk+
zlY~r$IP$;$m~%CH-i2uy%$Q}+Puu#=Cedhy_}Db<GdwU`#tP3?f^uC@wOLRqM-=Sz
z+l<d(;aW_TnX2De(3-0;*YVxQX>I8Ls=<j#A2+&u%;&^4Rxw`rkQg&FKFQ!J^UCLM
zx{E#>_z`jt#Ghk`-^)m+qW)C6ux(h`zb}(|^M9splTTnyD^Y5vv^W#jgS??7euS2T
z<*y38ppMFdIk5#BwZ3?~gS-A|^jG@FD5+`fN#0tW@k#YnJ|`FNUSkb#N?9HjLDsVj
z^tUm3{4*lQ@VkH%p3InUgv_Xw+~7zvt@N9~v7S0kfuyAh;-dwKU+mS3A@f)Vn)brj
ztZ{j=QNzMYQ6^#`i}H$Kfrek6H5E2C2F694zi0;P!}iSOiH^=v)qJ!M-Unp|e{;7y
z!t@^2^b1T%iSX)GLK#X6r@HMIqoHLy8l9BJM;TVKw@%G^F?1gog3BPwig|t$s9Z^>
z?$ImP%3K$or5Di6Bqv%%N>X3zSi*mUMVHc7s^hx?wb`u{k5U1r2kORcm(Xwmv`|Ui
z({x-=a!MiDM#+LA#?X5;%>7p@M00|&ifbDU$>8BsE!f~i0(1$5sP+{Od<9S+?utNP
z9A6PjlXBJi%}lm?J@TCVNCYAjsw@?7{xrPidvMb~vO(#^$$te`6&82BJ57>Uv^3aN
z#`)9C6>~GqJwqY=wHgv2K1SBp`C@nEJef(wW|N9Kq{|MqyYsx8e)O_QcZD4pRF+&w
z;<oj%Xc?(r{g0TP&0ha?K}pu_9!oB$pwNT-NDq<MG6H46C`2F8C{~6EkMH8s`xtox
z$N0#Nuc#_s<_BM95il@kG<v9IvQK%nPWItRss!}wD=B@%nS{>GKX*WSjdXxLBXG%K
zw)n~2B%@wt5$JEDTkS@?k{vVKKGH!iED^K~1`d^%lnrYW)DXxGU4kW?=j~!+d^EaT
z`=BnYo~|d9KLmSeHhD1-#Ah`YMZeuR9M2zaELx75ceKV=oGy<_g4TG!p5GuqddIn$
zf$sDNUn<fsO@g6)9_m!=ByqAH?Ohf1j1)oAvhQ6@^sb;3`Gdzs<Pg<c>SGlx+n<)r
z$Z3eryBeI$(mD`6>rXgV>cf3ppNyXQmMsSN2gyWyXOJ=k&6>)GmrhVr{24Sg4)t$+
z+oG6ym04zH5HtPYKpfagHWZyKc_=yvQ@Q5PYWg^!jnsO(Gb=76yvKn7#d{^ChCYd#
zY!WxAK*L$aNqx3qUvfHx-3RN76Ni>`AQU7$1smt$K&j-=>MERDD+ZLqK>AOQ1S6r^
zB#3ESID=<Se%eKLZ`!~rbdK|Trm+TBWb8WtI%D3(v6qB8zc*_N$C)th;yTf3KWFj~
z$(<!K&p~<@um0Tc){bvvyy<R(L$br+x{H)MZb#b<t`p>|hV(IeR&U_MN5Z+VJHO*_
z+#yDKFJAZJJ*;w@#iILDrgOWU-c6K($`dFsG<ui0Te5yv&a19<9RDc5T&E3ZY_q<+
zPx9PddLOU<<lVsisX3P$6G%HqCAO^2#Cc@26@i$m0P=TB3_X6VrX0i5yn2K-E>-Sb
zR^7F0oCIy$Kw?}(IX}Ko($NKiqh-10R-U|Vx%`^Br-HruPscO)z}6sYur_#N9rZxD
zu~z(^Us|KGQfN)*7HMM|fk6e8Tu2<JAaY<?hcjw8PMh-Qp`S0Ooz%QV<iHLEhi(;q
zQJg_YTQyV1)vY`~OzDQj9QS3I@inWn{yIYM9;3KZoJR)pyhIILLYdLC<Yk;`xNn=6
z>RzyE&QJKrlT>Ei5zdYnMXuru79Hljc^~hSR8CzzNHdv;0FMM(gcS2koSQ`x!<7%~
ziZD~EqYb5qqX*)vbOPbPOK|H;G=OWSO_SpE|IJX+sKLpMDJDT(c5w5g=s4b-@P9o}
zsqbk@0kj9g_&bTU)C1>IxN|0)_bQ-0?Zw{CxsgqF!S=j&wJjDTHVU;D>-vL*CyuK+
zoN>a765!rZ9R#LC0y-*bjIq}`EY=cuAFPSGeak~-$3~N6H;<wgk^VF-xQvdR=L0S@
z@*bdeW34~<mbX`MS{`jRIi@nk>`fs-i&1!w|5ZFv2@BHaK1njIPn0Y_@w%}~HH1!l
zTLW~_dr7_gh=-eDl+HUR)n4ABvp|Voeg^>vmK8gbW$#_r)WY0;63ad^kp|Tq??R~v
zMj<_0dLG?@#1@21C7fTgmgD9#E=AGJaAyr?P%ec#yQZE~$3Vq3=qVIH(i4q8;wH%9
zuH&+L;sV&0o84)A$`Bim580DAGzBVbRCcNXs;0Db6K{HV1hi5yZKHX=IHwe=RP%61
z8LLuCT4^x?GFgKc?~lAS<np_h3yLp82YHgvD-cmi$~t2Ov?m>vGDdnoCgD?#s-F-w
zkG(hUUHBv~i<`QdIcXAkLJrcqHqawzxk60?CzKY;P-6<h)_w}PJH8s*+i}dFW-F8m
z+`s?lw`{3R4Rm3h@?QiWK~Lb~8OPuEsuJ(wl2+f8`v0rVwL3~=3@E$rp-c8gT4WVl
zz-k&t7(&G<o~Se_c_0xX4r?&kI!I<R^3A^OSv9opX}Y3jr|x{rxEl1o9?U}$G33=n
z@wX49W)hEcj6dk9o(oKScpr38J2IIoP|KHHh9QbFPCR>#>A=vKa2cbB`@pi>E(Xp$
zWW|N%2cO@fI*g^1aqeYI$w9(6z~vSfQu)&8`xh8OZ&_T@b2=tWE}+g>hIvYfbFlK|
z_qoW%)S>bKO3T(ToJmICN9E=c%p;tOUHk2uSCPnJD~r4OB~IKEH>hgLFuxk7oVCuW
zchEyB5hHy}Rj2^CyB0m|3gd2YYKF6~!TA%Tl4s2HQt@_=6z74#DJnTh#Q9Jr(&EUb
zGBR&6tiMM#Ra2l?e4ng+XAiV0M1N+3G^5_zo0bl4)Q5R3#-<=c$dSrCeClw0mW}%1
z#K-8v57HRbn}_5r(u%x(t1r%g@+4H*SZ^%BB&Heg2|4r+K}F-mO<|T|>L4{P@0-WO
zs%JVHY4za~dd3uYPMpSrh*4}6%FKLJi3mUT@wwqZPvgvq1}QS>Jt^2mj65<YD1oW%
zwgPBBnbw2yLFgpZ`}$lLM^eNC>%J$7yxgB169Rxha-a$P{r)+DN!%_J<oo~pcdFlQ
z-WA7_gjA)HPpaJ=uBX>|0aaIYUNecmo>TZxqDKr)1<T_2zn^vx>^Z8+P|qNUGP)gN
zJv`tP{N3AzA)g{hp5WWi=SKEKtRm=|%C#M<le|=Zblt3+#RnRnafG6{0Rnr|J_p+Q
z%AaaLP%m>!QwiRtJqOBke}dn~sjAti;KNd&7>oCOA3}U*=c$h^)czdm(vN3DPY&+W
z^&NDE8uzG)IeuyFzkZwd@%>$n_T5q`Vhp5BtClz3n=bp)_}~Dk3W=_)p2wwvzbm_t
zJJ*<WMzox$ej4Bg<nVVg;_vgzm{p!L?rOca_QbJHVZP6snY2ppZR^^lW`Y-6?^R32
zB)&es^p{$%E9%a{p(^6>>HJlyWycN5Wac%RBCM)Git(EH@UIC}_~wYY+D8F@CQ_!$
zTiq&ZrcV4@lB|*$G;$Ng!xW)59@QRV{$kek9tj5CZO7dA0Oey9tC5=eHMCRI>9}r$
zekE3spqHd+-y+gqBHPEf+-S@lGv2d}$3v*H40Z0->AEnH*mKtHJ|P{0E|}}9pq_8n
zJ*Z_x(j7%_w*H#m0NvU*eh$<dHTt9zF}>?ZgHfUNDQJMQPi)Gd8W6crQ)cmOJ&>Lj
zi@Zx3-O@C&?fKoWAr3HmRYl#{RWSK=`VM(l;+pjPR#EULj+k^x-8w!&kv;@~j%MGn
z<HQR+;W*LVUs(8`CM)u!@!sx*TDkIYtlFY~R1AjI%>v_GbgP(Db+}N=F7;<qJ>$n7
z-%#&WqjMW!kh)4r7hWKJP=$G%mQ~R>p_?QQZtrM|rX?ZGaY_|*6YM+2ExXkn>EzV`
zfYNPg+cI6KQj~$l`)YlKTqoz(pDu5MFo9*D{n%N9cU_J&39MpcVHs%H0;4)r{k*<m
z!&QZ|<wL7(aGE_zNOq~3&QPve-l{)Mlq(<wz!&=Q^|M=A5zH_QSprbk%7M;Y$bcDd
zQM`+;E>$w-eJGhleeV*`^=)_c1A&HViamz@Smj0j^R#+T_H#K+lC`-*SaVKOj@KL@
zV@5Szn-;5Fd)cDz#NcJu=ctZ7MuLNKiC1+$i$IpKwls31;%w9VboZWBjp{f|k=1@R
z)dFNkig{KUI6k@>tdKeRZuEy8h8OqoB47SuNO*8B)67x8_s6M$-4QzhqCkJZAg0av
z5_|7WotY1p_g>VFPzIb4o2A|5O4$2k+shTec)EOUJ@h2Nk>Om<77-KK=KNFz!#4l?
z=jT8FdH?iZr~zs6{@|Y=Nd54~>ZA9$=9{?O{~k5r1SHgbE@;`Pu(vStR-%oXaAlnP
ztQq{1ze`q*kf0mE?^wL_^QYbfXBYdm%NzAnB?Odg_5l|#cXxE6L)~}KTn(^Syh^P_
z6tXH>)ZhN&(*9cKt*SY4Q(^MOStUp@eoeyU0qNHn(s;?Qw2D4FZFS5Bx;?*|o~BM5
z`c+Mn;!rZOjK{fGl865;)YOV5Wv@P)EOm%%S_kL8G>(ae1Vvkw_r>-;GSRF5SM%ai
z$LWIeC&dYCH@7<)%YDi)-fzyqqN5UW5;|D<)u*lzC<-8S?lZ2^RQs9nLy)qr#xq*a
z4BB%C75e0gPX9mxBca~#L7~1}{X@&XcTPR2(i_KfRFz~ZE-<S8{Huf$45fhtaV=A9
zG<NqJrZShmB?<5_JJN&fNUn5+$_vy+#mT`U(oJvIUft^IWY$vn@$od$z>fP;{v-`!
zn@A6FiZb`5^Yn1|SUZC2>nhAyC&uS~kUwFq+X+f+U5ZXFqf8I~K%_c(6|Ld|#lUDs
z(>cP*0elGSUC&9q3^7oNri}BDcoNk3<MX{6JNI_A|7>QP#v_VxB-O(9C#TB7dnJji
z5zwzQpa}TeAfY=^$_zcQJgPn&f9a1ddlXUn0m&XnHkPI598^Z38(ud>d-5j~lNt0i
zvJwtiUZTqzlqSW|$>)uU`h?Nq0I0BGdhiO+b%V0Ww_y}H&^k>KiQ*ZeJgD<HcGLI%
z=Wg*#GH~S0Fn@jzV19ptq|V&#x7)|<<HJbx0a9BZ=wk~=YMqhU%Fp9jGf}3?Zl$HM
z$H$|@3?eAbW2cE1`oLl$@z!%>&R@dmuY+asX&hhC!!1VW?L9v$G2O)N>7(<vL6$(t
z1gSd=@bSj4y?^qzsN+BlPoEu)x1KNiHy(n&)>?l|m++GeIkfEWuYEth=JTT!ibuI{
z9Xs~SkwV7qjYbH6Bb(PD&hO^$U-S24{M-F$6)A$fP<7`e5bx-nW8bG?-p<fl)I#vl
zbL;OVXlq*b!po6LLk%9LDO>a#L}bSQHMSEo?x};S3G;MMq?3LJ^dh{REQ{}hFT3QS
z-qNT>o#On()fINB=|cM{JkkwaJ^pu{_BZHF>ieHR*?EeWlR6$G+|_ztmtXT|Uh8IQ
z&5}+1KDDn$tZoNJE%*13Q7wrn+a3eN?84Vw=JoyJmth{#7Y*p|F?%>=&5Uy{s=91a
zPOb%k)zEKVgo(Ac!_N52i9M^>wI~-s`CJj;eV~#E{xMWm0rYNKBiqqm`XHGkV6lGc
z_JKYoPQDM+&CTXj7VS%|9hpU|^%b>=5tL&TTM1GFa!gSR^zgGk1y$7*-5bQD;0M9j
zPRE)gHBXmCNS9AxB9}%;x`^h|r^nyZYwxDd?sK}0w~b7X&h(jf0}~QBG?X#*!sjB6
zJw$P&k_Jg3q+GP<Posvnw_EGiiH;RJ_G{DJ)lC{6_7o=dx|Sxg)d5f~gIsA1P>ezJ
z;pphC-r3&Tmm5!M*coRDJ(4Xy^L^ueRV<cMpci;%BgUV7jL7T%PJbtVbNE9)D^zbg
zeD<%VG{{fio`OML>pi_8rSEl%vb^5-Sd3^i&rM03qo=1GVM#6_R(I9sisY+Bc~bA9
zl2jQd$WiS`f|2X)%q@C7#@}j%?gE8wD3$XGs9*oD6QDO~-sNqSe_ujO?|5II+W{&w
zGW;EtoP`YEU=Yd-XxrL1snsFOt1+n}VQLemGZkG;K6V1zXULy6<Qru!sD)e&^27p7
zT|61##CnFv)#$n59OlU;jay-SV9FpC@jDGMKAE-ga=JR{MCdmz<NRsHVeH0F`#BDG
z^7!M8sIN~nC2A8<L4{lmpKo~N%%M_#*Y#CMqXpB5^gg}&rGI~){MD2fp9&M?vF}WT
zW)tC)JHxX(li&26Y|1E;%sz5V4<m{XqsL{TT7A!zC8`L}x5}1m@g+a_b`}&VLH&Xb
zh3vp(07~ppL4*%K#1H+HVJ!#vkv>;o3{nlM<vl!{t<Lmn(ZD!|`4u(!mILIXDBlqL
zlXLDpk4h@JEzlQ-VFTnN+@QRR+lqLJ7IFSGPx?NMcMZZ^AMZ?Vhj!`m;zyPP3-vCd
zRb2fjr__@!N_Cug`Uvx@`E&o&y2Dxj9Q>DV>cdg*j!u2{ezV6C6DK!Nf{-Xi^nBry
znvM&ZT?NkN#(Z|h-|T+9bUVnqG;h+}C)DI}e&;IjRcL#mAws7Toc?HWn0??5D1kgq
zK$$1Vy?tdUN~Dke$ZK$dt)WwbQ+J`mqy}d?hq?ULqA<GmY*#_^OSL?Og0sSM@cc8O
z(?jD1qy)+^J<b<L>wY6jwcBdOqhnO@XtxzHUe7z{0EIw$zXFR6)HX$!-r4(|I83dz
zc(GXr3QvGm32^pd_RW$o2d41X9m>hTj1Ic>Wt?kqbbee0MxTpF4aq_O`Oq0HRGHQ!
zJTPJO&2LVk{9HF^0#o&&G+GZcJ3Qe8VOW2h<Yk!TKPB|>1_wcS%UX-9%EKn2)dS40
zW0>E+&C};ccmKQ%pSOi7CTEM;zmevUSsa!|Y0}+VpM`d;3(_W6rzS@CqCNN9ACkIL
zKU_;f;-rSgf!FBNvb+U8@~efCIEs)?H${xU`*O=O%wdW;tXEy=j+8As^6W-}u<ep8
z6`(f#?S@&0i0(Pa{KXv_QSYZ9Cr@EZlLlTu5sg~IUTZB+lzS6$kQ$qgGg|<CR}c(?
zd<^wHP$e>_Tq=k5v!JL?l)K)MMyAY>hK91RNnH(6`uXq@CGf#*4IsX`>Hy{kJcK6b
z*zP!Bo4Hd2q-DGetB$`$|JA*9xUQ(kBOa+eQQfhJ+owM0{s1Uw%s};91(b4R{Jo5n
zMdG^Y{cW7?Ijd0xe;pkwG(UOA@4y7f6K2i`=mV^vK~UZRrv*}~oL#Ky#3ET>2h%N@
ziFp#rDFKn!cUCtw+jNiqa~hZ&o3%cqe_-yFauLiD3LkWFgK`2p%>UrVuK~K(k#~AS
z)$#0sc8jRpJ2^yxpomlCUMi#_mySJq17Da<9!r_v_|0HJ7$aF-oYU7CB46s8GROnx
zG%mj1-vW7HQe{mlI^reNht9T3HmFQf5l~jv0KLBKmF&q7od5-kA`iM=qyVay*tut%
z*)!6w8R>6=)aRN{>eIXQ4>KSM4SJ>@^lcn>kcxDCxZCS2m<L|bp4n^Iwm&ke16#g$
zeDxE#!0UO_-0g^J3Fu;aewLD8Pc%>;D8r-&#^va|`QP|>`GaV;#vX)5lN>XxxW1yV
z080<K87Rexo)3>W)<UR=BM8xR5hn6_K+NBk&l`#=NG_;DtB1UqHz&J1!3?LCLmzZP
zBg(H+l*@<8P5v6XyJr-i26snClxyAo<$2aRyd&Gaq$4f~N=@VpA#~^V{1~Y{E4Aoo
zjW{yroQ@m^rNP--dJ}O%rRrimC$Td+#H8hQ-$_T(Men!Lt3~R<K$PG4%M;OlpZF&J
zA|l%0o60DEni7@K)$Pehg5X6BUT>1bi6b>cwWlkAlG$cES}r&;xek9nP#LvgHjtvH
zyGbRZ_%45BLOfE3bXn@yJGim$BekbY4SYRHyyr=}Kx<Qm`N+IzP18&7sH6jtDNRJR
zZ0TJV+RruI0#LdkpV_@mgJYk@T4DQM_Wj?>foQyxKvC@DksePY@(3w{C-49MW0TSp
zc|Iyqixk8rkS`q+mxR?Vf3@=&3C1tWIQc!lofg1YrrEH4SI)FZQt{|P7sGqo6QD};
zaj3llsV%vu2kuXcJ!&KdnFeLopjmT@J!2x5@;N|AhVHF<>>nDYY45eRmUcxGGnj=e
zw%ou}VbwJ~GL&KNTuI}2U-{s+g{Si1^`h6MD52AQGAy;G6VyWVpf%Ia258NbG6>B4
z7lU9qg~^|6z_=t(a4dlQUUWPTK6={lLRI*A#<qY;Ue_DGJ2_ArqPrTP8*GZQ%|Hzp
zLaEEsR`WwG$n~u4=y}O57B&2^n=l$w2Kw#)zo_TI4cMAdvn#Sy&l4?IyB=o!GoQ{w
za3neHCUc&hsX|%YpO0u=(lJt`Kj5U{+mngDQ(1(Xatau~_Fytf#VM3>@)BbuL9Gk}
z_FA&rJ!%m|oVeXPqNyywJS~U?LFjHXY*TaIp_AZ&IsyHGwaBbF`(rrnvIghgAj7mg
zEm~-aU8bj9Uy{L~ikEN44Vu)$M5NPvz@Bl^I=}^^ZVt3)PR^s-lSGC9Y9%>bmD#m(
zO$lpIDd+YK%RV|zyzyAcL7;xu()^m_%inXWswS%U6i^;tN+tq-`VaIDm7@+K*RD;N
zy5ku#(Uy?-b_wUVx4loRg5iR8AYptVlC7BSJup>`+4@fQL$A<~)iQe~LA?p0oCS<u
zzhRe-YZa8UR*d23=L1aDRP>$nS)F&KXtQ7Ur%29yXmBgze8k&V$=hm5<U2JuCN8Bp
zshT3<bkbigf&7XS)#F(<|KLQmMMF-nA=Jh>6;h~5FA;qa>WXMczYj-HD`YOAiUMou
z7O%D3qGAKHYsx6U8&X@R#_-Sgr%(6Ur|IXoefX}WRl}Q7FQFN{_S<XDsRjiOT=PYc
z^w;kq?2vrMX&C6h`v{$A05|cyERP7pA<GV{57s*CxM`WkS2Bu)S72_&ejIQ70c2b#
zRrYs4MfN!dq;ONl2c<;pylz&#*#*?nUNUZ@->mijQ==WN-w;~kQQ0~tku7O6if!Xy
zow$d_?c`U6IMuk{=X6!7@4|kq*aeL!=E)Ce2V?m)_;fn~UIo$hY{&BOnFzo{70`2x
z6!l*||4TXouj<`uYczNL$sL7e4G=1okJT#2`!WBZaiuDY<+u1;onAx@7j=6ufT0C@
zeGAc?_sO_E(ti~&mWoceS2v_z)jDI|S=qiH6yv8zVOp@Ko2xMWMtu*3QRuN!3~hFn
zy}Cx?x1~Sz2%xs+RC9KRr=>E^!tQiVKV3o@m)s0Qn;yD|R*39wvS50FQz}}rSRg^w
z`Eo0JxSD$OyO<tL8T<eRpy6_|ouXcc3ZTDVKShfMyhi9;B&kw00grap2YlMG2RqG~
z$rLfuv@~H^!rKU=++Sw`DXoS_ZKGdZUceuDq^A)}%rp3dZyDK70@QXfYpTGxbI_Ul
z2iAu)?eSnI{7J%atzqOz2~J`yapEzYE9VC+eBYk>Dps0YM0UA8B}?x>02n5~%li~*
zsL-L%6uF39&ZPGUC*4*jejlpQ^BZUxs<qBvK-1{V=;DQmS*H0-^tdsvrjYBsC?0QG
z22}CdJx$k=LelxM|M8384U?5rS$6a_q@SU(7qMP-rh(^2xBko<dZ}GeA{m|E==(^W
zJ%jv-Ab&R?J<OI~y<73_m;656Z~x%CG@hrdK|`et8YyMkkrpC{x*8Nyz)<oj+w<4Q
zc-Dt`T9c&YK!I~Xa;i!*0TP(>#Gw80V+B&qnq?ITDpXTOIUjn%=)HW%9JsTc_U_ML
z|Bsc*Qu+1O-E+<?D2Ru)_9D=)=1(`-zN0#$vr7*_VEZ(CD_>JA3D**}xAqRbrVJ=Q
z^Szi;rUFcFQp_&tg^9=*+)be?XCZpJ?4Iaek~C7SS|5-)Ud~fGi%swpR>XN!F2QuD
z23d(3H_YDkv!MQ$B8J}soCf~|kQIFdCv}b9yfu5Y5t)r|s$-!S<U}=iNe0FF95tuR
zveHI*$pXy1!ecgBy;bum8<ZvmbRbaySDs*xi=5*Lu^!TpBFda5aWh(JA8o@VS~nlx
zqbbYUweKOp8IF>al|V<}8ABUhP2*t2^G!!}N9F%-JA|X`$oEXrK@m?XH42wtqD(T7
zwqPuf^t<#Ampe3Qk5cC~^tcC%_OE-iPf}1DHF}Fs4|*}(=jSl_Up{Mgms9%Cd@To(
zJceCaFv?-V*z~+u0h1s0@C$PS0}0@U&@X=wtD99u8;SqrLs99oCeh6>MPufMQT(QD
zYF6F)%QE&290`X9kXi+h6@x(=jFw?+o0k>f$>*DzH3Hk<!N<f;w4xKMx3MI4prBeW
z3_l*cK1-u>;Owce(}a-nl$ZZH#|$-uS@Z`Ct`k0bvixx5(U6Y<D}iQW7yN_5$3t?q
z3QrxNId6#+hsEoONqc%ka8*dPUmiR7(`uv}#=9GaKtSMcZbQ{Cle1veN;8GrqDAGa
z>ozuxg@qPxoK=Se|AY)XQ=o*2Bscb$L?%_4``Bsl84VjEykcMA%s=2jI^rF>OI&2n
zcY&d9kqWq(i@as7(%bj5dtke6L^#>OVnERx{szXR8l+>K=Qo>YnWKYMS*Y`oyY~Y=
zIFgb-Byk!%3f@uiuGj#L7(pBE_v-*j!!eQykH<jkXV8@!gh$-l!~rA`TuYOQ<!zcr
zaD8Uh!TfR8pN=C=LD!=Ujh2f=IS-l&EK@W@b&m7rt9c1W>43XuZ|TdI&z_Wd0(H?c
zhp>6jWhq1bZT>$0rBC?w^7r}W`LF-``T6<&>VLTZeR<7j5W7^KsXb!xNo7MBR+eQ#
zM3ibvMWFjdu>3l>93FN5Mu~2a81^LsCw`=SuHyIle57n>2lq(60rLkizkfMQsI+37
zp?SLB|NOb%@6WII*Za%ee|dd<dwqGizrNkxUhi-Bx0$GQ;bx9!st-`5-YJE|IXVZ%
zPB6Cew1|@<SuFwy=D9t8e-6j!Mjad5yhC-D)1p1qtL0YJOhW*2Qg5VsRRl^M<G((Z
zmu_ig#W>Ygg?XnnqpB{lQRH1AJAMVk*6<3%4RLxLZ`Pd+TcdZX4GmLbtPUkSRt4we
zLl)z-81^q4c8~LXTpp5%*Ch|=YeOsIqmX{bf{1%PSSBfjl2~{hQelN%31^0ZzK+S~
zksfx{2WcKDF!$MWq<M`I6dx6P6>P8ILLBkN?Wln$e)-RkV$e6}2tuHTyH&$<c@<Hi
z!lqS+iV|bQPm-dj{&V}en#zYE8>C7;u9jjIJkcgEG7?u#1<ZMCu19Kf!9K#{hWH<e
z3TTBBdvcu4gm|hR=g7d823IOpUZ4{Xr5jM=`LkpebE-zpyt4RB+vE^`HxQS{L^Ybw
z8LZ5BbMNonsBdaAeLr^NZgxD&G3;_5=NLQ5K*sc#e)=V~h+k5;qlDNaMI!;e{orqZ
zem;#)7(Si&G~)g9-aifM)3r`KJ-v(D)9p`d{=Bd0@zPR@R0Siv$T-)P>}i}&2mdXj
z#LgGl>1STYFJ`1)GScsFbEnULJw4$PXP+DUISY=`dpwh@WlhRB!x)?5HABPLFf7pt
zk@EZLcKb=#Fidpa;gP5(K3kt|-iIl_#%t8zEKTtmN;c4CwT2s<smEeMlG3*V%<u1*
zzG<XtA~b)EpLeYC*xx^In84MSbB)jk1QnQ`9Jt|~fE*~s&qEJ=1=5dqtE_Ntyp3?+
zpMDoEwb<Qh;~G_UL!l<r0Kg-*F1+Ozw{q0AR>pY3f-;0TsclW9a%SHL?#721{v1P-
zWU(gaJqjlYv)e_SU(GP=JHlh1Sk?a}@#N;SGE76mDXnKdVQ3<jSh0uncZ1X?!|!ey
z+S><8#gJC`c+c#6I>Xzkof%%D3q_vMp;5i%nVU@tcW&WdEm-uY8Ct8oS^XVrhCtX6
z@R{X#>spc-FKy^*_&ti_#JYg|4iI|TpluTr)wUP`e8KpeFxOLI0oTp@bPI9&(>Ut)
z1BzG~^Pp}g4~i<qo%KluW}`V3pSE(d@ck~-)SNzPe;;4G@yred-~JY@=%kMQU&<$!
zZ98CQf&t~dHLjcEpb}0WUyMC(^JHgg(JH%KeqSO>f1+gHr7d8fXlv(d3Gc9weB4?3
z6z8m^H1~h_?`HZN8iFQz8^jFyK`-qYQS6?S?&_g<xef!8ag4WMRB+c*`@-26cXsZ}
z?UXdiDQNakL!`9j75fBf*9GmL8l+B4(mA~c^MD$>f*u+<rS_qlM`^jk=S4-TE0KI&
z3FCL;Bg<*jDw67F`*as%W$%JNPhh=)l5TREG++0Y)#?gB|G<ad_wXG7EuW?xby|kh
z@B&EGP%8udCeC$#ks9+d<ce<t^9T0&4wzg2JECg=>0K<+MZ~;d>z!?ot|;Q=(Md;}
zK!rBIx5=oNr|H&rxm;1TM6smyy7Unwff>2EF2YPt(X{l`Uw%@c0e+|}pt}m=KCB&x
zGhW#~jQSkH{90DOQD{dT9KyPRE_?JZ+38KaeZ1Okdh$VK=1fx}xjhZNIRQ4P(wPC>
z0INYclT<KhP<$?I(uz9m^>WfyoNPT&94AD;-XKp_12i_Lr9)Gv^|>l}vP|lm0aZ=c
zE>22^4#2OU1C&4C-#_@xkB{}<`{DjR`W(F*M&rD}{N5VeUEhZ8b{p=)^K+Jl#*?JV
zVU+CbAQj!CtKWCYH7a7tTyRbpjvsAwKx5=M8;zNNKkk{dFg>%BlF+&$q5iZUB$pg`
z9cJbV0E-Imd-U}cg`~*R<F8R@R3*)79A+N~uk3>J=LF}Iq48~MVCeQ$KKJ<;XF4Ih
zSEZqBnRpv|6zGo)YQt<_g+UJ+fIgSzpv$8uA<Bb18PTmnJZs*ahO=PAbL5&L%&c2Y
z@5{hk8bV|*NPfKs;yPPdZB5G^DDhxm7%igwK0>+KpyT>=3@tW!?YKfP6Kq#9G<thS
z&snSuCz!zFcX&HIwd8N^=SNZ_okjl8r`-DhFlVkWTT@3ZXCL4}E?YXwf%2vS*ENAk
zsD#r;%oCeC2=K;8d<L&--TV7hpj#Qr<-jK}!+qrHaGpA*-R)V+dy|UfGN}A%OUXDM
z&0+p*U_wE+Y2WcMG?C8Za-zaFy9+h<O$S2hx%m8Y<~!E^2O;`UdO9;FDgF9wN)=F)
z9{jS~ZPILZ%vvGx7-U^Fgr{k0iZ#=kE;;v(E~W~o2kW1-W?f$Y9f|C9`8&avKX#4P
z*tGp4r*3>V?I@*|qwL0Y3+!I=g=~4{?r6jGgia2Apx})a2&ZYhr;PIJXC{{!*-KGm
zUB;#QJ`F?PUOoDYJeAcx#`F8tNDh@Zd}EvwG9V{7gKYNUco(&~c%N_XI+v_S+c3l)
zPjnEa^GS$7<F`#r${Ge-+XhL1f;lt3(9y0x#x~$_luvn&YV9O%?{hj!<#C<tQ3f;~
zd`k^jMWj#f=Cl8U4%)$de%?PnP1a3)FnJy=YL1HIxJKKd?-4hzWilvwCf8_a6gxwS
zJ`(c!k#)cyR`^de`CENg1A>&Be15rQ?Sd)Jy@lV=AzeKwW5PjLMN=L{bD~*f<VWb>
zb9H1Yb_oiI(E3co^7%u<$tHq->QqyM^izLIBG}J61RsCV-b$l0ocwZ9edrP>!=z*X
z_r#bFc+z$C)!_Ul1v}fi_Cj!=Z9Gh6oZn60yg6%WyDpzM^D(0R0wW(i>ky3(qw^)I
zBIEWU&~E_!o{P?FzyGpX^=NN{ZAX(cFRN76_arhAKFW6}k_<|5r0m(&wPLQ5oZ9n9
z#2_B9Fr#EPHmw`y&6UU3<>$VX*l;>hp)DfS-mwEz)Fa$;0GA`^J)LZJq<Gd$b16jN
zP(u3EVCZ;6w2z9TGt*A@|6`-Fsd6Sm%429f$IwNf9;vaT9hISX2Z71iW&J0b^?Xc_
zlDKBTG6ZjxBzhbvYNsTY%z%C!0OhuOt2z3rqVhPu;!$3boqfsQ!-;94YmN*`zyH_2
zzNwLI9ycA^d-$eX)4sW<_Q_5E_(%WTW)!Ehu&+$6`~;_p87U$ZuwXIj7Q$(k`>74j
zBPi&7|9qXS{C9Kza~nSHPfC9(b2HrsHA94WN?3m=H}})*q*U5hvNwVXAs~U+lsAYP
z&Nty&L&EdLE&9EUOV>eJ6r{S*xts>AmdacbnFE?`l|BGU(E!Af9{@T=P0DbrcGOGC
zf$5GK>3ZxXZ)4Ub9Qz{7@88B~e=CpRS|39Sy&`_pLCw-ixSlZ98{2^+P+s0a4q#nV
zhPhiN{pSjG*uhyz4$l=fmZPK??;+GS-f-UsfM5Fn(Je+D8~5A&<6{;5qJ-JbKkOM)
z@9NOzKn;yrIS`$8+&OWiP8ec7JhkKubl-EX%WsW&yA+jj#sQ{An%gvNQPW1L<TBL8
zPJbrK>DT9ww8Q#=*_9n3_w_KT<S=zX;Pn9J?_yEENwwCU`nY*FqP1-O?`~QzU#<U3
z>wS-ZtU(+~CV0Z}NR7m3?-9~4dajR<esiSL+tbs1xR1iR>Fv!M_6;+Wq5X1ymsYl~
zYG~9EA(C$SHtqRkFJlp^fHDN7;~aeQ-Ki(1<%l@2yIe8ZD!(y<@@gqjjrbd-2B;41
z(}#NY-NSoc&i?wj+5ojtDqD{di}9fa_J(Vz4AQUrs_8k-Uxq1*O0QP+!onlK_ou%1
znUmt4)cAa9IZJJvl#WQh7o@*iq|@~0@1JRUU*ga{e<Rs%){2>v+~e;c|AZbG>`#B7
zI$nf=aMAoA>O?z>)DhCHNx!}W(0DaH^&hTxSbRtBUQD;%-EKZgA8f9fJHLm<IIlAq
z(p_!e#iD)UM@6VTmlpnaZnM^Z-`~M~ccaC3*(SZ(?z+f+A<*+VUg!9jd<vBqo5TgE
z=b!6zlA!SLHpP=@E2#Fo+LS-q_jtqe!E_mCEa|$ufPVs1J|;lDkJNNC>0v<nbAr@o
zpFX#<C_7C4Z#yh+<JSMQLl~~gcM}Mi!MN}9dw=W&D?jpa<#3i{Q!?k$q&FLKHY+?p
z@LR*5=Yzh-V`<-6Lerv|P?IRc0ZcQquNPMRd+&{T5zs{GS-Q)-7<-aR>?HBG5n>l|
zk>R?4^v`IzPIgaEcn)#m>5fckoZjSQr^vsa)pJeBpgSVj>-JQ9d#Z&c{v0KZ{rEjN
z_=seBUIk&!4pQNwk9~v^t_i}4QYp6VJQ_jm6vgo(#^0#qJqUKxJ<yl%+n1LYdGWOG
z3OcX0<gtjcp*~gva|{9%{Hea+J*!3jxZK-5P;@y%?yUx>PrCeJ8T?-uV%Vi_xeRo!
z<aig4er)Sl@`<VRIT&%O4a9tBFvB-mcNe~3?QgFi)5q+iZnx>b#+Q8EQ?WODaNIMw
zTz1@f1F2P!>ab`~pU#g%V6Qj(XK1_h52~i-SYr#RHKG5d0;~Tc5BLq$L94n+jqw($
zLR~dIfS{D*vITef{HYO?O`UEkz@{Nd=+_uyzd=;8xb5FerRkPbk&@Y!m&Zxn8J#e{
zG$nG1^gbH%$EPE=fi$P_(b;C$kMmCM&|Qt+1FT^W^s80Bj<MQF>(?mywRG#GU@v!W
ztO^YMAo1|fGglRg&MNp+9a8)9PHk>aQvTKL`1beym*z%Z<eOje&Jru<b`Ga6)*VAR
zk4BO^kHqJqJP*~~#`hCveDOTx5q<e6)haK*keIx~j0;cgHIJdBqX(xl&^6TKO|A$>
zK4{8YZioE~C5V_YOS(*z^f0+bozaRF$}rU?x~2&7>*t_Q#=GF!@p935Po8kuqdGbG
z@i6@oBgF|$>e#tWTz{dw_M-T9PgxUGY~;{GyvVUIO0!onY+oXzD428ZEJ{iadhRC#
z$<O56hC8K_=T^{j18C6TN>_v0tf+&vuK{Y$Uzm3c`iP!4JM{@r8=~hLpm+ycGtY{M
zuVOwl7gSqea>4}NMk<;jNQ^aMg++V`Z}(7MU-&uRGvH)(LvML?9Br<%cB<=T6Pld?
zG-RA#zuy5U@Zo?FZKntNt|s@c?(g@%H*n5Q_er7MK(sH~-}+B(`jb(n%<Qoy)^ysu
z{IsX>%1qj=BCoxR!W(4KpA(DXEFpQ1gjsrypG1iM;~>#~S|3AGwbPcpCt<W4rpW9-
zHz?$c0{=*9>A#!TF7|6|D<tNzrQZr=GkeM`B_XNo5=alz-|Y%49=2UCrYMg-yQ~K@
z`&5^5wP%9Vh48rs>B#B9*ZXK(fBRT}ZmO}wOv+MwC@vATbr~lqE<|3&L;rMn`Yy5$
zbwUo#S%3OiG0!B)lM5y1B209Y4P#HdkuEO#f7q<CrvbO3G;2HpJ-9dY<Cj7HuJ)W4
zeb20y2M1z)FIv)kG>JWvc%+NhXBlbp>Hl}kXehZipJU*U^}oz`l-^g}^CVlP;6Y{D
z^T>fZypkGOg>%|5;An;@r$@{_*Vi4jCyZKyRzvfa0>h&wWYVBVtXTGB9|ovz;rknY
zcpG%i;ZrG=oT@sn%g#HA90emGQ$EKDirewy<*53ww>>M~?DjM?SJ%Y(6`-g|w`URP
z&u?=u$aHezEBQtRX;XTzOl8Gzb_Iv7Ed!J#kgVDqC`vvDwf>15={#iytqGgLppXqT
zV@|L#8D}j3foHFpGD?4p!f@TJS=27Uvu+Was1KdD4wo76wwg6;)fr5%y1fO}nlezk
zy3Mdlhcz7uSLg#02APKDa*Qm?JHfd?d$SDE!)(W>u8`eBCw{)tf2LyIIaC&kh$^TJ
zE==t+x1dZ&GlB)3jq2V(O|;fDy3XU}1S(}p#MyQX^=pQDr!7k25HlHoj=!k+xjlad
z+X<KF#w>5n-iG&()!TDo1=p*f^jfj_9?44(tXVwN%hWrM_<8Qke8Ai1>eI}iI?w6Q
z^HRi#HvISg-7othI<1mzI&L><6}{+2`D@*!9I2pzHc^Gt#|Mzn^qX{m@&Yz%sE>jf
zkAl7Y0<D=B$VD`nNevH}T^z73;)HrC<^V^U50fI;G2}?gsk&Nf-cc!JKp6`r1OQ_w
zI^+D_#YsstCR?=WZe{?x?hPsZ6XN{av=(ux7p^;+w4S+Dc8FW|l)xBhmwSI`ft75F
zmTc2oD%i)N%g9mnpLo@n;R#UpS!<xY!S*W$+ENFM%7Omu0R@tN8i)9rmk>Yg0##`p
zKU1*j*76fq&OWecYv$T`*`jz3vB`-q0IljI+{nHz7InP-Bw{CF87Ga6ls;Dzrx=+^
zTvT9;>p;uBi!_uWq^LuxVNo^NL%o3OWUV_=VC&*@+6oARkykAP{c7|CY4^z;wBqj|
zt?ne$=+*#<0joK4oM@4t#*!k;8pYo~`!G-XAQpWsMM<;fvrSIq_bDjfisnR`_vTN$
z>%vju)tsZUPKA5gXzi>xc@PZ|Rg$eL!u;`W^zC^N2t}lvyW3CKy&~q@lO|<bloy}`
zy^DyZjP$30APAZ6`1oJ#9p6fRGMxS9W^zp#=1=4A{6E5G%YDiKQzK=9{K+RKKEuPY
z=Qu5oPukd>R{)8bfmcnPw`Ygx^3*K?DLeQt22M0NNj6XJg{gu$Q;*ied`M|$A86l&
zqf`TwPLDmkoUV2|;-3GX-?GJ9JrM&afqK4@2XH%z_+1rThZO3*E#uomxHa#gL8L`d
zd8fB6^Z0yD3?$PI%yb2m>1;Tdq{n{x{IpbgH16~B{j>ku?`KeHrdO((Ea%C#%;lpv
z@#fP#p!zws<qu{=-^Tm>Sx+zaKHjF=+*lNq_B8$Ov_)^PyJ%0LWnTFRG{*ZqHMzG<
zVby_@WD%O-J`6+uF*-ay+<aF0w%>1XxyBcBDG&r;x|#K9+E(&g->#A}=Xm#T{ozgp
zbZcMqsB^LB+JH~tQ1#H0aN-E4?ZT7zBS6U+aGIFQ>gjVK5*!A0u&rV!c~Qc~8R6pM
zFNH|=nKkS6pQ8)fYq$NAf81aYxJwK*7AcEhkM)+MKwS{#so14d;nN@7nB-o)J$-Ot
zC#xrUwJ0G(`)^uy2Tt*>>yPZ|BR@Pi9l-qNFb|x)ST?|v(QM?StVh$~M9AhaBRGIC
zjTw{`Pr4T4;SzI##bdwec(!p11geeh^K;0XV6JOuudb9+!JwKNy4+9w5{90<bDH|M
z>G?Qm+MJ_0VZ~fhb8)2!P^CjFL=8|<-<nHhhQEs2M}dX}4+HFu+8%{m-^Ogm9g~~e
zpGq(@lCtnEj)uo<f5S&v!S1<Ls$@&gqx`-N(Q_H-SL4t+KK(gsf4dw#?+B=`ORv~1
zN_&(9)QdpDVfWLB{=<F@lj&JE`MhbZ9Uq4b1``9SjlM-1)w3;NGOypZO$Ask$U+?l
zW8c#r?VtM4`rc<Vc=93nbhZU}WGZ81T4B}3wH~7L4$1i|YE+*Ce>e+YuiBFb(vlcV
zh!(14X65g>VE>M6<R!4j9x8mc8nAW!8x8?#+ma|t=PbLXI>(*A_o#6GHJkMqWGqhq
z$@Tc^3ep&B_L-E_G%U^<w=Cz{aTc_+qySt1dS2#<&pcNvd~;O-9S45X3d3Q7L3h1Q
zF;PI2Ku0jY157ObIK5jpxp(hO-88uFu9SIqudTZI|DC7Td}gpbDZE5d(EavDN@xyL
zWbQvieCO6YRDQ;%1Am>`y8jAN+V<a3H;@{iDS$peN-N*~he*dz-QOTB69m{Wpy78Y
zgz5DvZe`K0+p{>I#<7YZe-qu%93)Ac<z$*&)oU3MSRiu}-kCba?`%17Q3&`tRgaW?
zLOgo&H|#Jyn-MQQ*I&yvLX698%1Kr<>6?a$mqUPP)kGWl(2|>nC~=TN*^UuC)?Jp!
z6t9;PX9S@t4|Nt6aeg;wXpf4-V>)xa{Pai!ZD~%_ptLVn_FOAX;_D?*tegV;id`HT
zrmUUZ%Het@x}%}KE<C^Nl?|*lWt?>2eNxvG)PupH>s?!<zw3`smns@snO>bP@VW1S
z{N*5_;6g~=teTB#C-aU+^3j;v@Mp@UY-Kvoy7!0|dY=8A*^59^Cyt>?zw%HDF50sc
zkN|t0;uV|7HySSMIl}neBBX6)cyQt<ze$%XNBP^l_x>c^@8e|O#~Zlk-|6vzi_YY|
z*(ozXXLpzZB3%)+MQFcOcwtZRBd<u6UNuE~`YqadtNApqgJ_{0aA%H*_un*U*J?wi
za@NaPN`MABkUF3~i;w>3v+v<S=V|8Y$ca|b=@ETyKRBtpb7Gvd=sM!JQb#H-Wt;`J
z^kCBm8um!Z1H}?4+#r=bM2bmvL{9%P(qTspt8!|HB2l!GE6F2fm0tA7Lrhb~d3Nk^
zR}Fse<^l<2VGJV91|vvIog2!rG&BW@ETsKUfgY`W71zQ>YKR?txoQnTA}6%GGpIlN
zP=A`i{*e<gsQH76V%eP{6Nw)`6^du_<=pS@MfkBN&KOebgF+u$|D>`93h?mixNFm|
z8R*ZF-1SrXh}+3nh@4)}M=&#Y+AURxRBe5AQoJpy@56guVeRR8bod;`B)Tg`dKEOP
znFt<Rd)^I0%Ajb9DuD!%BDx>#-ki$;8F}q=TSvUK3L}F|C&6+?5P#nbnrJS<gsA2G
znYf>)&p;H)c`j1X1`EY{lt?tnBuQApnIXDXb#syY6eHW8!ELfP@zGJv^u9@%>bi9k
z(aUw&BbPxq@?k(2#K|R;{>WQJ3>K6V52ObisNVVKkxodWdIf{7H)+o&2@<US$lLfd
zT7mVze-4om#Gw1|veVxYgJyQ+U7N)Ih5~d2RL7g3$6X2A2MDV+f=CQ0_fIyqZI*%l
zG*3^?+WU=oRazJ6l@!J^pgKpz(|wkRVkfKtc`}G1gl6YHiStEA7k&}tFB&$dDI}t?
z=-{`jn+2gK?-ET;?q0{6twTSI)1-#B;M8dZrNhNk0C`Z+86yplkEDs5{1M*2OtpTw
zmpD#M6q-=2ufR!$J2+AMJGA;K2kyy2!iPVKYMCc^Xn#Dw`L*B-A!+83=+%Ly#tpSW
zpEi{4Q9wm9J%O8EVfD>1(h%yEExHMwNm^Y`cEBcecGTMDKEEm^#rQ2I#3jfS6|Xw#
z<aUxMI}-0-0qAZ}8Y*Su6z0b^(Q{RA8JCz^U6bZclt!}9QC`A`XHoBx(WRMCmHl^Y
zQBo_%QIdwaCY{8hAX#Zm70#_0r+Dc<`R!@OVWa0zUUw>%NMbOM-m=s_vaY*LqLQG~
zSHzh<G%Q4Fbls28!n~_(R8k|`mB|U(J(zkX2Dk{ba^b66vzbcYK291H?L;-s6ZW4S
zoK=ruxwsfRN_KOU>MZJm<AM4g{z;MYKk?T;@#{1E<Ug0+8FDqO)t>B*H-Tw^-fW32
z`52@R@ci<`U@mOUl0CELH?i0Kwxc(~49cN)7|XGJ6;uJX`M>>bqy%*AL)#86CX5u_
zanon*Z)v8ygOtJ%I8RHA$=0wAElyt1wa}H5q{V%{?D<imDt_qDd!Og<?|FDvF6M3J
z+E#;dpb&4P1hfX|$Ee&moKlzUcNLWqCQP@WNTzt*$#gskkq|!Ynlj3@wZsH|-_e*+
zPPg_A&E)>F@BVf&m*&(pB{D<Y<5H2z6ngSESmWCFAywYR`n_QN`U&w!xQ?(nY0^P|
z`0vBi-+gfMCZ-Q@I^HDQ?syaR=LzsvXd20~dqA~up+-_+)aL!~=ev`!JTkH($++}t
z?Q?%!(nVQuvaktx^jpwHDXcuEM#Kv}b_N#Ot$W(imQ?g<ZA63FVe0b>6xD~H63*}D
z>CZGMWO3dhb#FfLFj`09kvk%jO`<%@2A|H2Xdvy7*#R)$p?wmMwBh-?XwRLCKBjk+
zcX|Um_fRp%qij2$clLweVUtO6Pl(FNzF8su>_Pkq)K8YwWDG9*=V?@vHU!eBH#5GD
z_Vqx6{e<srPyWvCAvHnRTs8$`64G&;&IU%dUbOrg)>D#<>r)t9q3oI7wz+4*^LG*D
zSAHu^C_2ouj6Fu2$NS~hoVhs*GebR^p7_$INDb<Ex(ey0Si5|j{|kGdzZ!~y9AD8p
z111Cm;~O`rU3+u13T&UdKK|IG(w}=1P1`;%e|o(j9-1^d8Wyu19_{%hYLkmep_bD(
zWt*P7iG+D4pE2dfzD9f<d9rMwg%XqzR-8#KM|Yu{qzE$<93!vJ5H=}&v;#GSzg3@X
zX449SG6(L=4o$ft%jRw3MNMePE@AvGO$m;B-L5rthcI^O?@kAv{GKJsDNv~0`aSxe
z109g(U(c7~lpuw9dT{NbMM)>#oCEzn1R9;{Fm$)4ZWzv$B=<N8NfHB;ZyhOgCbGP_
zNp*M*sW9o{j-KI?Kvje|N`+sq&z~R9dFWK8FWJgUEf*7~F@SUv>GGCg{x&w&!;QU9
zp9n}lM<M-B>~l2_*;DLUV<&4lQj7*$*A#K0t6++S(c|jqBXeyeh67rBzx>cTUQbg}
z%mMdbXR3fEMbQMTAx>hSjy%vqMsEzHatt!kmWO%PWo0KkESG`S^gldQ&&gG;mtD%o
zt)VhW&o)Jv{t}hL>+~Aq`<cNcYbCopd9|?9K}{61LVTvlBh~%tp;|I&4N_WWym^^8
za20mLOZZuNclB~PW!ljcTTRNx1jBf+Zpt{nn?cdrB7$anK=XQe>^2NIfCtD&F*dLl
ztR|J~ghMEm&=gJy=ofYG`Ts(+xu7;euQmgWYy8dhfiaC?7~FL0$MK%c4yc-<iPqQ!
z1_2-D+xW?HNg>~%yyKb*$l1w?jD_oSb2oL@5~%e?O|+Tq9#H5s2MO^CpwGjz8bYte
zXo>Dw=bYJBBmQ};S;zpiPjXV%=g!JI+KNqZ67e53Y}7HW5E(t@oP>{54C~5Hk$Tq`
zAMcAu_xva;M}L~pTu=6L1@U$1nS2w}#OL{z;-PA=($D@EvR!OSJ)jV!qlXR5|Cec@
zs2t(1RGcb^%CfZ7<<BFelyQjjYdQQocRne$s_~gz?e?iL&7txXYLP_3^BoR8;{<_V
zI1f%Ob>kNu)K+FlGt`liI@4>bnHJ1d&gW%MU_!L@EwB-nVfGqj6C$ir2cgtu$G@W?
zyaHvq%+M1K0@5XzR5Zb%`5<T1LuxD=#gq7h|Aev*@?t{rrB7a%p>QvvP-6jU=I!Ih
zMQ2KZ!1b;oB&5CHpRL6I`QkxcoA@Pb0}pixqem%{_j)@7zx0I0_hGQ_!;qIE3b9E<
ziC%03QoWPRlqpCZi{oXGxz7wtK$b|h5UJSB15PD<DwFP6vXKNPlxOIjiGUPf-tYGp
z|LgyHr_bv>eqLYiuQ&e{-?O|amE!K)(Z!jn+^)ySWPu(=FpB2%H@vo^kHrG(wB0}O
zj}Vt=0*dOmH?CBCe<p68Mnlm^_Ei8%Zl2~Fd^GxjD*2rsr;o|WyVwu!vS-GeqHLbH
z>@aZ&T9A$(r!Fl)3hI0+S@rwtk?g6QknU()FFT;)*%PP=qmXC;r@xqzWgqu4=u1y{
zj8u!sbFMg<7li~S70HVzecbfn^P~4ky)S%!!=_^XNR|h6>!@$;lBPGur^9{nFX;95
z80dEd`rC|-wGDT3-5H;5{G8<HhCMka)7#_8-yx%39aYC2mETBjprbCD^cN4}XBCsZ
zU0T)Wu7cn9`_w>gs$)h?OJ?3PSXG5|q(L?e2`r!3t2K_|ZWKt4uRq{s^vTSk+@X><
zt7&nZet*tT{(d!YANT(4{dODr`{2Z<PTn5&5)qvWqs)@=JJkqqK5C*|==nBH>N_@&
z78R`dyEk>Y`8-0m5Z*<7vLJ12%UxWlXtRuon01Ock4M6NMnCziWE7k8RKo$5$w{tD
z&v6=>Y-(i{PQMj3<E32ARgA0pt%oXS%R4HvmKoF^$Jv#}Io2AW@;?4SGrJE$@1qQb
z)3G;UW-R;%T%8OK{wa(NIKNAsWZ|C3-|^__{y$jx)yVG;P*a8!)S*Am$Ito@XH4!c
z<8*O%uJ7XG^!h+5LalEwsD)&s?vH>X$gkX@#WO#tO9qYcGhUi8b4eu<cH)z#M$qWH
zt|_BDf=M-pPrChcdiT0H49KB4l4_)(%N3_&eMKD&uA`r<fbu7Mzn#;kzYBjZPL|HA
zIkgPGk}gnW_^sMvWkpBm4-~Lc+v9rA+pt3|=&8`_Jjy3&Z+VNm-tEZVt&;l;&jW*q
z4@a6@^&o->k$%CZU!*G+N!`cDMm$nUXPP=>{cr6-<3_2!#Bb6bQ<o-zsgp3zP+DI`
zx!#}KUQ(>O*kL0tUQ2=h!{>LH8SviTibJ5S338Gg=+`08RRBxBq8TQ9ZuAdc{f<3v
zgz5GwWb`y99CbZ*UC*v1rZ!_#<oYbzlNyWgZm;-Z+I%keZEPi(*;Q}ytDdB=iclLz
zPdKrc%!K~?$%Q+~?E<Ym_8s-Y_$@kEUNI2~$Aat0<JTZfj29~TagPqY|4H2+Ay7rr
zj*S|PC_}L-nMm?`rvqzp)#C4b%p_^T4HOyE$A0(QdCv}oc|IH^Qum%8r?jt3H=s~0
z<BS$H<e4=6O>_XyH*ZV*+}@}0sm*|%TQr^~5}eTrqTK5<gNs}x^VOtjCcF$Xx$4vQ
z+>iMp&=;jg*dRtHH7Vdp(Qcs5{;3VKLS?A&Nt`-!^%6H}<9|0k32*2pm#20$1fPgX
zpwTG<<<|;RBrr8yOZy;ynLXE&6lF)^LBEYRs^yL!<Lw{~IT&f$lWtm^sN`i5h_X_$
zR9f_}p9WpS9?LYz4(q<WGMnY(UpD5f(dP{XRc0c)Sj`!#Br2NI1zr0x&R|QsTtp!G
zh36Mh(??#W^bVe$8Z`#v0<p#lGri>nanziH1i{L7JPe#4q4&|rN&OlA98KCDH|2&~
z9*=d@jV)DsfU|WnP`o8Lzc<F*!MSmy=BSK!)a|+>Y`tEboJj|Y%t5g0`{!iEzaW9x
z48z~?%|6Ejxr-)EKBqle`x6)@Qt6<-U9nq}!|Wh2h~DAzjTvAjfG~84$FP=8k)z4r
z{(34ukF*%vMCj0R2S5?kqbaivXi&o#ZI}}=cT0^#<LD2Pcb>*%PN^^V<nC!Oua1c~
zIcpIg?216Wmw#7gY{#*mR4Zj*b9Aq(N8O(Ia;cH-_YRM=qW2b6td$@EO1)d6)m1?A
zr$o2Lq7E)~6EW5Gkx+A9D=7W6jy1hGoPV<%CLP}B6QEyvK)?K=_|J52h2N0xHxKN6
z8o*LCta@YnQ{X4ee)Prw3V;9q<k!CIEUP=H<cjZH=*cabbkjRLuy1kRK95Dpll2Ra
zyC6~4_&ytf@+`R%fgHPp^5<vn9H&8+J}ujh)TP;fUIx0SYt0s)_hxDvdUHxJk<~fk
zd>Doy6%Re`&8e%Aq?+c-v5yobA*s`9Zq7T$2+8wiKHrBK-4<wN<z5rvQ|FJ9<p)0@
zDe1b<omU2$U!kiZi5`E@(~v(dS87e5eaQMf+6{s$M!P=Qq!_M8*{KBcn13?xD}<je
zNA^3~)|LTXq^%7RYEU=HZvgdm3=}lU<ICt??$`sbuhZ+KqG;RbzdNw=B|v9sYZV<-
zu57*L$<P#ZgHveHY!8j9OrWHzH)>wZSql22?H)*%h};p6A}vi0B+H+?FNzpB7l9sx
zdc)#Lk>XwTQ9HVVb`K{s25C674ks}Y8X94Hk6EZ%J;5d<trTw1Br;+L2;MMrnq=W+
zpdBw@{8?s0DgFGI4^l8C(bJ62o3Xo!v8Ii_#Om>T7VAO8fl|9N&aY+|`~F4*FI^;}
z+&tZ0wPzs6^}!xp_ekv=OO^X1eD<4{W?L$d?rhJ2MOA()<V+P+?>3oG7c(gjN?kY0
zAC_?bvZA=$V%{@pEm;MgzkyN=U?5s|scy5tggUZ|KsP2$SH7qHdc8CX4Wxl42HHV$
zwrQu6OB7<F0*$yOm@`fjx^pI>yFM16dTP!evD6Pu1xsQ~3}Uhg(xjdvXTIqY(382Q
zd82N|SSuUe(A_E!^6_qQr_<YB6YWbG=AOGBlkYCq?5P!wPBf6#!^D9?5s6e0s(`+B
zD-T3R_zV_*Sg9c%C_QN!y+dSb(N4xKnp~jXS%s^<g!7c$kqEqpO~-K6_?5;U^`VnL
zRFXe~>OOI!K?A1%lahNm*;$E`8Xe^%E$CfsJNdt!-;Bsx-m>Y?onDe<q@jhT)PR`d
z2&qp+sl~Nu(Cr?bcW6)K8Wfmb_D-QWizXVtt^va4<z`BlH98$8gU>O7PSNLgHYRhH
zWqQc$dg3g2A4RUzRnw!P={cnJ!IO_ngyyey2@ga^Y)@TN2FmL9<DOyPeEfW!em35r
z7W8iZ($}m&Y7#f9dvf??nCDcf@Ay0C^sMK55-mvcFe<>0y-6HtAM|C)Kxc=)yf*j`
zx`(`jx=m0S(L>IUT`_t$H{~i7coSK%#3b%Xn{b5|VHQfcKHcBup_erJ5^0gu?Q=Q^
zv>63e)*{-kCiLQ6krb>fnV_2p8L8g@hSf?<Qp#5<yf7AV=0)A)M+d$04bZ~<v@6@N
z8Wd6u?QBpYEOF3}T_AddIYd10d^LYmuO`#~!%X*oIM;e`6hP@|`q($<+@2+4DPQep
z&4T!U5S*fYoKLk_POQ_-L<^_ZB&a{THXYYC;=96o$kh(gJ=wd&u4(>66|)TN_l{k$
z!ghMY4O+KRv%cItLXfQn5(6sui86sv>&&R%(WqT#|7Zg)_g94^a#H^s{5JEaavX0y
zlltd--T+f1Puwc>9pI$F+fJO~oEM*8pr58@=&nkziyF10zfuwP8%DO^Y+Bp_zb7uK
z6QCF>Ik=>jsn(H{oc58PaFoR()#KgXYCCAb4ph=nhV+SAGfNgdm>U@8v-M;!@Z%nM
zdJ6PUwk2-YWQ;Y+Hu3uu9c3k`-<Y{vsGK!17iiEx4YVo5$8h(X_>}l<=V_E0h<j?v
zg;Uj<b!XZ>$Jx5(3?}IyFP#ue7X+VokT-W9$DHXg!@1M>7W#6*<?eH5IXFfJljH0H
zOIdN{e;OuiKYz2Kp~$_;@l`*$?sDopH{(kQNfb>&I2@_bYlITVu=bFCXGn<(8?E=D
z|DITB@A&#I$I(cWcWh)2PhGp?N%vH`mLjBO)jcS7ksx=`q^L*OiFg%G-XF4)Us*KE
zRHY7(irMRzk@~fE^Tkv*dRfKx7+Reb8~~R+b8eGDwSl1S4CICeYWcYM)l6a&M%PWR
zQyi_Kd}s!x*nvus_OWJGg><i~cb}%tkT8;SISo{eCW?_no9d{^1i1&ZX_`c}O{_y6
zxloe8j&qG>{X4Aw>o~sN|4#RhyK{erzxxMDoeZ~aRb<yY-)-)Ely*xNjjMmGdbzVp
z%e9m6_yCg6&`c^@7VWz<i?X4MxCC?Aru@|NKDYsX;gR6P(zuvk{0{A}e3zzfzoqe(
zGS2TUb;0!fIjO1rMD53Pqv~#_Ri0g?V^G&XwgYn9&>=p;iP=n}$~gDZQXRQT<L|~l
z;xvZZqbH1n$$rKCFZ9Onx|vH7J!6Sur)bcfUZ^|$9kv2mV6VzJu-Qwz-?ySgq|Y~L
zg0wf}p{kH(oJI3bHg3|bFlR-ozg4+jE+=O;$KEt$oL`L^$B&3<mknlf><esVi(pq5
z;(V1n;yRG&t%qL4q8Uf|J@@FXC{c?rl426`4#wR3%lr6po0L;#x+$Y>FYaa5x|{{O
z=F}-VQn~ZuyeD?dJSls7vc#}~a#YEnKGH7{`EN~Hc2F}544PsTYG}MQ+qvv^3T!}|
zY-xiu!Q{8WYosNmV*iUbtjfl)vd-5|?c7dV3DI?za+8YKV^(9*=jh^xk=_`Wyy@7a
zWeL%ZB<l>5L|7L|R_L?7p&e=i^EZe21t*Mmv77grK16%#^`Ft|8(|Hlp1fS1NwV(P
zb4LS>0p%qVsLD}GOv#*dH<?x0czaRpWcamp{X{gLAx+{E^uj?jz6jLs(QfKA<^?F9
zPYOJds`Sx`j8yG(Irte?A;a=lVTE6}o$VOVv?P_2QCE%JBgKbKqXoMfZ;A3ylPC%W
zK8PHbfX)x(=zRsJ)-cO-nY;hnJ5~3DvwATn-I11K>(8suii)P-)hprrX}ZXrx+r2y
zOAp6G7ZfA-2H7u|3i(r6inQx&8`^g&ke+({afgq(jC5|_=|J(V(ZsdB?0BR}bmH2;
z{8<E=OJsZy-|wkAtzlB7Nf%_KXpQI+i&|bZbD&afn<C2J-~1_mKRvk`M}@~SL(DKJ
zM=|zC4>h?;!TuFq^)4c<0b21`Ld{10_~SV7M?KQ!<4fNMlkP<KDNMffu>wVWE73`X
z`uRDB#ZHB1M^kV;_#7z&2H%2oQ^}ye4Fb~Ol_nA2NzU*5GQInJoCJpPz*{9Gx2ed}
z62JCEoK>Qd*8;v>Rc3ZSajF%FGzU_k0#0;Ab@yeZA&?M$>Q8{yJd`Xvl`RU<K9lN`
zjha;UQS^@k5a%{MeJq`Gt4AV~poE6!GV7yMa|jj1J>vJTf*MrU!>u$XpJ6mUIm_?T
z8hJULtDo{6kl&_(O1S_O$A6fh;beyCP}}1XUKkY3&<g9XGz<PBbqKa|OXybwJJ{*T
zf~zn*IhMgzG;oF)nBM=t;JE~p%8qXVo!$L#R%N88cYmaQ&z~ZyDD+7|z9ZR2VH01K
zbZ}yv+_|O*^Qg$n{OWdWxGJ2^1tyvQXP3VvuC+R(XfltB9z9f9fT&!X$ko{yPM(`e
z7v-@kU`AkbL2|E(a*5U8f1716I~vOLP6+K1eC?AM#X6eDtwH)32&_g-zVr@q!V94l
ze&|P&C4FUa$xx8=L5dIh(Am-Vf=7NC)KHnIGwSKbZTrq@=885N-Rts4<`uPM(kM+-
z0JZ2fn(Gm}MCA|6z=Ww>Otu^9I$PI1GOws5KE?VMP-bsCv_X?_|9!gUa-Ak6uR7oe
z$z(;D&*uy=P#L9kuV_uojB9@>5985i*pzq+DfFt;i+3Hk(PV>)K1oe<5E4Jf_<P)I
zinYImWYun^MJvgc6mbLf!h2HD?SPE|ick$`Oe?^~QLVJQm_B9<9p;<UR54O!LRYNA
z<8$BfJ*ty9st?12kD7D}rRiHR>;rQpxu`7Pr3@*wTSG^51=87Vj>$KC>}Ra2T{mb)
zLA1Z0e0u77e(Dpl2y{gu(Jx>mPCKSbkp48T4?MZHPbFCdVpELgTJKD|E!H_RRUtAd
zV@+LD{3tO{2oS^SM_AT!&2zT=oG}6t=rrtJHYzBW%&4X)O|d9cP<`ScHz@~E@d?sj
zl2nhDn%gwVOo-$_HD#P!F9~hY<GN4%PsaRFT|V_%QRSnjJwpXahw(?jqbMo!C`!00
zq~$l0ha|c)0ptUqS_bXTI-v7lbLLBrfU1h=5*p8S#1z9@qDk)<iWKwxWFrY=2J&AS
zNd8v3ygdE`GkfD5q{?=>i&vX_Q;|OF?zQslC^cJ-H8-i*V$BWe?|eI;A8H)HzcH$D
zxG!>9M{8dZ=VxM*3?^gFhe+aEXyYoO{GFG6>f@C(?5+M~uF@SzX@-?Q>(X>~m@O(o
zo&Dzt3+w}R(_E~X`%*sqsC58(K!v~LtSkckY6fpkklZiCaP-B7AG+&{zl`T{am3Z4
zoaq2ci9rpwgc6(#RzRo`Q#$bi-5mc1-=Q@1ZY#`fNPhMcQU){}#irO(sA2KvyM2W4
z<l+g4XWK{6_l=%k_N;r*5N|PEpwYUY_<WVb`YN;>l`Z<q1`G~o3?3(2V;Xq}P%Y`^
z^wm(O_%J@N{471X54Gmx)Ab}fN0jQe<6)HI4|_rdlIjHJ?-pkArq7yBSog`ec`q&T
z1<fs-c#E)xs*z46FJ=L2)c6UPlxj*Q{i06Z@EA3_`!O`iMN{__S@qz9{!n(fT11(9
zr4$%CNSCDFf?i@OA5*tgpie7k<AYmaM>~gYTJBmMsTJ5jcyn&rX8HrdAkbS(Gqrl%
z`6@Sdf%8OmVA{1uNOhE$uh;~QRWz>^Ex6l2^T@1OE}nLP)Vk0(QG;}x#*;gB4{6o(
zUM8NyF4lDQZPco$Kq}u2RZONp5+;jv$qISoQ0YZKA3CPIBTzl}SvE;iTe`4neL((E
z9=I|w$olZP)m83AJfRUisKbfUN!vu9T&=={$=xE(r7k%{=2aiK3hGA+ffhJW@(#o@
zndP1&9oqmN>mo`Ag*w`*3s&khCe98h|0P&QXuaKTR@_>3zulA_<xSpRyoXhAvTxJd
zT%ekJ=H~2A$Mc_F1qb@W94b$!Z5yAZN=Efb6`I9xo%$^NKJ@Q9+u;#fwvCGWdj1on
zW={PpkZOO9yf>L%Za7%4uP;!@U8^g@rm?f4+8<|5l%ZZ(Ym*ERNc9>B;8jxQNWT}P
z-z!q`qxYqM|4C2d$A>>j_6bDMqi$`^(GypH)~eZ=Hk>=U4c<;$sHch?D~nhy@q*V_
zb!WYkXXy>2Oic#Z1^V*d^gahqBI{mrM<py#HWV<PGBi!#*as8JZE|_lw_HgNJckm|
zs_}ck?qQ8K#nh2(i&%etSw&^%wldS${oS~#DnU{#Kggtd<*$IAze%P@J4=!}HAs()
zYUx-FkG1%1goxSU=UH_iJ~B-i=XWzqa`@wfTN{!39P=eNwWmnO-6^~Jq5MXJ#$}ku
zsRyEsJQ>}-{K~&k^j__$ldMXFe#sI@Jf7X&-(Mh*+%0v<{=E+beb7w)9Qs^SrIIX`
z-gkT9e|5aWBO$*LL@6Vyfaca?ooF^Df)6*Na5){e_?!d<>N;-_)F}bQ^rkUdcOh{a
z@OOMnvqOA7^IxwPsldmnsN(Ol6df1@Q>fM^*FK&n_Mv`rs5AjTE_L&fXhr(z!yN6?
zoCjsEUwTK`n{8E{AARE5FWA&asK)I#pd26lN@^K9xWW2JHNBy<Np`9XeTYSv2Nx9e
z^pE7N#F!8BRI=UaAO_J=U-uzWur|=cc?oG2^AGuVze|S$Es)cjl=!5)2f)CYH8R<z
zJd+}}Lm7tnKr!Lt=L3^|`{VcS%*`q1Zg1U-voD?VA1m?o-QAqYd!^^D{+YKy9+p&6
z)#MG>P*Z_m0HxArR{afNLxb<o4uk6C;7|WBv~*t&l1kP-L<tifZMUrio%5P6g$g1X
zk*Y;roooe{Y+5^)RL-~TcW9uCVTz5OV5K#8@^0KycDQO3X;D-~`^4bT@7pKkY7RB}
z0apoi$oK5IAE@AH+|i?FP&p+Mv|vd9u#A*wF?<X_&wk^l|FO*CP(yo)a0O4P9nM%%
z222={1Q<Fy@>z#CF_r{=)&<UMY)BGBI$=KB&(GWEU4FjhVL+Ctn1i5NfV0b@1Yl2)
zYEVj=Q>5`^tv)NX(S|z#*H?!ki?!~qFV|Fv1W!|*YI__leqWg`&!~m2MhcLjv|8~@
zSyiXnNP+Qv%Ai8X>J4IGRZy*El8-e|E3~WnFg2lwos~bS8Ihm-Fd$t(`O|n)V#2(O
z!QbBLzn-I#=!S0nKB3i5HffSf6CGA1(B04L0{V4B(pjF8J<wC#H!H+dL&Miav<Dvb
zk(Wx^sQ!Y*c$d{l8&$$HMiEO(9CK$#zc*D%?1G-Y`d__L&<DHck#$kyV4mgg6w1Fn
z3IHO*YE|l~`iS-C#Htaid0F5Zi|Ee)bKmRf$;hYuI#tB#vde$zP*OeFRAwI4rvU9z
zQIVm||G=d95hM>Gr{?yIP)UzL3nOkmXt_@xfA*0^?=QvC(4?W}*<M;=7G2Z8guo5u
zA8Mj3?9{_Sk3fR1#!<RII)7MA-`0+ZaZ+VXcBuN?r9v!bi}N>GdvDafCpux!9|7Of
zB;76LNpY7$zquzF!K&3+PqIl`6T#mMMZm(@2hvaJ=KkZ6{&KOK4qx9s@1JkEtWELw
z$AJWYtT5W`GZqprWEU0hQ*nI=t!uDO2?bh>t16c(D6{VA?Z+qXK3++WRbtjYFpO40
zofxegeg00LlRpXXKEHNT&gD{3S=owiv7M^<PQ`ng4O!qh&KUZKm)|;@)`)Ip!j%lB
z;V|hfC5-o-qXY%+7zJW>KmXh=ePvpm2$u3qYBxt8@BbyBKI$KxQPM0DrOZhkY|?UT
zY9K>wyTJ<Y{S$-oJj&2)Pv4YL{x<E{{%vP6thfh88ADaO1GC~CB{JapjXEge^Jw{K
zr%m4c`oN1+M!GGj;PGrwqrdXUaj+v)gf74Ng+hhUc<c8uf71G4(>$u9yR?k+hzxHc
zs%D25V3mu&*3R~5D*A%9(e_)U@zy6cJXeEs_rZ6d{fdeOcEK=S#v0b&P&19Qphn|e
zN}LSQwIr(Sni{CDk@_0?^xf9B+nrh=8$=@>Aer(=rB6>4@Ai&JRkUgwl@=db{y9AH
zF~BHa_oLHdjT%I#kOcLPp9$^-pgOsoFo;xPQeu}gBlRcvul@kNy>$MYcJ9OfZB#yv
zQXWLwlTv5yloL!sSv^dl6_NhTJ!)s4>ugh$oobrrhlC8j@e|TU%>INYiW1JVA>^PS
zq92o3rPnFQ#aWM7@ma|XsSYeo{{hlqW`F#py;?+>mG{t`(%~sJiJ}tXM)cB=_FfU_
zK{0)p?nHlmH`7aZO>=Q!?h~N#@xfkFK(RcAYWrmzp@8zY>E8eJy*1`;@7CJZ|1x)+
zE_COEy74w<2JK~KTGD}5R_Xk8!lpHU7hZZx=FG3sc<C>{^H<?G<*yS}GyPb4y!dzr
zU=s(G-~{)%KTMZ#;v*(YN_=+23koLwt~dGP(DxQZuei+E&15(0Zksa5-=@D|g6r=3
z(fkeL-0!ntUF=A?Jl#R16Hwnue?KS>G-;aTP~(}GRZzJtH}x~<NIam-K%MhFl_Rb4
zcGbxPip>M)aEq3q8lXXyWLlbi^Uq_gzW%-khZlNilNt{w8_jx+LP-Dwg8}7Fg7Rxd
z`Gt3>NZ~Qv?rxOeI>wjj?$qdRsvBGTC(H8Z36HsMo~HSUCuJI@%&3lEqf!Sh`Ez>$
z=>zmAb;=yL@G*iWaGADAl`r9!ggzTp&WlL*uBUz)U#B<XA{jLV1s-{fa@6JlMsbi}
zCw(3qsw3j_N~n0>L&+q(nuPH#Hj6l29YahS=$TqM$C{L+OgL6$s9%lgq#1^t8PvlL
z`iJz&In<;kVWW6d5$RWRyGfz%LK4gDeV{rDQc{0$??ct?UiECptlp#@gqeKWClkl&
z(k`#iJ5{#o*FpJwj59RIhMpeFqcDYxPLIGUzTQ?Y=|{%2hm1Mj8P4C|22bX3Dup}M
zlPRw<(jDntz5)0ZAZ?uXsDP@*`^Vk*#LwPunD`OIJZC3yl=79ucY_N+(>!#}29Zus
zxa3|nGElWwAU$`f&!f10uP%f>XiP%4Z>tWL-vw^GI;5kXS`p^w+)|iwjL3PT##hxs
zMV-^snd0MB&AAbs<4F6E@c-@r{Yrp-H@DFlztT64?sXpfxw!xuB`SGTnwqcGy~)Xt
zNu$7v(Ih+2k>@)g>7DS0ACD+yvwk;$v3+4}XCF|e8hjogAEHUWPHNH&m4XKWUEA_w
z)yC1;BGT{Xy&qn+*3+{;N*MamjDsjVPF>fH3|l}4sTRKvgkyu#2~_V^b$FJjfO>eO
z{C0Q=JNNp4Kj~k9;6WP_v0wIT7a1otJ5_yCM2Ty4rk5O6fSZu@`fG290Zlhpu(vCo
z{B>>eF6}}?dIi=qU-hz&v|3OKaR9Y@>J+MM{D19|u9(P0P_`=cZT)>k@4J1>r3^?&
z``7jrkG=WpX;&`!((E(@1Uqu8rZ*W>ZdDuGyvtBG+cW(=IzKO2HSScTE4_se5D82l
zN6V$+YXV<N0x}G!(iy{}DF~fuDAgZ)-D!Q(&v)HDKWnA$`u2SLb9+<j`MLYsDK%5!
z&*LZ}A~i9ra@%xeDN&&#9qGT|r@zlr0McPDSoGjlmGj9OC>Fe){u4i@|DyDug-skP
z8%GEZ71KyXn4v(muW!0Mr|AcnDCqqRoCMUAvIjN#elb_ki%>B)cVFjaW9f&S*Func
zwTd3x6p}xmRQKrf;vO}5>C>T@R%g;1>MH)+zmNU&*TZ}_dYT=;4$#8GRD3^EctLUg
z2i+^aMU%h}{6jR|Q$o3E@^SndUMZ}-PB<6eqD2dWEWb;W77su*c6FvUw0Ims{mr4O
z=;!gvyuT0Lo4yR@-Awn5>Q&k`vD=D*m*^hksOX?gT~rcDSxaOoXBPb?n7gNbY{Qbj
z-JsN&3o};83C&zykYGmWC(}rZhp1A8V7k2cKA47T{bWNvJq8ic>~lHjBP}USJmWq`
zwU#A)9Vc;heVjoo<NRLQ5T~sG`##XRAtW?T3k$GB;mnsmVa5&1FRP)ZpXgfA=1n!#
z%i<CX!2{+cJJiv6aGD`)>G|w2q|CrnOU;ZbYNt`i?fVVtqp9>zrC4Ec<EpUk)3~|#
z>!I`{G@^7Em=Zna`ww?25fGudZx`rKW>leGX;glI{~TlwnU2?BTJIG%J2lrr==R&t
zP@f>FONqNbiTM)3VTw<P-KAjC9mN(g|3DX&+qv~CIug(^N8RV7e}w#mc`6Q-3cu9b
zT?3WJCtRZS&;e8pO#ADpr-A7Xyd(+tV2ZU<^PEQ+tIu>khj%`XR%n<ngnjdv2YP0u
zKU{tX^iCb`0I;S*zu|!$u=BYbp;DI*24+EyX&gUPn_!CMqt6hcsPaoQs4H=Q1<Wd;
zE)aTQ1i1%emNeX@`U|yN!ztB-3$*|<vFAr5%z2rHr93fyY-&EDQdvkt_O7$7!9#za
zY*T*ut2RzeR%1QT>iXxNQ7hQ{;ikaq=l5yH*&Im<Yw19i9wHiMiy+x81tLsrf*0Be
zL_!vO(X$prQ5mCHQo-?w$$-ZaW>LW>5L6V9t%f?DSoKlzIlS~ksR@z@Y+^P=s;RqA
zS0SZ)^ZZ0z?(&$)ef+JD7j(s&ks{K_?IWCQf`)7n=`V!4nPKN4Py7Zit4lOE4(?CZ
z;H_)==ZJCK`V0VJi*p>Z&jZ)CBpaM+1>z>?AuPZ`v9B}z*cnMfzHcAdjx9e?#i~yS
z5&|emCnHl`N|lPLPfI;Cb%s6ppjdMKky?X?5D{QYV>3P9vD{nrF;ywSB%RJ_7<O8y
zPM&?rXNz8beFV@VOdUYv#cax1J^gPK4R@Ww#PH`_NmEnnXtN3iP<*`K#Pny%L)D^Q
zUzG6Z(23(iRm6C!NjU|j>*u2HxjDg|-N<{Gjy&m0dt|^T6BcBSRq_XFVxTgrDFclr
zHJlv1gtTh3D{v)=7UcLm_?Nso6&NUOQ$+c*-c=`aXb;Z!L%9i(w;)WFtQ|SQ8tzqp
zS=C@|r;t2Z9f${R)b=wz%h8OwS8vK>YQlYGD~<IQ)hMA8=Qy(}kN#%D%rejrsGRim
z<DWiaRe{H8g^_uY#VQ=BKl+BIrD~C~&lm%NA^gVSnevh69ugUXE{fqK?w#a3S(50S
zyQYltU;t_KtBw*-Bzlt%FngG&i69IOOY)PSKcQ29e;I1iTf0t6EX;1h2dXN=NJYTQ
z^+`BW940d;<B)%IukVtjN8c<_`aERPVbC)Zoj+*$a@i-Q{ZJV|%MV_j>@HHLXA#5)
zk^U!6D&~AXN(Ga%&ge%wmE5Bso?2g|4?+H*j{4y@qJ3Tt&@{eV=d&p@y^oNXlQM6h
zD!=m)?I2!xG1Y4aWqH7tA@xy|-y2m(^N+^DzbOh?JbGoXbI(%q3^C*P-nNnr%Op6R
z?&o)&_#AzLb9^6_zwRZuC6G)_Gg&SbCU-bJGoBQ?SDSLEqIz$yEJ_m`XA+p0sC-&9
z=T|enz-~Tr62`cF_Mz*C9x6B(I#;2LW_XKF$5}j`(Xkz^i1fEH&tb?^?_W`J3$;={
zUf?+SgdfFzmqbyDx6bu8OrDpa`lJtRD3d>@K{&YL;8!VLQ#*nTGQHABz5#d~Wl5~j
zHf5k+%@Q`b!%X@z(ojQ(7j=VEph@JJv@&@REilVlLi&Z~StOz3Qj?62D3RTL2Wnlp
zU&IL{nnKixL*;LMvNa(uEh0TB_Qh$`jjC+jXh*Ji&bBJ&#|!~K29E>KpF^Pj+SlV7
zEc!(7?9^Fyx^}Iad8!@q;h7#$cu(c`tg|UnUP%dDZe1|y*Re%^e}9^%QQoJUKj$;s
zR2uMqcsI7+k9OANDdPHT>vKLIrl@1AE>?X?tN$B^kYtLlke-wa6Epu1f@szE1L;bv
zcWPkvi*+9fMpAT6m5}~~d#nk(2;tQ%zE8r@g3I~k&(u%y{%(ht{<8Q)tosn1Z#!r%
z+T8X0D$qz%gl#GSH_E7&2$bwPBo-bY?MxIT=g|$;Zw4)reV|yshmArBHKuLwVm7X`
z(xD7s&UQitQR>|8>#r>5Z-kD!*Iwr#i=gMU(jI7h;C$$|_P1<}zOW@JWK4NEp91#0
zg*t&sSrujzYM_Rf9lbvCjDe6~?8DFF16tVei4xtu4tmHUIa>)*(KSJXpp5j~p6#?D
zpRFCA@;ems(!zfcNe^|$xiX#8(Px!?zysMzxlBRr-snp9@wLT_FW;#=QzfYk1Omr6
ze-~d>pmf*(ts%_E_#B8*1U05=ojg$XZgXe3rtDRJnrWPS=PLyhDn?V{p@PsTblj3=
z{|srY`qm&F-8zagnpJve<@U%ay($^q(4J(xp8_4BIYMe=$XTk8!kv;bhDdgSIqLPV
zdZIL_qzlNRI*fVq>`EuGm576BMWAs0%TB$Ar!;lG`ZZJ2M3ZaiMNPrH$p}q`)rHev
zgB6Tk1Es0&$zDBX_N<`w=p$Q(!j5&kl&<s0intw$FhS>ke3bTAzm75iJOt33Y9W-A
zWPu4()6Q?{WvDA9;(esu3hrS8%_K2<g-cieLXi(CT#->b;9g~Sl+>X&r3A8ErP}t<
z)Vxkp51qeDHJ*Gr-bAHYEamS~O_~E`n48Aw-A~@|RjjAMpka2>=hP}mR5c#amn~{T
zq7s1q?gRZLpv|P!)VrI%e5WV><Fk2w*VbS5V97G7o4R}!S*GlWVuvI*6#O;R5v&lT
zEzh?oFH{&2BI!DlvaEkxn?H_EQ<dZd(IiW#%R+0?rpHM)MkjY^Ao7n;zYn1PG;dlc
zJ&w*$NbStrnC5@)#^>88&3RX<*rgo{WL`csk^Rv7Tb;$H2~n<$l)goq@+##lJT%Rc
zgYkxXI-r~{y`aMFtDxGPbf)O!!AFJluR%%%Jol(qW4?L3UuqAwMWcUa)ugCG=U+Bu
zs5|lLnEW(sN$0Duf*#-BsGuMAX>$@vbEu9cCtFtq^{^@B(5#h;Lxz;c44!^uQmRUr
zd}XhRd$NplCqv=MwCxbGJkpmyX<zT1l}}Sd(wUr4xwb7?HKu)MK)=5lG4@md><ONj
z`;$kzp0fwj{@JWLBlMra#>Bs4+4MP9k{uGW0$JrAsCBI!xmJa!I_XVJ_(!D<b6!<g
z<2WL5cn>C!&JRQQHY#KC2kMHJ-+3k#kEz1?pehlb=@LrMwv%Dhw~W<CRYQNok9%O(
znmR=;;LV}(p(N;Jzkz+S2=wQxd3!^yG7>q~!7g}&gRT$H)Y=iz&oR{2#h9f6Dhz6u
zb6*3sA_&BL1!`#ylb%T<wc&a$64Y)TNqU~Z^nc15Kc>45SY_~7n&97FKE{_e+pMWd
z>fX>t)JXI0p9Fmbry%SSVn3%Nop-^YKNv|Jrc$zUx$B26EQHpW^FtDtJpOa2{#1#%
zxL(E@VCtB!_G>LwA7L$D15wXO)5OQ1lQLz(?biklozR|kiW5xv@BJvI!TRMkA~0nc
zQltGiLRu^6)l*_6J5qS$W=+9%I?9e$fSo?3v9q|g#?hK-o}U}7JOEp+{)WacIiR5q
z53PMK4(-~-0zeH^%q9&JTkrqS127gH$hsnMHOZ)V!kQ@k-X{{3BG5b#O!+buy}N&0
z-K*hs72Rn*v_^!T94pR5=#6S48C?<SccVWr&TFBFIjT7BlToQWSgjD$tfHXiu6Y^2
zCZ%evBH`01OnRv}9-(jt;sZ6M%D_MjE*+n>f`OQu)#ZCN(h+*QqGYuX6vGsM|C*QS
zhFMRPt61abosO5?6awZ{FzS1BdllfBn7~4VRzTQ~o7Lr;72Qe&o6_6|&r<rZ`ewR&
zpyKx8*HGLKdQ^)=AZ1C?LmA##<Jx@r!J$!vxy`P28W*k}|M{}&u=2#6n1p~Xy#jQU
z^r*o4?50*cC)kMRlcs>(o>#4^Sm{u!MC(5<|2jtX;BitLNZS-q&IAfaQ<Mi*Emwg5
z9hA}qT5O2jn=gM-1jIOv43xPwNH?F+g?M1ZaPon+Pbp*VN!VFym#HOWP))ZusuEQ0
znn0NKPCpEvll#Q-`&c`tB&<|Qhg_B+b~2eZS0v12t!$(JFG5AI;`ie-wSB$QnY#D+
zv3jVf5_8lvOpz4=U6KHFZP=@2p#G-1z58{RlhjLSf}Km@fZRh`!=ON+DxOqurSdSv
z@%X<LQPHmq^Y^Er2MHfg3~$k?PH&snz{@>=@%;y}pA|l@cKr3%C3QgKh&Qzc>NFcL
zM%VZmxjSszPNPp*GWCH%<9D>F_x+BTDU$azXGwK7npF04kV<;9C|Mt%^f{H&H|;M|
zGyLNp)9Wx|SV6Db)J$f~E7mhFq1t^>O{bKY1bKpX?N))bchGZQZTh{S{Y}wAJE3{D
zw`bRy=eN<_Uj3oR5t~}-UWv80&=I~*{4lG0?KN4>)obeiw@^*x2TZhWUbmn^TVW?4
z!_wL){ptqN&7C#=mJj{@dXa@51JPwm(+Z@Uvg})t@cu}n!mLsW=y4~&CpCc!h1$@=
zgY_u1DM78v%We<}mrkqN*uO}D;2F}NM&0k@)024eIiZ@~KRBz>Wa;Y7#;SBS1b))B
z;6nm|I!oVkqpBpp>KhsaESYqltOB!is1P}23<OCoJzFoN8qy68&n$hHY}B-BrNgvp
z8RvInBZIa^czuTrDY)WK*Oy7Z!ccXOTMKF=$f(v(AjK+)0m4>6J=m!6)9+jb<?t%F
ztsw49t#<q_h*0M)J0dA<%TNPp$rn{whQS}9sa0>cX$(Vaw@J6pH^9{|c6fI8m+V%M
zyKZamA%n|2v*VSNg?bTGG)mTmjH-OU(P#y9KkFKq>&O6dNqV+%BOgS($CM4epa>PT
z+IKg+-KU{@BXK7P(ch_)aU7P4ab6oNw_VQjOQ5OudlQ^>eD8Ki6$1mI=n<~~n`hbR
zUHcs7ZkrBzx{r*pP!FSRq4omu5^mU}iXr8!`ddQ!)962cPzlt2v%W;Nk8Cp>^o1VN
zl?N&)f4rbcEui?!4q1YfvKSE67frf5WNRlAa&B#eG5Kgv;e}|?(2}65^Ou6_MPgFP
z_o|FSx@DmLI_RIgTlN1_lkKJ#`}P*3{~TQHc#q1p8P@8VPz8B1w1^7Uax;VN*N_XB
zSq0cezfmwX)RRIK3vTl(F`frNu`QD`#qU_vitvzCVbl$*kyQsB7%cN4>#7z;uvg%L
z0Lh_AfTkChqiRbT=l1YVOP8G;)n2i!TKYqvnE^_&os^27sRXKb)2P4HKsCX7@BS3&
zpa3#ZZ+q|V@9*!%pR2rpRZh2dRO$JtO0O#&qzOwv5xDyG3>sQP?c7ZbxgP1Mp!LT4
zfiP(mfeM-IQyJ(APa*z-zV@Wn8a^A}U%^{53+PcO2+Cp}wumHZb)pGVS0$i3b)lus
z9$H$=CTpn_dsHL?&<h0`Q0Ao)q@gk(RTWe)Q@w`rYCQRySK>#8>}M6tp2ozKj|Dnw
zlU<;bUC;;mx)R)<iiYbv>n_jQhte?gbDcM?*4(frIaQIgvJ8`ELnx^h$4D;)flIj+
z)J34#=y@(E_^dUZe#Eb3n4vcmYd%anT^Hj%(#n`B6{cjqN+iAttyg#|s6gBF;={fL
zZL&J6R-CLI#Ycu!CY?8?k7+>3tUo{VBZdy&z^JWBzaqetJDoShLRNgl@OP>OWx3^r
zi-ZtFb%?3ErVR7+L}61P$a<vbhO{Z6|0TU|mGTHFMgQ%8gcPo;C!{K8TBtfxiQ4OW
zrab0V>^1CC*Ed_3KN(CqQ}^gl&Gbq?-9Jq1YoGg@_p5AKg(c}`r3ofGxWTq0Rg^$w
zn^c?7!cev-dKvq10CCkY4zVHZyw+Y>wW7rjjZ9t7HB?iqO8D?k)<NC6CHmY+QE#wY
zn^q8O&#x^KJd4gpDty*Tq(mDo2tXBJQZw^enBqdv`ann>NWm~~kJn*dUrT^0VW}(N
z63{V5)BcCyQ_?t?o=B|rYD^Vu3WaxiX|<dIUdY7rYZua~GSJ^fd#`5$b4@>>IgIat
z-MN5rfsJk{Dp?Y%dY4>M3DrhRla@iFu7LEXfrgf%s)YGujQ}iXxjbn%PY0vE*7{h5
z&x8rpGJ!&>R1sHw5hzWEYr6H3NH3ji@6oCj9YWVBR;j$Mh`G?TJ_k#Vl;TeT3Ii87
z{S{CT&y%5D3#<=iDXBauTYr^Y{I-kDmPMq$4W_}}+RYqyYBI(2KnG(_WouMfy=85N
zRcxvhl6F7=s=uiWX??siZ0WdFds(q+N2QSM)dZ>SgSM8(_WfHxdjIokv<*EL2SN3|
z1BlOU=7bJbLj1K;K2~9U0Yb@*S8bcJP50vz^)b^e`2gT0e+sJP0|kW%-UQJlo}8)#
zX$OvFQAQfy0_NM41b_WzeKtam5QdnZZ{sb0Y)#{oy}nS&RH60mD5%2o8e9J)Q>6*U
z7MzeJn|?RrYe-MVTg2O!*1txrNQ1VJgmqdFSRNv!n2I<dE0EHNjXRsSeT-(yBJTJ9
zUA*iY=~i@HdKWd7N+3_g>c%NDu3v9njI^~Nu5%<-(Ixw|-|W*Wu)w-@ss`r5$Ou3b
z4AblLWwko;{sIhZuCv5g$<{te`-u#$LrZwcroYW+`+2)bImc4?yk{5I18fkvNGU^_
zL@7Qj2xY$#PAdBkvAaSCpFeDxt}chuJA%*bmg<u>02^n5%TPlq%}2qCC#tF=2xHc=
zvqWAdEw^f)-2GHeflf^k=y%f(G+SrPAtzui_+57WdxG!WtZASU$AW9X-p6~3NAz*(
z&8$-rJ_l+gbq|#rEo}~#i)qI_01YUH^>+{Jck^aU>$7kOfZdcB2QfE*_%9#fhto;M
z-S*X}1p;?v3}nl=7wAP^`Uy8c$*zzr9o8xKs{L9u8<b1bbGz~ZVTx9RNPPh*n%KG_
zyjD|bdI|pY3EH|Ez(#0+S(e|e7{_@dp@lNq##w0IyHKwPEf-@L6dmJ#!9$kah^uka
z@=`Ul)~EtlHmpytXgGkz?&`KdZ6c6p>@u?&Cb8QF(gn<ilz<{+c?S!mQ=xrzWHDj<
zWZ9LdWi(5*KpEDg65!%{RT=6xQkK8NWK2S0_8(X^_5{HTc(^16mD~l%4IAdf!<6Vb
zP8jt?jR{E00dMuSYBlS?yoVkeOJ|D*SUO3sK@Hmmk97|9XQ430=RQquGR%kxv@wMQ
z_P&2_W%MxHSvH@K4u%G>880xV1wj=M!&Y<-lW6H4%-<E}-XIIvb5In+w9{zYdwvm!
zE@29gJ4^u-45H(jo>#k$jj{=yrmQLyf=VV0BkgW)s2P7Ym#-dUwChN<wsUA%kf~MK
z@~hATQe6DB2Y?+Y7y4E{a2p&nFV4%~9Yk0H{Gb+ZYe{WA;oS%Uw%<Wzpx=#=(*23I
zQ^=?{6PTyedwV=afzlGjL?qB7AQUNBtJ0iR^i^P|o4Gd3m^}mRfekl6l-Trk)h=-W
zNrpv40lFyy{a(c(#?ZO$uL0?eEZi8bgAad+?T6SiRDIm90!~w?7ABEkf|N2Bnq{bf
zI{2eLP$4LocK*gJy<fCGrLu|&i(LzLpg}8_fYn<cfVSWg*5BrS_d&b$rqnh>D${01
zT;8vt!~)tq>6gK~*Q8}B!)j-NkTO;(|0GQy%Ww8yf-5+}X67}TxpM=`Dtx;pZ2-i-
z)1N~9icn9_)E?B@d6raVm08tzW3Ex4WaBKq*t7}K@71V3&4+z{vmg4&*~w{x)8&i{
z{lQUjHG?{<tj5+=)$hy+?1&gLtX#toiiT9WF0MZ^LMhQ(H@r|xE!#PGu}5aj>0<Vd
z)+`ANO>pNg1i_1}2?vLgcnpS&$@%h!9BbK>i}R1^;4uA}#x4&7>R1A4{2rPs+WRe&
zG_IK>xY9j|*Orm~pF$G2r?|ClfP^jt@>_2~aqE2bXQ&|ue6NFA<6Fa%5Y)isU@_R3
zJYFUY97=Z$+L?+*-I_Jb-A&S=%L)Xwqq(16S)Dkdv{ZDvTJNv4>JFjGo@ltrP!A>r
zU-?^mceyM?mw+VlP2oKOpA-@)QwZ^Jfzq(F{CK<?ZOEy|^k^M#fZ97i(-cFydAv3%
zKUOr-UNC8%2`rYh;Wyq5*x`ZUkW>?zSyVGg=&p^!cQeQ@ybJGv{Dt7&q>TKpztg=R
z%?lbsZj+2EyMg9*$Q43nN$8ldbh#vS<Ou{%yubY0!Em7f5OKKWssep1m92J{{U`nh
zP!YBPImB#6YEyh|Co#Sl|7Cii>6WZ`IX?%_H<jACsb*;?XcuccD5n#jYLkpbkal@t
z;Um`dlh*;<UWZNj@nTQMl*NDvO9U0IV(BtAuYPY1Qz&i!HfH?u=g(x{6fuUIX=hM;
z->&iLQs$F_ai{h)YK4+}%d<es_?J59h7zAi!JvVJZ}*d!Ha}7|qu05J88>RjRLsC1
zOgGgJf&C6P4eDDt*yi^9{4CyXO0*u<-?ulYTD*l`tHDgV-x{?$H!9BpR-JW@wNDCx
z{VZj*>qr-)!UIe`O$#}ztrHBW&R9~_)zw|4SOckzZAu$}us%83rQ=%tGs(9HCWWab
zNYIoxTCKabJA&F0W1PPLYM}o4{QNLC8{gH?aHBuZAKS{Wf=e+NgI1t7O!Mo6DTIO*
zNN*N*z<vz442`Rg?PF`r7NB6oN35;C0(@Mh<Ox<pD?V{Wn_Us>xt!IGzjDcarSkh~
zA&col2Mbs^9nS)6(!hoZ!6;_C*#KW0>24k<7kbXUwy->0$%}z|vg>TG+QbF8Fb-DQ
zsIcj&j@_P3>!_l0iSQ$+bX}orSp^kQz^hM&50ye6d=D~IViTCOXn9X)f>rrcyVj~<
znZrZSIDZeXLBrc<oObz~O@;Cv#ufx*f<bR3c(X+$w23z>5!$$>bes;4fB3Aa_4T+q
zc3Sxez#`#MykL<q#dK}#3M`uRAx`_MMqB7bc^v4oLn8w-tMhJDCr-5u5|KI3pC1nN
zyIDfL<%<us&=`c6;Mu3J?^y;oi|@`)0o{(H3hm1vHpYt}FmqBPkx;OJM+klPC7b3E
z>)O0Do|v^9vF?c4vVg^LbrWdcga_=xUPVOvi(-&@eYpuc=?=Vc?}oA}J#~|h<e?tl
zPdC&HZ@J%U(<FQ6Wng8FA9^&1x5*%5LL8&4CS}vIeZ7I%GwWQv7`huG?RcSO6`}Im
z@Qw^Ox36M4+)%oX!T|}?p9JbpfO=Q0F;*#OjBAzqdw;P;S*IL+{jtuejO=(8;*Ob?
z6x)qE&j(191Fd(;e3mWYxKTO`T~_Ra|8_aPC2R>|9M-!n|NqICzt2Mf(Laoo%Q9VU
zXN$st3LO^?eRq8_806Bn02};k#mQ=3{Taem9o@5Ax<pjs3m)ob5`!bPSly@RDG38X
z6cLqzOE~ve+n@O;%Qrr(7z8FRL3K-!PFzIRp!tFgM0{1wYJ;=Q{~*_4s-_M`Ul0hn
zP`z?|IxyilYMqO1*JZek#V3#)`mdzFnUCIGbyc%2ALutQ%hGAhL#za=(t=tqYoP8N
zsxJL~rfXiubxlIN)O_bRtWGZ2v?S!6gQc;nt6N}8%K3g>mTyPcoUMNnViR2q37eOp
zj$=QJBhdp?k{nk<6>T?k5E1FkAyhnHp<rACbpR=dzeAM<*ob#Y3&tI&<#QEZul!wQ
za;hMgyayQ>*a3<#AGxT?senZs*tb~rq37bpvh6l7V>S2;<aU$Z)&!vU_PtNTJ)gL$
zY`g7=%&J$Cm>4%m)lyzZg|&>dU{CRgs&$A~1u?Nh-3lIRb^Z})3|D98e~$DQHvL)~
z7!E&w{5GE4Fl@Li(l(|Y(v?~50>;Jrb+QYR_aI}5k-NMJ3aTt0)Oquubu+2MHDHk;
z<DRpDUWPu#BGwQo{5E><*Y+@~KcgEa<DSWNP53iIYQ1-VEx2v_Wo1VbNpevwv*#U6
zvd!aGdLJl5Zo;q~hVQ&Z*TGBwv;6#gb=v>_F{T&;htNq(xI*HP@82|X4gBo2SmSu$
zl$I~Y&ree5x;)!9PhocGxo|$#>XUuTL&6?&7&6Go7WTpMzSU7#_PUhxOwHYS)3&G$
z%QizQ()G7{NU1ECAr%?>LQ7n?-o*+;+s}EIU|Jdw?KZ;-{Q=tJgn&HY9@$i(eaP%0
zt+#2vlnEYs$;u>p?Yd8SDA)zEFx6CefpdwLy=rUOZH2H^lbVP^PotE{nM#5|AH<?L
zpu%4rS#>y^Z72*jF4U};zJus(*CjZU%Agf9g>=cS9xZ>;H<&lFl-K{?^g7&=n3UhE
zEWnZp_nNXMs7MrX&H)yzN5z$Q9Qn6dPdY@~*{opIK2><cq6b$H^J{s6wGGWdRanQl
zB)GIP_{-^?IOUytcP97#RmvPBAp%YVlc=4@gwhP;(s5p_0297{us#c%U$AXW!$noK
z)e!xPcT_%s8kFM!VM1xvw;d6MRbbuOsW(<=qtg53CyY2Th6Z8OFWWW)YL>{=264U=
z=y%}XNPsRImC{su4;1I~slPwHe&n^aR7tMe>9^Yx<y{?WV$m*eZ;2|Cj-R7@|Lpxe
zA7~-Q?R@(shZ-A0e5h*Z^|DL&O(i(u)S9r)>d`cQ{Y~#JzrSC*+q5;~-MC?#o)PTb
z?M_$o{&l)f8=Q?a`Rv7C{nqe8pzQca)4WeBj?_*n$VS@RxLY4)it8ZDK}5&0&277G
z0;%JBTZIb%2bUd|aW=<zj_02<$67IizjAPS?+Vrs5^CWAOP2&JE~Hy!>pt>6R7uKf
zz;EjBSexzxn*^&MHgru9<s1SCFdEy)ZxIVuI?UmW)5{~f01`qzP5}=qgby;PLap}|
zs_Z&K4Qk8*>MxD75Y(>#^)7DoSNOF4m;brNq|w{_cZ7<TQ#xO<deO&0{Joc~f0LFh
zm>6+o`G}T56>V}0v_!*KwCV46vq@0sb6fsI4iB*~N)!f^cLav|L-Yw&NmR!*Skq0J
zw}~m#vM+~LQS;pUd4IpjLGWq(45@bs%U-QV%6hH2kibK1p)ycLVA{gpvF#C3gjPgu
zbqT51Z4+tb_0nZ1T|n@fWbZneCe|V;3=viet_x6qiR2;z=J(Z1#XHW#H-o)z*x4(e
zu{tVTIOXtI%<luy<v{_F9h6Zwy|ZC%Y&@JGzTDX!I|U@D8HB%Twb&Xs@msPA>Mpob
zs1>SGj%HhYGN^pSp;)V!6e77}>A{seL1`m2Bk`Lp45xcDS(kfSmdAH+vVf$&b%{|0
z+qFFr^0qZt5rHwwGA4m_FW|pp{9}isU3*;4422<$B37?3la<&vjbTI>%d0qM{sJR>
zaum*ct-hSa6cn4dPy^aqyt5Ulb<-{Z)kJc>BGMJmwwlr7&+a*uNEs<g#1KM4V~frB
zj-^P-f=&h+*|O(qh85^AFZ;X;Y}gu?UtR(+ukh~5St9D-S3(Lj5;Io-UGDxqQgZo7
z9tBqmb}paTyuV#$We-(B6fwZPGq~>9qM*TcbrwBwQ=~F*mFjX=wVmGqT(Jk*=s}{R
zd}(=y<x-8lz0YM9ITiHKJ3PE;?^@F+Yf}{NToLQ<PM<)EcpRRNI)<Z{%16i7)DQ!1
zJ}v-xK$;UIh7EC+s<Z1BWC#mwoFg`yY^!c=Y@^<7_YDhu>^JP!n$Tq&BZ0*3XYcK`
z+>Sp>GpII5b)*%7gzy;cPBiOWwP8QUvHu+2BH>|0DWg`k5u-T%o<)y4e;N{L&oD-v
zRTa`Lu+76uhP13N8k;XLk|zv$TxFVVnIwZTf%N>scHRG&ZEc{UI!LT%J@tthaP%b!
zjLp+RNQ@P6<}aG*>6%^<9Xwx33s<(|7IPSQhEHx)ndXEYrP@W9haoEiym@-9xC3C%
z&c`Vd&q6)xY?m?_#)0-YJ-?S=9=%`?&YnomFsXkzxDJ`NMnDDHcpp{uTd>inKK>&m
z#B;FU-+Y^nQCk@IsX=Xc00qMPv@vSdB`S~dE5A=$ze#(XDRl_74bK=Ac0DocvIj2D
zw-7rk=+5!ClS%TLuw91}Mbr3K)?Y_-qgDQ!f^f6;zw#4RMI&oM!?<a^ZM8i{+64yj
zhj%J{LK?eW{i-T9*o`oR0l8NGdM(oj3^gD90!+U2Y5sR#1<)>EQL#Iu^>-6|6u<Wq
zoZZaIT?4dl>5RMG@e94TM{9wi!qUkB>=j1_?fOU?bH$(sAN5d-idrzG|497OEt*FD
z{zmY>ia2SDE~gtdRU<p`@A&<7UcTr{4IVF5tI^JS2(=fXkGT$N_MzQ5v$KPf**=m@
zt<M402Pp=F4QWL%0~})NhX8NjUt?leuYN0NMj)ZOlQIpbWf*NN<NR%0UiR4@ugip|
z6ik7Z-6wz0yhw9@sq;+LiYlmwrlgNgRbC$z<q;aMZD5Z}B2;Ozcq-IbOF;LLeu!w?
z9zYKWZpyF15<)j!;5Oh;UE;1pGk4vd0{uyx05>QSzrY_Rb0<({j<fIp#^3s#PW3ru
zLUrCL5xc{SFd=`w(A$rkjgNxUU9S785LE`oFsO%F9!jY;Nu~w8vsd6e^Zyj|P0&EQ
z+#DX87eH==X4W*CQu(u0c(yplnkP~zwaLdciE;e%=RoNob_ML{M=pK0nrH*jn{tR$
zU{+99t^OuA9iROd6x4>#Pcu&U9BD0kxDQYj`ol3RrW(VMLIgR=T=?I$b-R>~jMm@~
z)^w8aQZ@XY%anSi4Ad&%tWlKS!1TwR;cS~14)Qx_$dXX=@6reO_4D;FeAc^Z0D1JQ
z{yRa1<?SBWqyK@AonE6comfj`G3x{mSkjHA(svU7N2YHMv!b(u0#p5&W-e=U&m-iF
z1#E}15QTgNO>^&l{5{!89wMiN@XnHOUGY4ho@fXes;%p(=<rMR!2HFo?BGoD0F~hF
zBh`KtUVoP5{XIOActarV1fR+P3!C?0^TJzBlJHih5GwmUsC{t9d#4gocnF3BS6PC2
z)H4w}JpcTm5yi`|<}+0hv@5-@Y!jcJ26HYJb4xf6HbCH79fQE;gKA(t=)~KI)l_lf
zYoA0OvI|4iC7>)uy00%}fzI4tw)9<Gr>`ccAcGz}SgUz_tdi20=;GQRTsaHOUoNt<
ze%$Bh%ZSFW@$=Oj2p7h~?~V>XD2o;Kq~Xn$g~$oi5PGQY5bF=aIt)E}j%~2z$Tq<8
zd4&B#5&s*;Q$m-Q+=2SiM5lfNG7C8ue8gKg-D!jVfnz--{yLP_Rd~x)G?AR7ohpqu
z<=lW!u;!sV4bk2Eo;fRedqJe(|AJzLza(wP&wbzKseHTK<_AFc9aQK>HGvqMIw<pb
zpOgVMR{~l1e-Umv5Noq>G}-g2XlHb2m@V29^*c9{eGk<dfaREzA~~?W3BUY^k0sxu
zE5m-+qZK%tY=edi>UIyS4|C?FkqWtzh|+}2SgB;;6&v${-W~~RmaG|Yrn9GWiZMjs
zmx~SYEksMAr|OdlUzt$9Q<Y)n|3Af<Rv8lBS2{=76a7Q2CYtmgdhO%AWS*nf*-4`3
zr2<T{=r{>S%+Ii+n?#EcuV0LO&&N<@>h8M(A4>Ba=<Bsm*Wd#kDMLM}s`-4QXi{KZ
z65zwn<)O(?WoSuFZ?H)@M7OygwRjFcwddi(J?x3;1GuRBX6`7XC*R|zo~0@<j;LBQ
za0<;7YG)#SO2M9l)fi>WsHksOg>Uq#9Qt|3ow4~U9s<nBA<Z;ha-MV=Ih!ybSHQS?
zHO>1I9n#bxd(Qa2yG<#3QlwA{q9={^*wv4&Sm(R`eAyG2dAH%s<Go35%xwHzrEKqc
z@U#{eeSimnLQuG{&`BpPP_B(a8YO$~R+ma=sZR7mV>nVc9~FYzqdx{~=brNlu%Drv
zu!s}>^YhdAU+$lu#{c|J{GEZVndP9xcOChCjCFIUJRw1;<PFT^Wu(8&_&SXJFzFK>
z^{e%e(6E7`-C%q4!tt9l;iT@-HEltaQo@;^C{CV-y*k})(`|n~=2%Q^9**shHluBW
z3+YOoW=?%*+bjW%s~@LhhtJ9H0};1@7(#0Dk>90t?@O9EvrK0Okpi_@HoC0+DbBR3
zV9bqsd-Fk5#+zG~CyZ^INJo9FN5pv>q;u1z=t=VR+>_D|Uq4^p`*7)+qV<2BZZEd$
zx*=~c%tmDn?kWi`8imr73SGR(HIQT|LN1&3+`zk&=)*L>Io-!MH9TETqUTW=Z(Y?v
zuUc~E9GUQFqY>tfs#8U*M4Mu)@we+R`URxhRMW3;uL`I0xt>aHN=!;Y{==o>FES<4
zc5>1FC?JLQ&$;fy)@xO%e;XP;wY?#6wRPVn%+w(aASS8;%#&fBK~IQV+~!TZroo6b
ziaJDB%111To>B_6BdIE=6<1(Q2RL@se;#UxK|eIc4zLldIl&qm&TFtr>vyKK_HCkW
z4IAHX5Uko=tmQW2SxUhWCmrDO(quyUjozI=q+0^IdH_c2LusnNawv1r-R1jrY2EGw
z4a$)SC^xH0f&meTbIMqM8-MUMcyl_C(4bOPe8Mf?TvSIzCnV;2dx#Zr&zfNEWoR_7
zfx2^~<{xg85l%kvHWrEVr1z@Uo7TBx<ixndTbZCzat`y07%2XB&=UJQnSOeI@j(4I
zpH=&xTo)=AT~_@mk08$P6NUtqA)<qU(})!07z?cfen<ZQ`zH)P{CxfFfb?7S@9Xo+
zO-%n~utEP$Z%L5#K@z*WPfn5YSsbwDWiTy=Ilnjsj2@xK6JDH1q{2g`r=FjmFg*L+
z{No>5|GnKmZh!4v{QcYA|LU&$d(-%%`}^(ge@<SUmUsl(-o)LKE^44hjuD9<)@i(r
zgyr;KzV<Bj9fk~lznZ(bfB3)u{OSJcyS(4KalngbKRpZLZF!~_sJzP4%OoV}_IPoE
z0!5GcLJ<3-vthkB6Gqp78jkaD2QH8sw{*H3ey;XBBQwFfqNV{T!HKb?b$Kun^1TvH
zpO+|aKxOVOQ_MaDT0`Rp$8Lxuy9Y~dpKJxkZzFW)kbr6lD8#ehZQon4`#)70RoC#k
z6CH+t7#ZYgZ30zo0w<}PB2XVVQGbfl4ta=APLQYkr+ws$YMvX6KVdc<(nz2RqDt>f
z71TR!%C|RCCRTxKM)j+ozi+KtN06wAW(0Z}C{gfZmC(4MWFEu_e@=1wfArJ1k09qZ
z#{S$Lj_=d@{;mrMSxH9{1QL8BGBIodmZr(tt|=Imz`pj~6U}8tWIG&rkGw)roG(3G
zL&ZsM*Tf6RSw^{tlr%GDNNK>oj_+J`SMxXtUQW=b#S#*gFpi<bA6T6WD8p;1^A^=B
zwylS~JWCQf%D|E?BzoW3>Z?{J&PoQggc<1fAy5jOXzJx&tDdX&3+O3TcG84^YgS+o
z%BGl;A{7xjdJ~sSi;4>Jq-ktd+8GQ!om#gh*xP8SuN$FDl4#XOt=ChOK3IXm58`}@
z;EuHuuAgB8wTi~Yc45@qfgh<C$4IGPQ&b@x{UNa9Fbw0T|Gqw-kEHdma~1TpGbKI@
zu%Jk?JS|b()?rZx{2{{M7zH+J?C*Nru5R4M1dj}rc$5I@BL|(}<YZG}?Yp)q!-Po>
zkJKU3DrTfq=apgg23&<+Ay7(MbR^`EBaOwhfb`cVCAYg#FYWaA<K4%`QRdV9{i{E)
zgWu_uA|svYlCV|Eop^sMpSf<;WKs_bl~klYwJ5dJP!b*aW+NdM4Z_S6v^$e}<zR4E
z>z83Q3D!0ot1{Ft($9REZ>?c>82)m?A6tAR9?v_p&DmrV*o_MiZBvFyL|&yW`&ct1
zTDIK~#mEk!h5<-jgo>lp&xF2YPeT$qGCI^KrgnO5?Mvs)5tbz0v932NL7T=c6lO!4
zykys(#!Miep2qRWz?-U16|HYBN3-tNWV`^{G;ffjIR%M7K#6(}U{d$V<<shW=a|eN
zGi!>~h%mQZgViBvai7kuS<93Smp)ibQzSxQTJJfy8%QP$glfYMC?oxC-pAqe;eUIy
zDq24_Vy{O?wGR4eHAwfMWz;B_L)RTqQoaEaFF+j?t!j!$e~4&gOwBsvq4wo-wIiXo
znpGtkfw4iDwg3}>LNg?Xf848LJkp$&%ab#+ChfBn48&CX9O*B`Enxt1oZ2*XJyInr
zk8b1&dK@U{7ZCJDK^l4mn0_-iF3xBi-&>n$$Qo(l4Pib<o9tGoeS$K-jFok%)uvjt
zv{MO5E{Do5EO6$&5lwCp>H1Jr&xcVFxMmuf3@AwH+vHfGG;|6PHA5c?emu9To!+ep
z5i7_&-Q*Z-myK<U@=)ZCd=kT|xqcGVl*qFJ9YFp7)^8}SFDQ|G;q~v&n-A8e;p3+C
z`0+AMFIM>j_IJ9y%-qR2@0?C}=gcmz<Br~M3OBo5QOi6vYJ}@o$~zWx`VSCLrIvr{
z_g|gCgN%Nu;NR+9Q>>xGrmey{^^+uEJ>3U=54GQqB-Y*!2QNx|aKk270lojU)U8^;
z`rBxbPfq*1SP%MvJyimZwND&D7xmN&KoJelQ~&1<y~ubJVpSM+D^Prd6?(;#6Vzb!
zSz$azfyM758uQPgU8{PRa*=YjUsI@U(D|r?TKqi1HM3Fu<91C_SZiastpwGB^%tOb
z1mh?Tv8J|xCiNNG?{ll=)gJIY9RiLuy}0_YUyDeooca8L)xV+S)>5_2is>UzyAv~M
z?zF1d{WGbrBdE~Rq9?5?sN0|=_Xv+nf93T48CH?T14Qs&#(JOnR=Ydpf%jGa&&<P0
z8?2%t9>}F18PeDaGKYlJ3PM95r!a{naW7;!w`&C%eXdQzp~~F1(uDL5(%%ee@KbgU
zvhMg9!wM|qKAltScS;x=v7aNP(EfP~oO3lcwc{8hNDe~6yl<(h7ic3+wz^is`KZI~
zN(9d(s6X2#yZ;W7_gyue9BphH2z#Is?GI{%i>B(Eqs{tp-`9S)UMS4fbog$t$-WhF
zDyWP%JQ<xA3a>h@eH~DaHg&TyycSd$H*{FyOmT;MQqOo$Nr9nF)axa^979W?2==S7
z+NUxkQ)hx+-SV+xzkAo#>XxR2zw6!&Sm(Vf=Gd@|6}>r_VH}1?n&8g%!~jh|vcDry
zVos=hXkq`)=%u5^!U2w4_{leK^Y=%h?|hD(*_S6Z7;1mR9D9X%8{$W#3&O&(vCu^%
z%Cn`fl6~x|Dji`BjYM4)R^zdHRQ)tqogA(sna#SAQKA6DaPsf~axia;0!(4YHe(0^
zgGV^nu>T#rGz@GKAV{jki>3xE+O5$>fSlH@*5nNZkB}3kRIf)sMIWd$Yk*1$4Twe6
z%oVwFANvR)F+-{nq$-~NvcGCxUps&B&F#<R?!(Lo5kCDc(i%cylwNw8!RkQy>XPcV
zO`l>+V5X^Yt={2_k+`VV_%&1x4y;-+kFiw)W>)PtsSM~yE09h*h4=sTOl`$#xKTB!
z)yx-cq@^k&{SJd;MkH<@!0$&vcSXT)tFX<Cnmoif6<EG&kb2)|HR^>#r0!ke-T#Pp
zgZZIbAB?tK<9N-i%8r!@T4gm@&9*@FIjoH%J7uC8mE}xUiz79g0Ly<V1O06}(>=Yk
z?Q1)Zx6ynW^NG0XY3zoXP&;pPsoAPIiymA51gV5m^?y`@^aC&4no3#n;8pg4gdkO}
zW;oMl4ISC&i!i<OH_y+vct4p*p>4!)MYT;+UR`5muKdEFIwt%A8JIV4QU;{IGg9ip
zPm%sKy?yE5!@$L>KS?hylWSjI;tino4!=r!TvBSFL$!wlD?d_Q=prs#^{4U2S+05{
zQ#5E2tUfS;pHt_rNZsc4C#2^n@s!jI<HDNOFSqC^(g#R>+2MD*HUMtgP#r7+1=M>)
zI)=`Xy?4XQZ*Yh5Pg~6Mo{hu96_}gR_%KippeJ=&j$EK(KMSa-YHwvA*+xh_R%_#n
zQN;OmWLs8wd0Y4ExpC)!W$q;v+ltl>^A<O2Qp8X)OgEOc0hx6STz?}ER#R$@%SDW>
z-UEl~w&%+=Wu#<Q8I={sgC>%P=qVGms%8EAc9U9DFL4p)S0JfV_m)SjZ)ZR&Xjrxc
zg-1wLECMj3Uq?uNs?zyv6)68qdy8M_dfuam+wf8!0S(Rhvd*M;BZdL-s%|4NNAVy1
zuG=wb4KH<0NxyE>eD!T8XV<O15%1#FZ_{TT&wuV-FRlJADLu1kQmc?{hyuzueG1Ud
zmsDE%F-Jh_rC#((_j-ub#0pz+mwq?49qrTDetz0}d~;PUi?UB&2=<!u_w-8n`X^v1
zvmz>%vRUaZ@a{9#>Aqvpz4b4)y6=frZtNQ4fO$Nl0;*3~ooPqq#z3>T8wp$&t=yBS
zVrWHDXBG2kVG-<aP=j`(1}n$-7SGl}zWre3J(UAwsO9!DuuTQ%W_}{j3)D?O@JI_$
z%2UrbZW3Wpcdqvb71s4g2cKInwV=;w{pvseV+U#Fm~$$pz_F%DVK+0fmAAp4`}Dl%
ztnUW{SBJHB<y6=XFnLjc<WwrKzURpL&5xJa4rs6p4=#>6Np&)_*(cL4^uR471VGM#
zV!F^c*SWVmT3I*n19sgJfLg7ZA`Qb&l}P6}fF9o`u>w8NS}!2o<*Vj4O=|7b^9%;~
z5>6r$nwGPF__@w_mWkIgn~LP>qQu43AZ^epglh1~_`_rTfhn!}2C9k<z{M!52(>XG
z=reT3)?5G;&ny{*97CBne@wlALat`zXYHK&g)b+qThh=Yq{_vTKo!#B=OH4oTu+T*
zP0AN^^eMkCK}8>i9UWeUSy^alCq5_!R1T7)8oT`Xt4#nvhpLD+kX>GS6sRj;CAumc
zX}<{oI;yl5%}Q8!bW)twt*Hg%Y?p^ngND5dP=6Ar-#Ju&`MdG&$1ugwcOSdl`r$U&
zS5nrwZJ^4OS-<Ocnuc~rZjZsXarm{yuEK}aXFlMO_sg6woFh#_6{X@;Rl=F~3~l0;
z9i$Z(z*hc=`2f2q2#D~|ehxH9Rq_MB(@6YSLvBLQ$Bde2SA(zb4b)6Up#E0H!_`5P
z^DGT~IU(FohI6E8r-cp6Uk3UOziP)s0je-jTy7uV(W_XOUnJK*#Z*2ztN=Rs{D*-8
z-uoHOYGI{}?2bWAV*nSHzlfA>R&K;~Fq3s&TuxPvrIq+TNHf8H8F?rm_J>Q4NeIY-
z>jV2%@Q7^)nwfDEq`g{ZNs35!Pt_^XTDihRVv9lTw?++V;5pLR9DwEDORjdI)~bOg
zw32)Sa26*SG+<2)R*0N@H{CFBKEYbz^UK7-%(%(LrDY6X%W&6yFzhn)?HKpmBkVFC
za?>VA!?12!iF8H>uf<9ihaF;7-E6yJC(N>&2O?D6D)vDGQt`K_2XFD^Ie=uyFF$K<
zX<NBzlLw-(Yjtys^lRR+*PTK3?zTG&zZE<=#2QF3+zHm@bTydQ2I=kadHA{hn6;HW
zS$BX;qXM%)Ew}+p7o)e;`C!`}`GH5+S~rrg%&v(lXebOq6c$*&&#-!@WyaCm-p25<
zt!E8Gj(Z>MF(2|C1?C}EBj;|Rl3lT1OSABg@J!@#4IHZ6+_N<MR6^=w*befQ!`$7*
z_rO8I{SXBv+pbASU}9@y8R>7Mp#pY^%ddJ&i!)+#vnDY?&|H;G8R~cA7idK_%oWy?
zdCi(S7WS$dtU}XB8ztBL<uzAZqMQguiV<OvNoi^qT@5s|2Sqb95N*ewlco{+G4Bqj
z;zfi*?LdU1AhanmjQs_SspJXYo|F*cOU5d$YR(X-<dF$j%!`0Jyk8J%eG%bZn(1?T
zN5{(hXbdVAZNxXr3j~DTk#PRd!u=`8$2&AzDz{irWEQ-z$o1!?rmX@Y9%857SfaO{
z0rn}t61ty2#p32w=BN5~a-caLZ%$n8|HpyG8yd#$lo39Io+Tqhqcem>fJGB-r&VNm
zjpGCx2zZ?H@n_#HHq?!N9ARim=><;581%M=$^Sb(0>WzD#88LE7!9&dpM+SY7{W(v
zh+ZT*5#$t&7URNYrAOV_OQolUBTS(q+4*?btZQsCXIO++Of@n1ab3xK=2#Gzr^6tq
zSRJ;>j&d1e0ir;-adjUq&kg<1_ZX~4n?!2H|3-+k-dEol=F$TCqI$eDYW;sAL>m6S
zbm_&{Iy}~3Y8Iqg%!Zj1-G96jhtY1_O<x|}(jFsn2eidQ{l#bw(W?JWh<kjG6&?{e
z1NA{^AK8Gc7{Tp~vCCqI62e?LBmErZ3ROMuJ-s1lJjxH&u?Q@`oQnuK#H<E)pCCKx
zoXd6D%azVKqm@2P^g*KWY3~9tbKC`ZXofu$@0?BfK;QdJJ){+y-3GO)H+qQqi7q0H
zyK&RlG`GAWh<G5xK}tXr<lc&{k8xUt(Qcxg0>twe!YQ}28SCV6LWi4k{op!gMxoL}
zeG2Y};HMB{24Lwg`u(Uzb@fn(^J4w~CEnW{O}{U(tdA9K3bBCjtFZyooTu1nhDYO~
z(8tRJ#>AQLUg!`7HHlNCLtVlP#5~X`BK%3Dwg)-I5)g!uWaw?QIaSlybI{y#tFh8j
z4H6_sVl|>{GrxF_?)&#hj@N%AnRV}uu4=DS!7+je7pzBYGfJ6z#&d*dvs}i(cRv=N
z3!AZH<@8WO1?w3~|0~Fa++cA$1&sK^pDaa<zj5h%;4pyH>hjxq7`PPH|3cd$o&oi|
z?yNv9NV48G6>!r;AxBL<^Zl|L3Vk3QbyZ0e|HCv8$EgEOsEMf>oIkspbSd@44yMP3
zEfp!q7~!`osG1jxI&M)%Nb8q>4{4yg@E7@oXsufudp{s`Fd}fE&nOjprWT}WP7U^3
zkY5}mubqdQBxwfS!p}WO@rPkQ$v!4w3AZ@9>jU)8tEs&h?yy!cZqWyz8!o+*C%
z&6=wU#^Ypcsr7jnD1(Tbb1`ZH*Sq~rKLU5rIE&Yl_t-f*ZxwZ%AX+}WoLC<qfkJQr
z$_l*Tj3d(X5=?(>9zR0NpZ4lB=*%RIWTNwj%69AO%iNr*3gyA7n`m6KS^aX}6Z&ZI
z#7CtxBlIpxJVW~Y{0d6m^WO*2s26;7-IPU1s)|^H#Lav#EFDJd1zU5)t*PiNm0_L)
zoHSxZ`}9(%6w_R^dmE^4wVGk%6>?`9n*A+8JbTeI!<&aG^GvrQoQqvvV2gEl)iY`z
zC)Dc$vGrw~{BrN3pTCMT#B>;bA8>gJzT%p%xFL5`2Kk)@B5N==d!<dLRt}kFdKwU^
zV3@jVojL*vd2n$7=;`MY(D*EZi8MSocsCy;Hv>r1_)sxMAYDJ->jjMK0TPs(@g5&~
zEr*twGM7WpnR{JeV4{U!1T&`x-7ns~58k~VXwro`t_`#PkFSTyGs~g|d5-izrC2PA
zi+&zV#|fhSSwe~Svrz=+L(P4&h_#JM*2XA#-JUZ&yrV%>@8H???0Kvt;W=FuPIijS
zI^5I7vm~A#+MZA`26c+)TjC>GQ3~#p+7Qu|Fs1?WNE7Kk9qzl)_T#e;k8ST%UL`zx
zHt7<E4}a1XW$P^R?D;(;;gxJtM7h0(Tu9rOH#{|&Y}Kpfb`g2_q@%?b&2FP0T_5_Q
zC`Rt0hQ%4q5H!=|!QwN`wD@{|{_&RUI_2ZKe#oQc^1E$pH|j0Dih`ze#mjR!5fqmz
z;O>^aSYO|`c@9w3wf3GQw@I&NIrSW+|6dB-{FNxrKUdd(*{43&l$erZmuGLYaH%Ej
ziqg`b7mLFL{R3BgPa_AQ)x_o5u>{_KH!Ds@IE%ZprXzC5%$fGTh?vNUZ32(cIKS)&
zNV+vl){-Ug@LR3|XI<-N?x6PFs7NcGJt`uQ0>-!@$C<dNjxNu~VKSYqx;j&a(!>Z9
zQpHv=Z=b+K4(i-BnA^S=NfTYUN_7oPl(^~X4{~5i%Y9?^E#>f!8j}OQ-{l3AYWABw
z&BnWq=C&4$eVS!<PBdkR`@!cp8#2^wYB0)8UCF1H=!NK^xXKp1&#w2@U4l7Io1ez;
z$Uj_$Im=Yhh&ejQ*rlnnq=T9=i!h7seHi;IOV0a8@qpEX<(sD*n!9&zTij4ZkVjRz
z1?04SA}Yt8!JLxnv&y@8DcI)ZFr}mt?SBk&Px7k*CKVUJKaYARb!5*dxe_^rbS++g
zPZ840`?yf!yZ8<4`D%2_pXbf36+DQuWCB`?P@E4#0|6&5C}~ZOvkY^mh8&WjF~)dM
z(74(v7#`9?_hr|%7<gdyIg`golxv=X{5o8AM}q_^rc7CkR2(6ir=5H<tT3Y4_6IPv
zXo7Is7Up*a%s*^vZg%Hf`Z}PKS?SUJW!@`6SLL$kOpuUT$X(kMEg93{&0DYvltNHb
zbC@oDi1>tt-1^S=ZL)^NoJ%(B&9*6nO!sCA7i1;)p83s++A(##S+b@EOqRXb{0RB~
z*&@h;+j7CI$6-`|vMjS|6boCD9$fMM@Bd(p<2dIjBytcNmvLeU-;I&!D%3tA*Lb)t
zoim#>vySNm+Y^q-YACsIn8q#<5^^@Zh54_qfBoxU9_GJRm>WC(WBSKG{z2d8PlP#(
z<Cu8V`vhmZxz~|j;Pu+)8g8w{bFNF_Hv#U;qA|-aG25eFr`$p0=N-sRgdi2-b%S!B
z<Tp_mY%kzk2iq~V(nU&U?VTp{DDT6^$7G)KM+2AmBzx3BgNPV7he*o4MTxi`QBu*G
z1#|8V308<TQ1_$shp3mRfUs-Ga@UE*6=#|v#;^Rs<X&lrT=?T8uDZ^^Wuj?p1r7En
zdCul^+<7V!-e8I_`9%~uhxL?w^h0b0G_ryw3+rKa?7~cN;uHyU5$D0ek9i%b{Dw_L
zfhc`>j4jDaF`V4XyN>S7BFaKbB=y)|?+BR{QB^xQ;qOi*4qzG-kn3QkGz)T^4AeSe
zC2^RfczmFSn07$GYgw}HlDnv-C)YB}!<xbv<4-hZU+yH1EpvDl4n)E0A>tXP?Wu%V
z2KmTfK$VM!1o{BYGcW}5)GfhjL@3Kv;4HjcgMayv8DMHxQXTk#VNrVk6tNNrwkm*L
zZduZG1|?!hW6`ubXvgkMr@5bZbZ3@vp1!-ToAr_Anm`OGsbf;>H67+$tHOqa<3XJ1
ztkMQ%RqrVg7C6Y9wZvDoz|4yas38_n9-EW-s|We&dT^KeJhL5WWNumcj?=(FLJ%A<
zryz;PBpiB$IrT2i`A+wVrqn)Y_SgTT=WLIbFL(++y+v`NETzUAXrEn4LC>aubHD#<
z`H&RL<<Ybl&l5?3?J!)`aiBT}8pa9Eah|(TM4?vc%kC+>Zz=G9_=Dflsh>A#1R9^W
zjHX+u0x0#2<Y{LvXFzx9h}oOVd}SoTgc7ZW@@>y8Yv{dL&*cV0dH<@w+x+8R{-i*=
zG`Qd41)Zf1{q59gv;?(|nKtoLhF;+-EW28Demz{C572ou4coxv($Ig0_c#TvP1Pr7
zi~B1R8wM)udb(r7CV2R4DTs&poTSqnDDSOwn(Flx`!xXVr<Xm3lJNPs<X9Jd-VRDp
zl9~b6)#n^wnBfU1&kt1WsdX#CHa@5|b$BhdaSY*s)_$~Fq3c^1yvOgOM1dqwXjGv*
z{)Eu<@+Czg;=K<{hTgj>t5^=2TGQ_a6qp~yIj*!i)27#|RxdKV98w1cvt|<OdHi7g
zDS|veiC;Z7>}WRqwAE5T3iS~Oy-2%8CLQRhy5$5p>Zv@3_;vcMO^)9X^+WcoefFTx
zC-plFH7W&_mEJI<U1R$MDM<ttfPOXO$X0lALy%t`<M*8cUUyalH><A?;#vv<$Gv9J
zGkWfpJ|<8=Lx7h7{n`hb+mu{=mzj6pre{1P!KfWw`4sPgT$^E)^JF?sedty#+B9e`
z4lx1wK$F#=V!Ku7zA_uD?k>NM53+_qRiGx5L{QAUQ-agkB{(VJoMN9vXtvnoPR=-Y
z3Fq1p2u*9D2Guc;iYBaeaP)>6!wxY1>t74THP$m9oY_OYpnyv>wf&}Q-zKz#JH=Z%
z^}=WN)=}XeEO~6%Rbw;{N?UP+lv(p<CubhnPi7lE@0wEuoKmmNzi6&CP*igK$oF_k
zoG(rl8orAl_nZ?kXstXMd3kIXrHj6)PvQ_L6rmZE8B<w$`TAet{}4n;2Le^_&Tvem
zsS}_g?L&?4`}YzN*;2RPjhWib^$8tCNZFb;gW7YbvZWNS4(hf?By9}cK;6AT+igaI
ziVpHS?@BG7o$36Q>T?9+@yd?yQrn$vifF&-pQY(uMoJjRD(J2na^$;-2}eH1`xwsl
zp34M|at?frQWW|L!mMNga&bSn%^lzpPtfm3EQS4GX%GO-O?>b2?n{YQYkFBOne}|%
zekS`iYim2Yt75K)IsATb8VXCRBhc+ykdoAVhdR#*zQ6&<+nd%>{y6W}@SE$>%k+$6
zh(lL`_nE(kcDm!?X?S|5LOKU2G1tr3{*?dY2OgK{DaT)>n#RhRLFGIn3m+ijvKKI(
z1pCx+OZ^Y|YiO-lnWFNuzzx8y$twC19R>0~!a0x+pthXLR0nZ*uJhNG5BZV{fcIkI
z5JC5|h;grvBs@Eb)uxvRlV&0TftE|90}IhUQ0ZdR*fG%GBcQ`TF%=Q7P(yR}e5d=}
z)4omely_i~COm~=>613xeWJU~Hbr~Jbr0uz<Z<5=C@IJBKIZ$g*zghxyJ%hyQ~R2S
zrGB$6aTG;ZVO;hcWwCjA-0sh^2!SQ-6bvTeJ(SnUOzx5_O{ASeOnv-DiXi^MBg8d|
zp6b2-#Dgf`ordpD9Uhzt7{AV@J587gaeC8^Rn<18sGx(XUPybS@iSL{mwiY3vxM`9
z(x&&AdPw@JlcyamH))fudUFbIV<M1q7|BayT<IGeb0|I)ae`>F{JF%dv=*P7|Btf7
zIlCMqE4Vok;)VD6Q;0b3E*(n?>5~ie9M99x<;|>RkaKRYu;w7k?)VnvT|`iHAbsx=
zOgK<|r!h|Qo$Gh-&HEf*Pkx7GK&!-^aJBn&w?i+Dw<>d>T#Oif!?A>0!1=|(kj2%H
z!K|@Xw&`&zzl>3%1`7Dd?)<I91ALn}`YzVV3MjpA+TD;o^7LP(kc{^G@n^9Y5940F
z6T~aInv<V*YIkazGRo}>-WVNI_}s839ymoLjUaoHM$-BKWv^-A=W&$tkK&QiXxyC!
zH<Co++;aDs05y&(WRCzHl9bMlYSZ^p$AyhfjlQLN-r7Wj#($Z3C5-%R5dLz6l*8Qz
z<KDHkM!XO1HaPL|E}ZkR!bgAq&>!;SmPJclyxpTruG%3*SW4p75LXLkeirs6r}6VV
zCas&WHxG=6oB!u`w>d}63qK5c>igmSBDdcjNPlszDBI>`f%p?jqSSBwLqta;-tir7
z{9kA}HShi1JhyIoe!jik{Qsou<@EOcal74~pPt6M|AW~ZIk7iLZkK;Ib2DldkuI7D
z9LB+2@5?aI=dzm#truW2bwdvlNyBFLW~3q2#nZfzHAwx1{ZI4H>7V02@BjSg$N0~G
ze&Vm;pZ<@?BaYA0r@b5o?a&-V{#rrrL#s`RzPM}3I3dE|t@cM!aog%F)7A9HSBVbx
zC~fAo7hIAvs7FH8q68BPe8K;{;`_b~%OjLF-pN4C%;)4C-d@+G2T-%T8$hL7s4Ad4
zz?gdYxFsH}qSu1@mhkJYAa%buMZpCw=_ZWT!*h&34Hb$%?0i+fe16_W{dxPm4WHWo
z8ql<Ku|Z@m!Rgjt*m&8{?E%i%%}|3gnoa~Ol8`}EGngw#S=&W*W4aCz5Li1EUv<&g
zrO0h9HL{d(E?r^E$1OkjpKhkT_O8V`ZZri!{<NE9b4s*V&M?E<Mj7QvK@+78A6t$h
zJ4Ia;myJZWYwciBH|i~0ce#=sNT<T%TL~wXIMRld^(qYC%`NxzVvopZhrLsKl$a>C
z+?q+M>y~lNQ(GQ41;wrDd;pXeVroTVQ<|;C%g;#l>^)H_tU&tJ{QqnFlkLcHEo%@S
z+hfF9=?Zt58lneCD*n<^eWg`{x+Bf={omrpJ^;xBb^t9hEh6NSZsxr^4!YCsO_>*C
zhpn4FUjl3DvPPmUQ2xb#F?5Crw(ySX!>O!+UO*i9I?P>rPAnIHzqpt6DQaaA(4a69
zO2i$YyRN`Fw^=E))e7H$GZhZz{8sLpr;uz?D(GXFCP22|U!t%(WH4JE(DcwJ%Z9uK
zs(5dAk^)QBcQqH`EqF82M_f+9PSpQsva!7}ftfY%`@SP5v%#p<7WMHlm5L45)@)5K
zT}px~sq|V~{2QH{ozujAb%|?hnSq9GT|nzM^Fxu?@Y8*-K?$bP$YEuRZbv;PQ837A
z$ieva4OJ&ONidD*L#KNc1V>3dz!@dAR)G?%?HuhaHO`V8?H=(`TXQ8Q69OH3`1gE3
zAMGma0ZknY6{M%BVw38bubT}zz4McZxjek0NC;MdJH+s551HgZ^A+VBRfp<ECGYC!
z!S~aP3Yq90Z3$P2PPR-;%OeFfsELb7&yN(LJB51_<KGe1)34X`%J<g(+CbmmuE)AB
znWjNlxTGpQr;BM#nfXW$f7P)2ePP`s1vj{{Tj%B4f{_(xl;=1nH!4X1$|jusB%qO^
zIt}HDCVQTHrIe91qy#C-Yl?MOYgC4{x;5<6!k(0c?bA&~x0N%)W|XGx<|mGu=t)ui
z1=0)4>l9(?*((KeT$o{gx5CUHF$H(&@jqu}+35&N+5whYtekc+6fKTE&&R^e?~+Pg
z<=F4G+|VA)UpIC3h6{C%K8Brnzc{9LN<Y-^r*xFu63y+Mtm`E6Nm*C8&|6H(3f2-J
zxdf>0Qe0ohxmo<R81T0Lw~YprdhBB81X%{bh);lX?y*zU6lR-nrp7vOR4z?+{h&kY
z5cJNk$>!@vK9>y8POZRf#f}n}Db;8P(p1o@fZW`me*Q|d-&DqafV5%FRJ-iY0w<Kn
zz*XCTGrUe!t#Bm%dui48xNaCqR(Z2Xj0SnDgG6*Uz37fNsQ;@oG!-%h8f!pYYWT}3
zC2Ix@9Yy-5NWT)&k5I&%X|NBU5dVC_H|FdL-4k}(Qq=Anl-zrvn+!Th`bO_)wd(VK
zay48!p(yrh53Wc;x@~Z$k2=bYU)`n2!2@|iJWIYvK1mXS%1Maea%>-weV>8EvxEbV
zF%Nhl`Kq`1@ROPK7*74Wu=9Wt!>Y+LRb3yPD4{Zgs?@|#wdZ<7e#k7V4wRPtkhof|
z9;jWCRlf_=V&!EU(B#mW{%+NJ$>n1we~+hl!O2SQq)ARZ#Wk2T)V-ymKii@8(#;M(
z;4Vx<4zp;2IWboD$0epqFn1{-7nCggFj;W04)TYx;$U~)d61^-|0*S9VmtzCnCP92
zyHpmpJ&%(@nw4$=dfcrdrhLg`StrbP^&udcqnR^eS^hyd`b8aO;Vzf)QNE+py2?;@
zm-krMmUU-tJ!#Mfd#l6zrX=p`>n4Od-knY1+=`%j7iGGV8c~M$*Xw##ra1Z!IrN28
z!h+IscOHt!)MZft30sQ6Pl|-}C%z1RZem)~DYxhmZWtkVVOpIev0Gq9Ab++u`@dT9
zeO}!a(v)s8`6Z&x({rTso7?L!sHbA->4*39{rdrCcV4_XAX!}yhlOO-s&v`cO&fjD
zVX!C4&D$Jq%RXH*moC02*KWoZNW248tKK?jkDh-Xaqc(7-zCzi^liFy@>`JFKgzk>
zONo!z@?%^=y2~8LU^3F$xB*DKy<wAn-ClaV5^Wwf@BL*0q#d5f4b-&u<c~B#y^#b?
z0d>s;{lHzC2w~&v9(^WE^K*xpD>mycF`l)Ubhn~7*F@|i+=hFs?|u(w+M=NeCv3SJ
z$GdFK@R+0_DvSo?ErJ};*(FgtBMP>E49(S0{w$rJ<Ju(|@7g5~^)bJ`pa^9fls)$G
zI;IU;hq=x)=IT;#1N+V8leEK6zpOjMnWWEs6V8*kWB<h0`N>l|7O@Kn>5*>qEHTD`
zqA`xx*)AKD9r}fJ;K5W5W~4w}45>p7lsAr%ic?Z`Ifa?&Va;6cjDM>1T^L?mCXBrr
zDsA->=5^<aSVkr{uMRT$Ps6O$G;#c|RNK~xoF=L}wBZHW>*$tx#=}^^)SaJ5{_8Ne
z>%ID{z0qoNd55lgZB0Gp8S~^(ZOJEoj<A{D+_z=hW3<-sSy1wvEl?&MpNcIXpge19
z6D2kT9QJ@TIBRm^8dJK%z?r#KQGQdVo)&v1L>Q+5DKauXkI+TNHxqKJ=*p4ooNLxP
zL@flJxK%M8RK3_vMT=V-m-@Df82xzkB}<TSZjt7st)4U}YDkZHE$PXRr(UmnT$<7%
z2;(&x+fe7`SrN^vvT<#Q`=kNqGlU{X=jLYS-{?(?--P#fHyRn<S2}H~F@3WltX>Wh
zb3$A$$hWOL8X3c6P&Z6;Y{|ZO<*zz*JJKVI6T)#)K1{aZq{!mRfo~9>Auk#En|Re@
zoQFHfnrBA9sE(|173Rs9HkG^Yx*t~y-@t6S!@J51%7Dga>NPR1wg3$?wu9L7YO~-D
z-7dq)vLoK5gxCh=_ZnvAWO`)bPb%_>MkqSUXdKe$9FhEi=jkSCsXM1`(B*FO=_v8~
zaX;uGzSQe+fb*r_r3Ri4ykQgio+9JueAX~x!Xo(m2=SQHm)~3;eBUULEGy5M87L_t
zMGMko&nvl`)~-t^M0dSJ7rSAN_J(;(T1{O8r#QbR$~cw}L$ync@1uT}gX{01`@Pzq
zvfz)14$onxJ|eWqYbsGLJp+m+W1Fpc<s8XsKT}dBuz(p|ycPX>vFz#Tl>ZLsFlGXz
z%kKOxVfk{B#QJ7Rd4}3SBIVuDhPd1z@Omfq1J#h%dcjSzu&`)|XDCoAl)p))a6+@t
zX$|K0JYlgdcmZN`Q^tNhqu)TXO%!JqC*BGZ4-#|k40rC3AI(?&Da!8yFEm}EGCH7d
z+FzEf-(&t~H<MYo|2?H^9{{@lI<0Qdy3>?14d8~3`_vRYcgC%}We_gcP@ZtxEz5uT
zjh~**fb1d~n##+`FO+d~<}pxJ#YsSy*T`eA)Q`CaKZd)v{!;iMSWPxwpV|g=3o(?%
z9G#+8_BzN@E|%;sU*=lgrG;K@CPx1sY`&T5K)-H%*}AstI6ne4KAWHlq_J|tXF*Dn
z%1qDjkV&UoD$_JvCW4=bzf>Yv7sc=wXA9CgP1Ds4`o)I7!m8X6h<?wybs*o+ezRuU
zofqE9g=iOCwz=1`hVtj@Hnr7{jxlYnY9Cj}zjhRe3ZzBcDZ3V+Nu6`iwp}8S;Q(f3
zCUqu+HSu*v`<3nZs;l*?n_G)D4-R}hEd&+ZmY3O=wwh>*G1<dOB_z^BtAM2D2Ex;y
z1WJ!I4XoMKmxq%B4Y^sp4d~QBU!L_tpbfkS8$l-qlO-FxNSm{ca&kcrsfu?t=OdC%
z%d&HrDiu81V8VVO)2sDs2D2^9w#nIzdfUE19gxaaq}9)5Qn)?khh38fGkJD(puxG|
z&_|t_OGl_N4->=HPI<eo{E&K<4Rx-1&GkMc7Z=n-x&`k43dWsuZ9n}DX=A-tk!v|d
zh&V;OK9vC<pybylf89-D{jS5DDfU>VVKC6oAcfBOdFrlbm1&qF5f7kqQ8Nzc6;7Y$
z`5SO%aN{zT7Vr2(wubv^SW^Mox;=Ac8?lk~2>QXNJj1k{s+y=3t04dSYv@z!{5o5+
zIzub=5P1coXBZQA2m_ct-#<Ga@O}R^eZc1b*Y@-B*UQ2kQm|yo=&E^MS+y$<eQKp@
zT*!l%Y}nD7XbgBb<v6<z8B7q6zk*Du1*adqVov#9x5DL;`XndS+0->@?95jK$s;#Y
zdB-2RTODLYTt?GbxyV<u2Xf}CNx+_Gi2td7Zadp-{VDd`o=TKw|NC<SFi_(`i=A#y
zlcd^^k7sQy>!f`oxrM|phQoWw<xM7(IX#^iy;@eY=+?=94*kc6{g_o?FPF);(qM}*
zS)dijsZqB&%Aecb#Pw;`x1oTND}2<T4gT2Do<#{dMT9LtPfrJ3HxGr_Yt^mApmOEX
zplP!T^MA+pN<O!67PA<=cRa$5m(I-4sfnS}-0Cn_PEEfp?rxZFo7gMR1r-kvs9-{`
zSbEwO(-?xrGg@F$$npY4^S9)4IpJQ0xM}bgzKAtlO+z9e&0<8q2D2EFyIxMOT|^qX
zte`x<yHid2vaH3ikj=73SzHKFn-myU(f;qKl<sLSEpNAD;!99sqT9ZX6eO;nKezt*
zx$hn3FVBmU51cIW$S@{MgZC_2a0R8BptNNJ-imF*Foe~PZKOmb?)ct$*=w%<TH;ih
zVe;SLOjD218DC!j^?|(1EeeKSVu;8)bO)#nG!F4F&|<84&9%_B?uASc+o_&=nR&3F
zVV~ZJrhqF3RQG2S%j8z_c^@lPRAd`e@16I<%h12}atgHjS@2{h?rxsEUJ0=*S5;VM
zuOW7l8Yk+9|3*?PdKmPpn{cLo#;HZ+GWi>odqp4ORt?dW>bQI2i!!Pq(z=E#YQIw|
z{Tc6nao4Z@vb#q((W#BuhmvohfJubH>N?Erv5$e0x%8a!nhKir!7Z!#WNVgrw;X80
zjeqb&;VJdKCsB(nX=`I2bSbsD941cE9Nd(?Y(Y8<<9WB#<#W#{wxUmh!BI}NlaQvz
zG#S;lAjO~LsvM_U^YPlsTQ!gcP~TKM{2@A4NWO()9~314OGNSwh|dKXM(;NbbEZWS
zcq9AHb^!zGZ7vEl0IjvlZFfPd@OvSx?e`2CDB12Ban9Yb^Ou!nFZ=yljb&y}vx=i6
z;*;t|y`>!c|AswR+OUg7E%&A=A%S9L^+-+Bid0^+DXXc6W&ULgwVnEtt8FryI*!z(
znenEx|M%5R+VV<S1by+1@-*zs4?Rz~n?$KwN4dy_Wg9y8G`xl}xY5~>cv#fhpiQy9
zMp`l8X6iAY8CQS88J&}ka6aF<Tdt^{Lj1Wo?M(EY^yDwOKSTcMPx_t6ie|V!n*5TS
zjz|ri;MHQO_ONnGFql^dxgVE8fd>VFFy0{;_`pnVpt(_SpCfU?S3HO^XrjjbD$M<B
ze(p>dcl2!9-XltWig<H&fqIer9!^S^DfKYU^8?f(ti6$e-o*(?=-0%a+XA%u9$mja
zw~Bils9XGdz0|1NEXop4+X(;a$ncI_^?^a{<mFvh`GYx~twEbuy%F3=WJ{v&6cpq8
z3`Hgid1rO2f!wtCq(W-ZZ{Dw`-5SP5lZUTphRlN7UGfA(;A1eL^;Vd~tpVL-QcP5@
zX}*j;s}975v7$+J$;7KkM;E$C?*-l}X;Q0X1JpwSiF`$En`B$bYa}LNsPz<V7GZ&z
z>qqH5YJ=Fv&}b~1Jc_f_Uwn_IaW@J2FBzZBqQ@uL)lH9DT*IO>9UFWd^0y}2R1TxL
zYRWwgZNG;TdfKl^AgFzJy3wj)WMldMfvet<recikgcX!~UJg7@k3lm;pgW-ka~;wT
zt3TuC-Cj26j#BO7ea6VGOy!VPZ58O}&A9;XcDvKjx1V)tV=)2cSA3Bi?kB~(x-e^(
zrBhT#xEQmlhtclUWjPxkmpjPb@Xpx4>+Y^g)Bvq00g(dv#G)s|eH{P9#WtgRllp4F
zu_vO<mOLyf+K|+K%`)0mq}~n6|JSHK-dCd~;h?;DDx+QIL<<@10_)rqjV6#>3T?oO
z&zuaM{w(@WSFN1qFX!ANDTvRT=;;=miVHYUo*5TY6=f1eKsUpgZp+mQ_{0VzF^}0Z
zwguG{bvvDC$7>Leh6_$Xp5LCkLdwjTdLd7Qta73ytit@d^?g6g41ay`$3D(B@`{db
z`S`BKCy5700J^&6yC$5=#k9NRTir5QckBK;jW&ZEAbOxoi(Ym7aYvGr$mS~;(GI<C
zdaztR8`+WTYP1vWxI-UHbn3k0=Hx(0twlBgy}Ugu=G>D_XS>ray6Gu665&=y2^vb_
z$U*9#^70QXgR@KD-%3^my2M`TK;q8Js|l=eQIK2CxsV2!n2Ngf?|nJ)bP#}+@26<&
z2op$!p<E+7A=pOcP7{eus20T6*S|jBJdx!hd*eu;mhV%Z2_tnGTu=4&KSVd=p3V*J
zbep=2q_8vI`3{Vfo*A?N>Zx`AL7;(w-5Rt4lpUyaI|cSAiH=8{I?<&zY|=w>E>O+&
zx%Nq|N6_>=hQ=i;)~G@c!=;L11K3-cKxUR)$Sp1nLo5R1)MpM-%5DxhRXkMAkx8c%
zXv>iwCvpwDAX^>#t3f9fJ@G0*i5Ox%%%xZHbQghf9V2DJsClwK9K2s&NzEFn@aeQc
zr<+~o3jlKFdISCAE0Jb4AywFjCXfqqC$p}e`>W0}XZgbvwqQM3D&9de%b$jn6vlL_
zFrn14Gbo;Z6~7K2_BzYoB&_J@M)Eo^bK13^tlX^%^6Tqn)?IB!3xB}{U5ijJD0xZ7
zff&MzTOH)2cob}TbhxU-H4*CUC=>r^47Z6KU|DguD1Mqw5sCX2<bVG2{q>*!{D*;@
z(e#~5U3h=}HI9G%^;dlHJtc6afKzoj$&e3`DNuHs^pZ~#c@Pck0=tf)7ZL+bH68TN
znk8@d82tA9xJ*o>rx($rA+y5@klOV@@ulKHkL&K#!E)Qo1JbzM-j04<f#-?3%WM!6
z^(4i@P(1M+_vqq%W_59CRzQAJ*G+KL6VyM&Sh(vk8>%TTwnZ#_B@aTR;uvL>y;=Ie
z6({NziKAcLDge`K?y5!iRc}B&+L(tVL<hFfBbY>#2N9kIn3sw2T4>$qA%|?K?E|Oq
zRGFtc#TvGoh%sv@e?D)P`dH=>Q9U1ecbM_47vVIVV$9&hSi{CUi95v+s`c~xW2bqn
z8d6@{B5L6<#D`K2)Dt*_XJghvuq~->>a557I+QM)Sm9h+tijG7o#Zy0J0H$ev-aSc
zxLYBJ4$0DZwRg;Mm#hk7K#NI|)74e4g~D_(Jv3)eI<PU$=}p`LO0n#MEP{416|?I|
zH%zVLe9h;smdlARp9Fxxh^jk`g)eVVJcfCDPH#{RA+GmlIx$Ga<|6wV^2K{IiE&_b
z26B){c@_wZ40jvC1<cY3R9vKwWVms@dkZaH!GoU|30cFJ+WT7_<j*avt>@^6ZXDmb
z@ws>IWjz?$+Ea%y57gMHom#N#-A{QBT<={T3$8;=Lf(USnUliI<7LHe-IX|LYE%v+
zx#bc`BG~fghaD{4{9$hZ8y};8m`T&;quircqoJ9YAEm;qb5LLu7cov;x5f>xllA5|
zXkcOt6UK&IXX>rNj2A|OV}!{tMxQkgh%W6vl{LS)kB{j4$`3fd6#x9=qa*wwKmLi|
zzQn)&@mK9Qm99yA#k-|>8UW{cLP2VE!Zo4nv$=GC=!9I)!V`VCjv&#t!fV1x3BsB3
zGVzMftcca~GhIqM!yO>&rm>GpPx{Q|Cc&jZ>|>$C$==E^(P1{@JoumsY|T*7&^?*;
z>L8@5wS;Efa>HkfvKzjNa(X6N^t)O7)$yq(JVbgX(GLK+R)j(vqP+?cA9vc{c@-kX
z7ZNEM4RabsOo2wF;O7WKO>~_J=4Wla2Q@rRsNOendiy!JQg?xqA*OywaB+Le#93Zj
z2hydD$`a3@zcug7f%6bm`1f1utfCJf$if-o4?BSB*(dIj;9~>MrItFCSUsz+V$@?b
zbyR<TGI=xtwgBn}<PK(-iqe{izAyfeQm?hwe0M`m_VkZWVwWK2N$dpdd4~AEG>L5u
zsU{gxaHN{KWdIhMSed3XuNcb1D)Z3*<-Z}PGOYQ5+f-X6sVPKf(l{AkLdRZ09^GO{
zL&T-71N}yH8?+6I@$F7%msjV3-e+pvH1RQnJdI!a_mna`Xf&C^x{h-(;`%eij8_kG
zx^XOx<tpg8i2@}H#n5Fan^5{OwQkJ>8KCZf6s8`>m%&>-K3(lAPMLpA-4fOah`h56
z%0&uf;s>sxY^d{vi_;GC_%LaTqatZJO27DNI$qpZ^E$|rW{HoF>F&9n&_L8p@ps(!
zJq81pLljmCHzKRA!u-7bK~3Tr2_X;$IxM`#HekFc`%zhb4*gM<)sS{d@UpjT$_+s0
z+Z+BQ&9*n-JYRE)(=OUGMd7%V5Q`CU5AW)TK&_nf>Rs-VIRpAbK=XLRh-g=h?kyGf
z-Dur-f9rnf=W*QtTi8>d^uL1bE=FSzCRp4HE~X$X@s|5r-JoB$J0(~=C9ks;<HJq#
z<?#~ONxjs&iYE<RZ;9U!PsO}LxlVx~l31VQ6p$wR`uaM6`EwKF2!V6akH+sT{TAnx
zYt;#-E$ORSp@P=l`#!gv6gc(#ZYH9;nl%#;zidY}X1^jDS(vf6`(9<k?1=Cx(M1}j
zdJUX@k)JZRG}3&4^J_8ZE*}U4zMkSA!^el&$WOTj|Cgrkw1^LE=tIZ-5Y07+WOsv-
zQpK1*XYCwGhi<2JuimRW^=Ct@6)iGE&9-b%&b&a`B9+o_=Ri|VIP7b3@8^i>kxx<~
zGMKjSeRQ2{V7b;p0Kg;(kUGrstZ*XVxttVE-OnCfhC&+R0%9xAANUS^hSbPafw|9U
zN?q^{e<JO%3dkGS7E+>6kg!wmz1M03G>iFlDe!3kno2Ci!QcDD2Q^yg^6c&00gsBR
zDynOmy`d?0r0lH$?(GES^tn1gR8pEyMjN~)MaEbMeg!0I+ZMQe8S!F*nVKcTl#lgm
zt2W31U|>*L%4-|LOG2g)Pf`hGeG8-wpyw^%iN1ca^lLm+Yl=Qw?K=eXvZMwNuRspO
zjmf#z@ghK)s$rwv=HBvbOwaeMciM+*O$nSQn0P_ZE5+x>3Z&rJC(on>YH&VS5kt)&
z2B@Uv8s?A~A|mv)?`)g6E|Ev-RlH3Nq)G}W%1G{<m^A9va{56FJWN;Ae(vjfHWsAD
zAZLsr9uCrc@f*c+oGuYrR57lnvep$hO}|hN2KACkmzpffx>5`a>J3Pj&<I~gdOD3c
z)v>SVAM(70g20!?WHDQL-iqXXl9#UIOj$4T_1DsA53c+|VH2@xAMY}r?lhdi#-jmE
zpgE-iGTQSwUO(|9CHAjzqFFDx`W|{rsc21&hZ;PD{DNXojYtu53(}L79}}hhV+2+}
zYC3A{Go?jEH(4~uB#67h`SV3VU5Qh?{(6=2>FLjt5HC;fPfx<x5!vLp$Hy1@vazVh
zp{PnYrQL}~s9=EG3FNU6Ea!s*5%*$IZK=hpYR{&Q==ICgQWzv^0J!?N`a5-mQ(1EL
zui{)EB8!)5?eMjZ<$<#bmDXrl6IMW7u|-uI&}TYC9lqSh*NF9LM0<GO;d}YkuUm~(
z-G9n`r;yB3UG$^{%j$503L=AQv}iX5ofy_pUytwpQ~dL7HtO5<)YoJCsqK{fd=04u
zZc)j%sJNwJvI@*HCfRXLiGJge-Z>=^+#6Eww%nlnou{giYQ)*AerfXA{ao%#OEC!v
zwhL&@qP=u&(HxX$ofxC*NK=r%B2z0rla`HMi`HBtFm$Oq9>gnfL=N@>sEm|2K>l;I
z7m6|`^?7^3z2C@xqEkjsKUx$Ko1jcm+*iCrVkQF1s)bK|l<cTpcTs2v4rK@D@z3H)
z6(~Woss1HO@}nHIpe{vEbIHq4Pn|?NGCx{8ed01JP{VG$-KU&RAZU9H{g<|B&gWyS
zEM997uI!jGc}_xk8qaMzP-mZh*pzVEmK<HFE)lPIsgGdB!%rljb(q6KO{_jR53r6u
zI1HJti7GEJ>c$nvER0i@tvYX-_T4n`+GJ3yx$9mg`VANVOpD*8pC9x~p}c^UE@}_V
zqJF*b=2nMU^P&2k-PtfzNL0$!$2G?^46`OnlmKAjsjPyG)_i#xMz6g0YU%)7viN&v
zsNub*{@J8MX=v1>a7lSeCtmm@W6=UMHT35vXnynil;P9;h7_m{l&F-^&#(I6Iub%(
zaB`||WATcHTOH;B$<ks?zYRv}%@<S_lRZt2+uj&P|5e~b>24DDtOETL&kQ)L@+o2q
zK3}B!K#%KpDHu`FlDM2iJV&g$?27bLnA!>|Fn`J#c%RbEwB+$8J#^icqptcIO;Uf@
zoC5EXQwKP0m-@j9oZtI6$<3P*Wlz1FoW9}Zj%1Hs=bp5vl}D1AFX9l)7ZK?vTX+rC
z?!55Arb&sKH9u3IMhrVo>GZBQ6{Mz5Jeq;X-<UlOUw4-|-fJl5)YCk<WAit;Ljzg9
zM`oeYsB_RD;9wwmWkgG&`mSQ!XQZ5HfQUcSJ{{aj<)UwLIS^TiE*+BMJcV1pBwN}#
z%)<=Hv<2?YiOU|{R1$-dYXHHo<!Mn46t+yXFfB;^ETu$qQ6<5a-)_zZU8QNp-C-=(
zz^-go&#@8?2F7zcAi+5S8vi{|!MhLk&SSf7naVAq(*2#9<zeLEoj)m#>q3WCai(X3
zDPp1}t(dl!)(Lw+8-%>mnSu_jQ`Ggen3g*wB0D8@n4h=d&p5u%27KQeC4MC7th%Xd
zRrK(ScY&r<uC#4H?Ll0Bq@>q!N6|1bI@2+N;<!%|pHpq-+_<Esx2A1+w(jY*Ux#!p
zgC0az7w@SM!CECspff8J<WzA5GyvVU7!-lX4nt4_OoYdRaKd?VT-=p#dUowkzEch7
zW}Rm>#i55M>IQf937uRgtKu6$&5p}?FJM9@rE6OE+k!CTe9Chv@#?<=<#m|U{$hGK
z_|VENm`%V{IeR3DiI!SytJ`z955I>$Z*L#!O?<q$xavMG{HWOuSS7}H?<JXlN|wtL
z)LIRp6kBAPyj(e>LN+1YoSu|j8oNQ0Ke}not3OZaAtfDJZz=c48b=G9Hf2Zu-GT4N
zN1xFoJR`yC9x$1p`O>TP4XTKcp@Q@KW~Y2(xR3UAOd(!BBi#aRs5I!@;%fSU88mf(
zGy5g33Fm9{M68WiUtR7Y_+8rdwTaeQ^5!QZpa;(Z$@uh0iLf22S7C@iuTBN$Chedp
z2<gFk0c}D^rQd(;ulzLqe(XUJ1HDip9xL5(Z!gh$XCEpZe1EIj^ph(3pSK<=5%J=@
zTcWu@CQpw`(RRLRTZjw!6)?@xfV9v-)P*soE6$pYJ@AUGNdw>;AK5+lta`R~^Z6O5
zUBV|zhbwxi&XzFMykAu!i)w+1PWFMjqcxaf_grz9GxIBIB=H6a9cwKE(AhO{$cT3>
zICp*0jex=hq7%b1k7yt7dap6{Rd^?UF{-hNf2jrPZUdz0^Pl?TuI?+OXP4@Xlp0~E
z@ijjGjDsFHy_^?cCcftupk9U4{SsqjKMd;GQZ2;KjWC<V-_4b&d|9c2dM((j=cBbR
zur3=_WFFAg^>?g#Ta_g~#p;WJb1iy}Rv4al_F<V-LiOo236)mwsd(^%kM925^31mD
zKG5B5TG(`Y`XTxX<zG=a`>a<F`npsYZNRyE*2@y<1)N1n0*#RIp|S;WL+GZV-uzH?
z=GJoU7wG)R)T!k-Jkj25z&SmrnvdY1-r#kqLryqtaq8t&oaK5C7#JBqVtW<mTp1Cd
zld(|rVPP|X&c~i8qdL`y)QsIznAqSbpx2Z!3+a@tl@d-@-Z3{idiq_Up>t5Ll{TG6
z9bD=!xoFd&rwgga!2-q1h{$BqP~2uyqAB)to&+WZ77rz`fK(D^ZD((Fpr9$X;l&+u
z6t~e7XQXZ!ZKA}PjgAizI}B&^PiLHHB}e;X73JshHibE2@*fyKXP?#Ew3~_0#g#vi
z*?IImV_J}N%Yc*K6t0Q1w>r|#TaJ~7S$jA0*AC0^YJr+<s<=&&GI5!9U^!1b7b&wK
zc@^f*&rMOs@TsqNsdZzfF$8l*7d;d1=>-~8O%9Zth_pX3??8VYfkf#xjQ<s({MUrZ
zO`H>9nW5kJopb)t7C1%G|3)6aLz@Z9M{ss>liA7Z_MB>CNI1<54)2NEj(&YV1iEX`
zd6bnBkSL8J?Bm7o!1JUa+Km5U9p=woKgm$+&u|5&Scc|l(nUzL>^RShIh_0(ly<L_
z+N&asH!1v)?347=^v1WRJzo1=sA}pV$53)~@KGgMCb2YFs@?45$;o%BU`)P~ofm)A
zr?E?1yB{<a3r~G|F7M8G_<5nEK#GJ4E~pluGpeuwn3~DgVd(zdq$Z%*oEWu^{=H2)
zs0+$?;O9v!qI{RPD#p*Bo9yqs8}l=7?`rZ%<Dd2kfZtYmJ#qSe7DVdUpdgp=?5@|G
z_#0*N*ED=tN9wf?emMQ?cV^YPUo;JgT4eGC-=T(|Bc1O0D$JZJ4fzlqe6G$krs+#K
zd74W1oOOhNcJtE9wDgTQ$7h+O#q?X;lgR15jk^0Y{u#{cyIT+8)vwS@M?e$I@v-L$
zc$!lHV|;XRMo&-@2Zi$LR61c}&<3W}R1S~PQoztP&*&CFsV3IRHk{Q@fAy{>_7Pn1
zf!Gy{P;7(pi(cmNvko)j9A1t&Yag^Hy-6bZb{%|tM$A?qHQv&N-vo4HztxZZpUaPD
z?$-b>8|mW|j}o`2$zrlCKznFqxN#Z8p-<uMtt{F=2S6nHor~84d=d7<YtB<GwaH@x
zRhZz6q#m8rAlkO}r^(T7dE^B#7eJ(Oq16CTzoc?RZgs8$y$$bM@%Xu?tR1G<K1&Kg
z2#O=#Fa51rh?N`Kp&7ryo^!WtuG{5+glaj13ocyN!JCef!n*{TLvw@^uRv(S$;_Az
ze$L{sVZ5tD1IcSLq|RwT|KZq468x2?5F~EkDog-<S+jL*$h$7=&%E#?LF2(=OC+3%
zJ5C&_?Ho1dH31zd;wU~cgEh})Fzc09XMU*IHnxMBPtx?|0a^oH0+eqX(QZD)dFn4*
z`GSS?8ZFMKJ6S#}h5`xmo@^3Mso|tPh+cjGO+d20ETCdQwz~CSjyAxGa=GIbT=Me-
zlzkky-#N#I<~-my`e4SY+4J*ELLnP<(~VsTmzJGsdj~C9lgJhplk(()F{wSX1!-Bg
z*`@5ebmz}KMHs!q;9cATngew#wv%=us;}YvW=#fp;G)>5J3vnhV!U%4blbl7ajTNb
z=?qm0zD>izfOrD)i)L;!9zQHidMlNZeL|4q1C^ScK_%&WhjtitpGsD|mk!B1?{0ON
zUrV8>pSxg&kB^gNUR^&v`OeYJk5{RM^DNj9R)I9#r(F}!)8q8mCPgZb#@bB88gEnX
znMloxw%DUu-Kr?(UeX~2%zD_%>0NHyASIdz5D7Ak4mh63)tuH?CYDZudJYt-N#LP!
zY1z>Pv#mH@Yj>Wh3uxX}#4XBQ6Io=)BndZGF;*JNc280p^(9pr(5vbDP#$?6hLVn(
zmQ6sT5j{UjI~KjnuaU=Z(Eq05F4g=bDLyt3d`di>p@bulQ;zedbSwAidDv<*yORAW
z91O1$vvKcyjO{%OLlwV5UFKA(858-&9D(ZQP^7`0sazX<{J8KshWC6Zyg-T`F0|*i
z04*!D!z#j~E~D!UPaROgS?VbBPBViNS7-SD#Q!Z*gy*I#*#z|n>D05s6t0hrIw@-G
zAE$25sxcZN4k+HL2e`~F35j9jC!;&I>Q5qqd9oBNl_s5l`~B<bpy7Q<kh&N&+W=>u
z<5U}*iTmqx@uNw>l~)mjxOzcN4yjNgz7KJqa?YXxDLtW$K+g~Yzj#0cWY(>a%3*r-
z$e@W+wjHPF$KfQS_W%c!L1ZrwHKSk07D)MHPx+EW%v=FVC%=Ch)t{ME_$^^RBqlUX
zNa-%^qGQs@xCW9hJwqBdb86LV0J_^xgpd13<Cy*WZK@NM<ua2s;DHf_dryTy4Rz_s
zMuXC3tP?a}SbFLsmH=s~^{IG6`NR+&1ELeI!Q8XwI6eL2#|U$-?AfO0Ch5>?9Rn9s
z?TeMSOHR)Uk()GYK)=5amp-}qyYvN(_Y;cX(ai^~4huy%WvbLJrFL3?{&pWu0+yT&
zy4L%;JPS@C6LIK4q?u+$m`PpfKxOvKD$ukKHyUXJbM`N@>P#~u=pY4GvJ%DOdw%2F
zoPMkUT_XGK3z~lAADC{T8$U*%ApDD))Gfn8B~08-5Y}mcxp|53$%!2>t8^Q#f;y`&
zoq9*&a}tANM)3>3NaW|zNF6B1RcQME50*MWbmKWm0mZdX?y6WP^6Lgo{SOXZIw&*>
zTP}UzHg(kT*o1WYNHKq_#@APBeTe_qb%&N|5XPjGPG@_Bc@UAG4?QP%l}@LMLmg$@
zq4(_#zam(;y!0f9iXK9$Hx;R;0;eS=E!Tw8E_udKLQTp2QC(*G#3f2|O}^_bB(PQB
zNV(e_U#1u~Wv2TxoYeU;^^8xQPL=ldN_lyf6F1QrF@jnhmvt|EJZdE}w>rp(@-^Gh
zrfUe7=PDFkp=`1zX>U+vBwnSQs-{4SYFTb1sslaxq@{NoGJtw9bne7PKZ&8F&XGm)
z*hmY|5I=re_6+c!67p4`$rlwp<^)66hmJk-oQYQYtoK$$xm3Z2k#EkkCj5HFG3Uh{
zRc(EUdu182FMy_nH&<{Ttv-|_F0(03Y{}4ShBw0F&%&SqDcsOzP>>>159ajgHoo4~
z8sp5Hh;tM=c=-HH(oFNsV&+o$6QrLLQc69o`U+?QtMB`#xA+al64#qlRhbJ|RSW@M
zbV-mn5pD}O?*Ww`(FyQ(!yfpxw+H%dPf<xcueWeh?h&$D@kYf96)ZbcJeBKJl3}TD
zft)MZCwM$KR8_wuh0Ne0CPB(x7dZ&dn~X-t9N<-O9$lw>BBTdL(VB#~eyS!)2#wwn
z74Lw+4C+|tFh%ytHR3ct<>Y8Y1ooWsq-KUJ*X4#bk|MN|L3tLuC02LrnsV$+kwH}p
zP`*b4zeYp;UPW&}T{Y=caR_)x(c)gTjsxp5eQW%jFKJr+p-;ix0F{Y#t?A=ROIbm%
z0`IxQgDSKwp{_iAb#bE}XZEf>Q^_$E5UevQX=5zQUxZFNkFj4_{pl!0Z)#~Xa$A@U
zuT-uwBX;sg@2KMBbEMISK=uA1r2I2rHskU(oqO>iKHPPp*GwcqbFOR)elihmRhV-$
zI=@p%YKDG9_p2H)=6luS%hVMZ4aMpgF^O3hNRZF1<NS>O$7346xli8ct6pu{%k*SP
zQxjZo4h;sp#$4bGBGnvPa8hz~aqjE=1#SAiWKn3M1WT2%W#M@pN2WSX6VChz^Ops0
z)=7UL0p&<_&4N5HzV<wLGEBuo2Ae8l4IA{oK;-+D=a_bn)78e3i7}V>lu)r}AbA{$
zaeHL(>&E=K#k*rL@$|(1-Xi^kgbuZpZ3xoqJ&&G7glMY82kzjRq9kIo-s>U8?}U-l
zR#4O&0UI%llrbTX@5QWYOe-d>JqM5cCMcqGP!;MeqthvIDwpo0sx51mC-Kr<=FZD*
z6KIM8bmoh*&ZAH_MWO!|=<Zo>C3lomV$uP95QbT_sE7m_w5{X(ydhp1S<v*M9HpGy
z$KY}>^#I8qRc;LUu_2Mp5@ejhor+-?x2&R!&*szk7T(68pnf7E!v4UZ^)CEbt0{ss
zI)=;g2xu@f$zKD|VYp9B{nO4YWBb#_JhKHlbrNBMO+pSQ@bo=L3YRJMy4Hc7ceWD`
z*kjL;0ppYIeEfglnJ44cXMUbqK7rYAls3;%J*Fd`3NXHzUZKOl@C*be#m{?G-|9eP
z^!Z1PKJL&z6Ea8yoE`eQYIb5Q!7lZPKY64SVa}7l7A&dJo-HWH^osZCtFm-}6a-q{
z?5KR->qp2411TJ0uDT>mEUifivKY~|;2dJe_NRE)#hP~fQs#aMJ<*6w6uBViUrxMd
z(*j;J1O2`Y@x8>X$kP50*s|}kywy`ri~vR=lhZlB#+vD0lEj2<!U}E4Tj-x-&|S3a
z_;p|A4(58qG<Can@hj=}@N6012v()4SmS=JeXdgIP4!Sna(WA0ceY_|5r<GuzPc?l
z!H0&ujoten>UB?s{4)hlcVnojWXn_)%9iXCJwZ?af%MH8=0(}U!52y6w7vsw^Eku8
zIZTk#@$&jbue;pqR8>AHRAaIQp%V{4Imf(()I=9<4g2(gg>O-9!7K)<OmW$ZPJGK8
zDAlRlHeNkUKe^D<U+C3nx%DjyBX~@sK?`H*)hkL}WGyI<Kg0&%*3EfM!nUN)<<eBB
zbq6p-d|kcR!u-4jbJv({x&kyK7Ozv(7>(nr?)a+P$;~@A=GT%Jdu;~V*5zsv-H(fI
zUeJ8lk^{9yXOX5>oV*t^j7cqe=+UJ4wjuFuDu<!Kh@6v;uTx-7NdZ&(+eLHkaAy%m
zI;2i7J9!gDvcj|-XbBI^DW~@PW}Ne9l9dVxeZOfMZtZ!&pk?%)qFdVWbL7{@9U0~t
z%ma?E;-_m=&i`jozi~dZQAx-Byn+v((s$C`>PQ#CGkbJ{u6s$E+J;F)EI#ro#En`s
zxEA#7A!K5jD$cLldwj-+-#%$?4-*H(<mtKXk>4jI+;-q2k%R70u;MOTl;gCZAdUaE
zEiezPNRK_R_x3U!esb_p|A@Df3?&_vc7+?FDd|+lN%_$Awp<!ENTYA9dDF@{tZ3;C
zTJROik(5BwRA>{>)6Xz&ighiWgEX)XeQ@W|F3#>y$NeQUj|QEi{63H|Uk;TXwLZ-}
zh`ou1iI9l&gTRjNB#T2h)4US7`=JK(6O-W9xnk+JhyS#qy!+0lf(>q__@5&+$~S3|
zeGoKuQ-%4(2E$Tk>pau)agYCKE%c@*ugr|fJ<$dl36?Z>BdnknGIl@li!`_?=Xkh1
z6Hxe0UX9am^}*+nVQ@wIrdWS1d!8KSKvkzRf6^Y%PiD~wbnvLn#dqDNh<R3JT<(C?
zP@pf^lj3E#9ll5v8x-uoKr7fL1()+wrd1NDdt}8=P<{dBXVUA)lrQ5wK10LkpY{zp
z4T($%oh%<{!r_#NrL%9M1FaNyNIkowl(eb+rMmVJEOp-r@<Wy`i2TDY;ZKtXWr@~T
zqDL!wq`dged6Jbvg4D~~`C$Uic$@x>)Dt;+5bSt$!<;su?>4U9s`q90=+AOOib^-u
zv-^}IwHOA^Z8}w;`}Zh6VAo=iOjnU^C(K<bUc2#3G1kT%3aRkwvVVRt*$y41wSJCX
zQy7JbKSZx(cr&Nf8=(%{aeaKtqUQ{OE;iS|Ih`qLBBqj>MM0CwTSwL(xC8I!Ctr)z
zftm^4q)WkF;8&?<%C_lw65Xx3@GB?Eb9_5z|56|tXzof-u~zzG`FU>^l=wO--JLG}
zp}RM6rpn%r-P$>6I`>Iw075-b3|2=#At!~Fr$>PDC*O~EziuQ{jT6C;8lc8Z*@@zY
z;?7Cp$X2^ZH;Z4a^zLhVdF_);Q_EvK{h)bRj#H6@@R<anW>C7J7%>S>mZ@w~xf|SP
z3QD_l52N2HdZtN9Y`I8zFly5HMa`Hz!aMx92hLKWNk!YZiksB+pdMsv?>8njO;`s%
z3{SM`gLV9>KPunJaE6sJCPKR*hECC&4uYp5JfCa;ah@P8#`C2=;wm9y&UBxbXD}wd
zacQE5f+ju5CYUo&JC{VEI8Zm~C&@-?B5`$Z=oELkP3LT*w%n#^I21Et8se<Qya)oS
ziRjOQ?F~+J)u^!Qj<)Tz1N``VjQ`FTc@uM=yTJImFmR`ta|%9RJ-bmdX^OP~b<Sz=
zJK8h^J(yvL1{TEbCngcJl}Y!HekOv@W~nwrXAgA|5bJopQRCC<RAEx^_g8R!f89zx
zs+EB|?1L4Fdl{BWQ!qR_=I#3klhlP+2Vq?GnH<|1L3-hp{kc&8(4<uvixNeZ7`7y8
z`zZ4cW#rhzo<>cCqNp<g=n^~YbVb#y38Y@CL8^Gpjmy`4WpZFQ%(D}JyWxz4LU+A0
z(QS~Dnba&i-WELGG%QrJ=uR@fe4Mtbw%ysGH=J+W#7kf6G|0GEl;KPv33cYxfxfKr
z&i%xpc690Q1D*Jh=Sv^l1&HtCqYrhF<<Q$jT}Vlt#;?FnmP#T=IDc;L>A&tzFGSc)
z;OrdV9fTyMoc$`pW7JX%^+X3iykFck0Y|bek@{P0hyMJ0-#!NQasL?mh5Umz8SV5H
z?bFnRd6UkAcSN8EhmY0n<J<4%5MTM3E%kKS<K1-&4)NxEV{S;)+Rl>6hu%-|2pZet
z4#igGosRKDxk<7^DUp=#XwlDXi5+Ug(-{2{P|aE>!rzPCIaIf3*;N#lYQ3Rh%Yc}<
zXxj)(3qh%nELvT5+@q^Z-i}EdEb6jFCF_L{G=&ksjJ|j!$nDRq5K6oBfL0ft%H+|m
z;!hv96g7$me+c-fmbbe5cltLhl}kFga!(4>aHW@->juy<q+@qgrYakfngg;ay&dAe
zReYhZR(OO@eP*D2bkxBo3PW32?uc0m^K|dNi}9CJPi_>Wik><eW!$?{YWg?W@NJY@
zjpd!?mr@i4hoII8dNr;#1EllbrG$^{T8z6l>mU$SVPpJFm#kPtsJnO8q=+QBO9cne
z!?ET##WIyc4pm~wuWj7Zc*<MUBUbOKJk6lV>2B4;0u?<gZP}E#0cZB&sJJYOQ$Jp}
zx99!|9$xp?OgW!F-Tt{!p;&pjGv+os)9|M}dY~nbrT}(U@UdVw2{hPT#fph58fF}p
zuZURsc%Xdm@VQL>mszVMRJmg;9c-~ld4~is`sY0F`RPAid`SA>^xHKZP*Zsi)u{NY
zdS{}$VvcZzj<~=YuGasRPL5PK@#x<X()6C`u@^Y=J0P2Yz7R<rM4LdGS?I9u1GTG&
z-|pO`G>wJc1C+^W!gcbdOM?>2vwPH_Yr3WD2Bk*W80SNe9ffksSKh3F+FR4}RG_kD
z$2OHI9@3JfiDBHK#I0ovTl)C@97Ai78J1FIHzNH$MjB1{^zt_R8J>b2y^gCNA0woH
zLY7EcU-+)LpoHC-!_5j)aQ-2e&?&a`yj458DQM!U8}zg*YG1OksRhzv&_zJMB?f#U
z6id%DeLm^6Rbg&{riBml4_onpHjW)6PBA(Zz*A`gGiZ@JbBVX83X|asdw};0+SnfB
zL7pq5h6tWLzzL58tVLIFjvDbk~=DK*Sor_@80oF=bT+@V-~)*@j9YdXQ!mD1np
zI1f_1jA3U>uGB)DO<m<^=CHl>45VD|&gkFOAg+dT%C?jA)6=hWCcT~h+<1XH5|Tys
zB;NGFv6_x4NV9#QWv}4;`aUChO<iV#3kNJKM^9278WSYXXmHyr&2ehJ>gkQIm=mHt
zI|dzCX7}ag>P?@c=jew@sK^FPkuXP8FunyvvQpAg0{R?g^qAdPQ*t<($67s}dtFfx
z3MHeI4&0{f^>p!}z<hAsq?Ha>OBM3VuP0rhn7<5N{Kq(Ul=zDT|J0*ES_8S=p~IX+
zi6E)Ee{F(2m&OI?>6DFIKokqlNC6ZCClbj@4JBGlFq?60;hV~%xakGjp}eIRY4%0p
zf6Ywl4T>O2N34;4?;yRu%XjpTK4E6)eU$J*mTnVdq1C=Ga!d&_ONuFk$RHyQx<ooV
z%*IdI(z;E_QMzQ(^Shq4ML;(eL}lEdtm>3<W>VkkC@~lA&sYm~fi_bn%Yg6$)kmJy
zk-TLyh$*12BHf`SOf6?NXHWfg;x|f^llqxM$+277Hwu_aQ{_Q4;jE}I?-;Z`>Oeh4
zkmQcWjBZmf-O8YICMVci1`4dm>+b{o`n=hAc6vHMF{*#U&7WK2Ab06|NQtj9BxU24
zC}Slyl;Mb$5F$g$aFg=3Ej0}dRV}*sBxTGr@?T%RpJ>>g(s(}lqa7<cl{mGSiOMq0
z&tse--SC<&rG8v@?QL+b8aDT?C(4h2ejJ^}P-5o!5m`8SIwf+d_B?Dhm|uT8uDgnx
zXkaF4B>|fXZ8SYmeh)4D0IHK*gKLvwp9li8VhdZL*!M|m<nuIt7u^%Xu6)V+wul9$
z^S3(4yi2|J3tQPRGiUCue%suAAi;2qJ;1CCCvS*RnLD+D@$>LBonBB)M8qzQjCyHT
z4n8Nzr+^-QNfqWcmosNa<0k0s9?tsgHC1pG%`|I~WE^HUPX_LcN}468!`$Fx6y$?V
z!c&*0ho&hf0j47K>a&SPqlyRAC5^r+%q>oQSvX+&?dWC00B%Y-!Jw|U$mRr|7F9){
zEU6~mg7oOA|M&<#RY}(|Xp?)7JhMxLHC{4S9m6T8l_gf4tay105}Aox*Xb^^xpwqN
z{Nb5Q3PDjPwnx5tlu~`kLlH$Ihc7)#<!}n>WKeml!{iqz>xG-S9}zFDQzT6URTvM3
z?*xL%j+Gmg<@W57?^K8RlO@oGqWh)j+t^IFHTh0RjX{am#mh9fVu$jHhcJ!e)o`9y
zmvWH50$OJTnbWt?!vg`eV(ld*P&9OqX3s^S>l=UABlJGEXs&{x*BKK|J-knLko@K#
zL43Q0GTu$z48-KV72=&fmrwlM?jGRZp?rUcl*oNe8`9|$4*3(WuKe_RqT}6Q?AmCy
z@cdPh)2md2SrKqZwYvu`YEZ6sE=h{*;zU@74cdY;O*GOMeEX;GeWpwQ6XO!Aypor_
zrYwGi85c~Ea3&bTS)G$T6V7v(c#}5#gVS%~){Wnp>xw-Sb1E`<8WZB0my~tK6FNH4
z05w4Ux_NI<s~ZRPa@eT(*rc$?HANja>(W{T5f^rvWNOZCLHaU8j3c$Xjvp%%4|^R@
zm4_*(Nx%hR(CXe>$KE9khK<RhUL-m53FYS@N*WxB=6oH7r)3taV$t2!qH@ZGNa><;
z7oca(FDK6Dt(?u8tW|3woAcTa-RsgiP(#^5RE1u5j1R;^ufU$?3WF{^ddZ-+3Fz24
zBp$Ym?v-Z3dZ>gF7%Zy!+>p^cd9Msf306^lamQ)k$$P@t51@PWR~@|E^b~y(Ok#QX
zqpX1_0nMFSbdpg1tRqD?G-Yz{XwAnHdS8Ig<89M74-)M_RV>0r%R76a5@B+J?79?>
zGy?v)RdZ!TS+{ws>TVwu?NKBF!CE|IOcl%MjeknDoGRkmcpv-|Wp;~EH~!}XL=iMU
z2z6!&1^FU&=13iScu=%&0Xjq47?|8Fze$$1(88p7Cr>0hm=9hI>TcHZdw?iu&+k<1
zfZNmSpFgk9@s@vILOguYl@8dcw_tB5FVWXX8X}6Gs$@=zj!uf7uK|BUn!M#)?e5E*
zeZtG&*BkUyp>r-ZI<Vzig<Z-TAWY$*<OF%cQQE`EeCe0%xo^pmT55YnXpcy*W9+^q
zQ1EU@*#vY?J+h4QSG*eyJj9rFK!y}KGNhi_t8WVDB&VJ0Fr#Z~3tLj?e5yOqW>D%U
zM#GbYEDoD%Hl|3Bl8`9H%@v^O7Oj+SFSnFTozFx-9>|emOV5xxt>`^%0eX&+`1T&D
zHc<-7Vyrpt0>@n`qgEaYsa#iq#+N^^T#1y!(8;v?L!htYs4jXabB8v@axB_QSo2FF
zQJo%9LlZe1=GQ*VuT^@7rke52FZ(o*?<#!19vxk^zJ-;GIu}V?nslR{a$d;eY^o02
zD=LZ3oEx^9I~hi~rr@aOOQ*<GW6%lta>p-J4iq(WJfbZ?myvt=<8@58ee|cE)35%%
zT6Tb_q@-CVuOQXQ^7)2O;u`E!l^gU<QGVU{ofJ1`@=WYv(Cy)G$d=+^>ZOP-RCK31
zmWbjErA~%hK2PTu%Ql$erb=$=`@3DI_L>OD9TAlF23-c48`G^LC}A(cbVsh~r8Vaq
zTS%7$qRr+PRDslFCX)+D0|^VIVq-b%ayh}nYj`fP*;st(I%ZkF4)v9|)#1*URL?87
zr<SeDAkJ66Mq)o+tX^=-a_SZ0rIzYe2Z~_7M?NBMt=1G@o%%6NVheKdq0(y$n{uRy
zsZ=&0&1lr~ebn#s(dv@2L;7yzN!5|&Lxq<L(H<Mg!<g1QHmX9GlcpKxG0e1A`{ACc
zHyX$SR-{XZYma#hXn5ITz5(N8y11kCt!7Prr0D1mn0j+KTd(ovM?t^)6+Q9N%`eri
z_j(_Rn;5Mdq|DqSE6(l7);;%_)R%n%YNlM&Udzelbx$)R#PV*@v*lPbX#}bqclYq$
zJLCM`;QWMA($9Zx1NE}^i7%f<Mi9;SPoaN`JL}0lJ^!`euVoTWE4EI_B;tZrO>~qR
z$CIXoNu5~aW@;wIe1jc-#RJrN)J5aZqEoEJU~@e+93`aA9OiJH$3GnBCMZ$)ayo?>
zBYSe^;>muQaZSwz<1Q~$<K=|hnF8@AwA;9w$B{bKEr+QFbomUL_NnGAaq;`rM0;Io
zY-%6J(7lC%lMP2E^>ShexrFrSbNv=gZ98$31}%F|1!&@<IkOo!15uoG*`K4V!yGmb
zehU}9rxF>auY4%f1r4VpR*B->Hvt`J!J|*M-1dJDFU3oGH4B}p;_<#mYZ(Sbd2N&f
zMM;M!o|_EHuT71=aYaeuqnFR|hl5u4^V%rn&w7vk4a+8);_|yW^<<ENCO5Bwv5-I{
zlww#KihO+bU+$|_7Y!8>N1boF*HGnUttFh1*vTnUs$nEQgqt#d?=nXvMg;8x&b^7Q
zokBBqyptp!XD(={9p!IZadwC|UgAoZ3>FJB_0_q+nS<V8(LqFAcp@yFPcnl<Tv7%4
z^LfBpMHi_6AqYg<@W0t_cr`Dz>LuMat%(@QZjev=Rb(#Zrme8Om{O+!x=MRwnlE0a
zsh=3NGSzDh)#Cyv?}>rHB;u#Kg?F#hX%h?7`W|IdM!t-KJ+6LhEGLN|-&r7ATHOaX
z@|ngxuX&zqe5h1LcI+)!Y*b-S9^ezRs%gTxXU{n^LGIrwt1DYuVl++Uc#Xz)QCKvn
z`MuAKU@b^psy8o3DqR5__zG#$7A00y5#K0EM)3E^uihsgZVeZ}AwTuB?D463bN~;M
zLkf34zg0bBv+yAo@gKvv{#@Uo7z>SW8U6|JK(X8oNVF%Mx}%hC(jckbr`UoaZ^n&L
zch|j%4MHbMRbjzZY6>Tx#r0Z<Jm=DM^A~l7I^&9})|-%dg!2@i-gxQ*Tle_J#prpm
zr<%GSOOV8@%s`?Q>W_u&>6;tv+t@uljZdn7qW-P!0Hr9Ic1*V&Aeh<JlqBy2GZ~9P
zT_S_X4II?-Ozdjs<^&ytr?&hnn^e}Hexj|wyr4#tnd1{V;HO@UYLZb~fSUVoSICCm
zhj%~v=l9XQn=l3!tq}KZ*zf?>WlvKn2P2{}w(oRb2(+4lp4$1OdRl`C`L;Rh&`)1$
z;yBfWUJ~&_;1t09+3_S9$Ek^}uxrD)UFZtV28*V2o{8p+K@&=kK{BLv5uHj;J+&7A
z=;sO0Xi=1w`lmEBQruGK%vHPmMU4CtGmVtdLofdF(gUZNUwWTCr<y&hfoQw}uIoGG
z_U!14O0+|9)>q^B`w3qqQPQ8IT#Aha4bbu#{<wA3zBR5F7+`lUK&F!|dvV*_B*<Nd
ziTBefx-vrv_g`*M3H3I(%L4h)!l1mT6oS1$Hv!#ieq43!tqj`ArfI+e5-=a^*hGe7
zRAA(aiYA;Zbyj|glA40Q-1fXANFg*w$tMu2uv>Kf)D!P(gH7Xqjq!fi`OLq(nkJh^
zTBXe%R8es%DCuYJ{HA^SI;9?_leE4`ukO<+0VTnuOiWdLpK_je2R$F~>Qz^NeL;~J
ziJ$1ehF%ZxpVdBX7`(>37|jPmIiYa#ICr{E-1*^FhxvWWH$1|-qwbAM-4IJxDf~Qb
zF(|y8CZD=rlQnI+?zbp^wkUU)o~M&!5(V(-RO2x(Eh={;Ii)3U(e_1iNtf>X7IUsX
z^T8K!=c4GiCQdg^gKYsBy^y@8APal4-Pzqz7_x@>y@9#g@>Fy_ukE?wQt~H^Uywfy
z;H37JbwI>7${QZjci~ny==UvEZQ=)<Sq_zEHxQfd5S)$+?_SphvDV9psv_FNn?D|M
zrxu)Z2#O!NCX@$9XM2pSzS63*0x1+j+1;LJh#-8VMn@2+{QM*Zx0Hd3i+@7>l0Tsk
zzYKEf4unRe?}I_REk7Ol<^@kAcO=1AG15MST-~C6CbOOaYtlFisS>A=_X;eN9JiaD
z4@ZnJssEx1vOrmcYd(fS^b5VJ8r3t&Wx5lL&64_W7SD~7;A^3BlUN(tejf8a+j;Aw
zT+b+Y)VIJ$(sAh%<1LSZLP{gnk>X7Z=x3yB+s%u90=4P!w)EP>HwTR(wiv1`2>XN2
z3c*#NpEqyqAp3W!YUb+p+9$q@l+trV@PQUF?ThEeFPT{fl0gvWRAFwzpf$+oF+BKs
zgOtL8y^)Q6at3^)xk<sg3i7PuGPBXF)7%Z3;Hh3r!?ZT~?81fOf(oqrg1olw3etfi
zWLW#nd~yY8zD8vqNHwq^mJDKTO>sL$n3xCmLf?!K`xGza7}iPd245(3SxT__Sde%z
z&Sn!*6-Lu>Pi)=lc7XC{LK%IGGojVJBh(@$L0Ke`o9dx<<#EN&IP;T8-~H6z#Pe6%
zaHYT?kN)>#bh7}^(}HFUr@IYSYEV-3mpup4_@!$*J~dtZlZ_t)bJO$5a6ZSgHIyf6
zh0`8%>lpE@1~`*iN@Uug@(WILy?9}91I`yBegNB54P(7Mp}*{f?O<G-Fv9gbBLuZw
z#2B|$JjK{%099_nwc|ZaA<*RESfnrVj+>HF$WCDPi62aslR}h5@qX1v<o!R9v+q9C
z$ISOWd<=g6>2llVq|_}?cZ?n@2>%a&x-_EK0`zclbUdn+w~Y|l)K_RajFQf01O1Tf
z6-G2AZdH(#N&J17_0fk(AQPgFI_^Kl2xhyE+cVSCw*ejK6)RN~rq5pER-=|Y68fm<
zU^I)_Q?TSN*^}7dDk!U0B9xIYFFVyq66=bJPq7Ou$zyTzEDJx;!<?hsW7qq)3!mME
zczhr=8-p6h7M*+D;MRjWVin>SL^EbO(#*Cn*>M*1yWCZnb|<vDJL1_=LLMMhKHp0X
zNaZkfYYoFY+mrs#oDIe1RLwzRf&`JJ;OlOeSl>B8)Eg~8udjQx>_k1!lK8c>?0UzK
z5$}c|8dY<vgRJEw_INj%YY#$+-UuWmJX?3i*PTMx&<6Bv0(Dp(z0L|m8nd+UB$lGo
z0Z@vyWg}3OmY)u!C-#7<dLtBVPz|mfXKm_$+6O9Ssx|-nrQZYE+GIv5*ivL2x8)>x
z)@0^85*w`J+*2H!Zl^>BIQ;^Hs_Rg;Np^6`x~RAXzz2srKoxgC3u=95r&ib+@LN7a
zd!;h+hPUJ<O<a($Fbd?llkxw9-KwH^^yfzXs(~N4tQrZ8;<fL3D+KrsmZVDwG&<U^
zAQxx@dXcZRKAn@EeTODR3lGJKBcSP>a*aU$44j35ee5pKMiw)=S+ypmJlpZL7YhxG
zRyMDF!`W(C{L8*6KUU;RLqiF3Pg5ukyeTkB-?aor0OIpgKM%e7(LmPaIsLu}j1ED}
z@i_js=egSiq`_pjDc3isfdnVWA#uOrcW+2lzr9UEw{0<Grpk_<CkV4Ubm_3fpN-cf
zJkL~K2JHgaU@qU8`=1BI?(w?E8$Lcn0sJL$N&ZbpbLkH3U%mzDeyJ(sKf$rkujzH_
z6|tG&H6)fYqqs@c%{rFhfKyk)`CYMS_wq~szQ3%}{$;{yhk1NvPIXCCRC4hK%h}w0
z+@I;?+2H&tSAO?G*M&zi=+gwC*c-E^8(p9U!YD}L1b$I6pj!J(tx?^hl<-f$=s@LT
zjqew@S6LP&e+fm#=qC#tC%<P=ZR{k{SqECrRbJ=$%&hYQc@Y8Mqhp7ODWN#<kwWo!
z3-hyt`MIitLsrsam;fbkM0sG|*^lK`Q;bDYby7g1);uFpZ{D&??c&*a(j3sY4btzG
zLGg;tV@+XnX&Xcr?mhk72bys)M&85Ue_MMFTJM0)@w`Iwn|H|tSz(?cCI}##E#~zm
zIPNM-Rf@s-?KDmk71-1af!m7TsEjS&c}eM}I>?H5pHn*rC4fr&si}qr`YKc8M<P;h
zc?6p5`BOo<7HV5IZ;!1zL|5m0yB-Njr-popHg#Qm{<-=suxiC)dT)|d|8tI&-lM;E
zUn}wEW5D~odZ%<L)^>EVkQD`muEnqePm_ouTn<V0oREH<BLxS<_-A+Zr%cJ}RxO)1
z{Lzc5Ng~4`*DQ(`l{Y^^S`50f?fW!mo{C)>FWmRxeG?+J5E-B<No+tM`$a%p04>U>
z^z=V^pe8?X*r88uN#IffLPPUjb{2(n4wP3?gS=%H@(Rwc6ISBB|1eK@1XeyuQ%_+$
zPtZV%7eIhSj^S@VZz?1P$oK}dKn>BcOCR|mT`qI{^E7_EM7QrB{qfH~tMk-4f$8a?
zG7vy^M<%5^UpC=9yGv=$_LqsC=cWW`a|TOQo5iAB)j^v%dtU>ZRM7s(Y$fEd$W0>u
z@^K<+0LUTrK9D>+vnbafpqD?jzE)s9l5nV>CVKACs3(c}4by<kqJn1}L{dmONBWtN
z_P9$!===W95Z@#{6Q|=O-TkI9p~ap0??nC?Z&GEDa^M!_?Rr5c;+m>K_sGb{<(qMM
z{n${Hw!Zh^1VApAJEvTI00mF<P1QkuuV4G=)5oX&d3eHgy7EdICU7$Ym`hSqQ1D^a
zRNR^-QM+HkxxVk|?=YA(_gg)EQ)U{Yw{IPKQUt5taI{*R^xXdoxW~t-#o0?jQv!`h
zI8JKmqd*5$PTtUWt;w5xi_+Ik@@DUUf63cOR*g#->Q*|sb^2xg-}YB)|5<J|zBQ?9
zix)d&5fi(aFQ*0PU_7;EO_Ukg%{W-jdQ3crn!(55Efj<~B~Y-DAaT<MbeK%IyRI9c
zjq;!#PLW9Lm{xcE9@i|*nQW?Bpc45YxJUxAcK0#Dv|~uoM#x{nWveFY6iM7Z0`EEF
z$hDHUYiczOFw~KjG%33+_gC9Bv#S|jaf`OX9@SxHRI|BvOcT;!s&332RF#>vCW=f%
z!<bq^e>iKskhTmlk}_-!<GHUv?vZ$Cf*$LR<f4y`aVWMZZ)LG=7FR!q8LjE&u2*c#
z7dIXI!&F!kLmrASAe}T|9#fXtv#4qbdK>E~cW&~9YI<7!hp2rzn*Btq9=%M(0}PHd
z-B0K$y#RGas8_esqlBYqdXHa8n2E*fs_@(Aa#0QE*KLindgL3r1*Yf{gFp_QA`B<@
zhMh~JKy{e#hMjVwq**1FstLz9n|KGQW~@TW)o8AAW>t}|B|?gDtKkIH?(hE1iDBpi
zY6(N`Un@N1%eN?XP0}Um!H!x2<t;75I7rhg6_`Jnw12ZN&(^+PeAP)!23b0Ay%ack
zPRZ&d99D<<dGk8P0)qo7_fgZ`RaTmbXBZlHosTX&dp--C#-&H81?S-Ke9puD{f<!|
zyYC@c4u)9}H;W^SlB8+eqCYfL^?8o;NyR(=X`FaIcAonE{D&-K%bly#QZsp6e8ZUu
zR*UabVxh&TzqBTbyj>E%2aQUJ1q8w#-gDvRMQ4AWO-~nTT>LuBuUjAfUV0Z>F4A^B
zWN$#~h4*QBLzA3Lu6zh}%iJy%l+^RRq?O86p>m9Ih}X^Ke#<oYAfa;??<h208Wub0
zxrOyP#!cO=0dAcTdonBPlRzn(#N((ugB(3>RKpanEY}wCU7%E5gH<P>ZoyT4hP62x
zqz@)CbaXU<z*`>S6ggefhO?q6aXp=!N(lm9snc=wH7g{QI4zA<Ua;r!$FF0NHo7GA
z5Ew%`QXq3-231Rd=i|Nwhv%i!R2q@BvW{zJ@diq-%q$L3Hm#`Ph9Tv(c?ckg+zS2(
zP>y)rl_8s2!O5fuLIXJc4D(90<1sVBJ4H4<g-Ole$Rh{5r<wm+1$0)Uxq7*!E|2{u
zztoaqq>pZjS1f0uxvNio1d62YDoDS;;Yx}pC;efU`v{1YL{MoC4Y$_v`iw5Zr-hxO
z2K0RC{T67O<hCN`1i}J-8UD70iF4r^rL1=qBfs!n@m-!G?ZI$)eGUNX7x5j9cfA7v
zKJOoiXL06i)&U)XLEc(`y5TN{<)TjqzI;(1;GH8P^cdZF4KD0udvZYuitSNctfP#v
z{p^qIwu9n7y6oiB+!3Y#K$WH(i|-6iBop%_{%fl!KW|S<aP&8Ze?GboN%c9*{B+$m
zXmV9<pGhPZMXq>=g68Jb8*@@ma0TY)?Rh#c7!Q6Khi?0tUVi2?E20zB*k+jQh2^3L
z9<mpn@?#a0XJrJl53{WS4?Z(ErCp-JWv7V8pgxaP)N!Vc#<6%VH=oOWwNuX!W-2<6
z;quM~!&-(+*QF#`1JYEGC9;QflHSJA66_R|_WUMzOi&`8cmbZ&_(m0ZJuZQuUGB7R
zKzw^ma-n_hT4hNdwQt<hPJ6*y+%PK+`||ls0y?#~c%s7gO*j*j(>}KqQC`c)3fl4o
z^o&83=2^9iOEhS2vVwVi0}!`(@NvbT#-q&1gH;FU2gXOVi+}Q0)bdvp3Q|;aXxOI*
zr~f{cP5O<mS08In#PaC;??d#U9p_2|(s^{(%jj1pZm$~BbDwnBTl%xtz9`^<)eTU2
z8iI468{#1IB(`2R=r18j<bXZ-tF+10qm<fOp%{lt9}1^z42W}fr#*_Hl`77k+uvj7
z-#y&s_vuKXpf{>Hb+(G{RAXNg^g_YtrlTLlJB6*mWc0N1KCOHO<yT99w+1R2icOj2
zCp!4Tdm08}kA6hzr`(PKktZ73TcA2;o%4gqtYfa9sK>@MsJLO4Z)M!jRCmzMLXwl)
zls%`OIGd@EQ^9#+(;<7%8%DpTN~0kLvKZhI-4FMkIT^A6-AXEu)PR28enH_h%rb`K
zj^y3tv>ut%kv_OHGHz8_BApBOI+#=;X+gR}ud@DJP3$2Mm6%Y4_-24WJiqhZdhJaU
z&e~xbuM1kw2`Hp=Rp>);ns~sR=#Y^nufQaiPltI#$?w>9)7aGDKBdu53@2%^^7ueC
ztLK&PQaQYa5=Fdcx;{_lDL0h3_0vSXVwOgJz3QbYVVe0UiKGqR0&@^kmqy;Gi)e7z
zhgnaRWS9!<=U{~LT>*5y@O6+mMcx3^Q>`8W$go41_sB__)PsbE;Hv4tCcE6Cj`xb$
z?v`V`3d}v9WP0-PhC*%k^7Ld*k;dZ=8YsvI6}`dW?)Cf)r8bxf&hzZ}!O}NX#%O*?
zxVTH>Ev5Ox<oLrxoPNmW)^Kv5tMbTX+0Db$#xV@FKp7y?5Rk1r0IJjc!Zn~rpX;2s
zY3<{pJ5sS1V39sT>WHuIe+KD;syvvmlH`#F%gUmh7S(h#<J;Y8D3Oc!Fh145CpMQ=
zG5j#KbIt>1dUTWpgRU}WPTz5JYHHT0gZxQ}2v3O!F$K!;pmzxOM)B0$*YyV5G&if^
z;X81;;8bz#lM-q4_`rhQf^-<{%z<m?FEcByhcek!q_gI_kG^Yso*1*5O#x{xH5!{r
zjWv|?n;MH%z5<2^^vmbzMPZ1SLNetGv=}a171RVYxmpLj^fJ4>cILgU+AtF+s5=}b
z(Rj}z70FD?D>+(_s%;C-%V)nKN1kpeOAW#?bi(#ryyt=elC;Crl-E7o0Ssxsq#n<Z
z*7JMGqP8bGWRFBtvG#^hq;}mxOVN(>Wlw?AZICw0u_=WKa459c1Cvtdl;-BI06la~
zFVBOLSfmFVdT{aX?-<VqXVf&L)R0>4YiIv9z4G^U=f-e^vq_#sP=zEZgqM5NZXPJt
z1ayY;>&Twhl~*ZpLKlRfSTqD~(IRN}lMr9~W$Jbsc6Qm48phQ#M=h#&7HvMvo*br-
z`N403nZP{tFB_Qq3*Vp{N8Qe7+tDaFF=KjGWA-X(!cQQ-4?)Irg!=N(jrY4C^SQWD
zhxDy*`#2jWY~o3BB*2JNghT<KW92Rsar?w0U$tk-K~UoHh*o}U(bmyAOX(6KDk#z=
z1yrKNi`$_EDmiz9(f!BH=bwM@<&n1@Zj{h*pMo2Yv5ccqvOJ*<13}?dhq--1-!~Rr
zw?8#e<e-WZWbYlQ+*owaVW!JTidLM%_%<9K`0XZbsg2|ughkh@;_fpie)Tp<3e-`i
zn0b@Rp_YjJNIfu4OJ(M`@X^OdG8dH*zlYPvE@eyFfI|EE_2^>SIZCa;y+Ogz#f67d
z{c@5Dq!^{fNoqs-I{wnzeF-<J+e2C(Qk~V7QNKZ1Off${@J=loh$gEJb9t3EPSb$j
z^b4_Rd#g?=i90yWK3-DAB^nf8d7p-U>o9+AIFoCP6F;%vUHPVlN9n0Q*O4TXR{Uz>
z9c4BNAT$82H)=gH^F2AE7^R1HT*v2d3Y&H|w?j186qV%33q9z~zB)dT)*gk>_|SXZ
z>mmUaiUgFQ=BKJb8?xiuoL&>r7?s7M#|-yA7?vajPFLJd@u_bCst%M+hS`f=0hNvK
zr;k2KN4;;V7*Gz}%}Y$Z@>U1>bJG*1*8=6p9)d>*Nsw(0eMHi0o-GN?&-2mD`TeYb
zECg3~M+S$jMOzxmU{Oa(@KU?z7;1LxJ@-g#F4|X-Zno(2f-&D)E1b6FEYUay6bM}O
z^Mm6qiC?>T=!AIy=+0+l(0Xg`lz_q@HF}9<=hqg^87Pp^WX<~8+M^n^@l5s#PM42=
zCc_n^ylN0OGi7kcIgpK*oxLjDWLdL3OFOV1YA~_HF6@Yv1jWw<soWfXa`S6uMW*Z}
zd<?^qEHf#wD;}=jP7E3@o_y5Dq-T}p71Asi*@Cn<@=X(Nb%!t4#KEEAr6YC2`KH3l
zx}5(2{d%I9tZvjfEk2|SIg6m;kMWg#Q`Z+iSDJ;23y)j`A8#y>8qrM(_AO_s@25$J
zgSM^fQ-hOyT<9gL5PG>%UB~mJXg~<mwNrulb(>n=lKuoc?86%({t)8hckxP{r*hCz
zY++o>EoxYIArWCCqgw@<l{mH40`}ys<a;~zk&G$QW(KMdTK9&f_Z?@%_Fx3T&~P2_
zq5ASTd8pUh$+HHpuq5#ULaGHN?~3pti?da&`oBmvEP92Jc1Vi3z>mM<kCmWuN!$9j
zCHA3czh?bu#Y|)+)<Q8$^Igjz8$I2vt(XC*Py7bsT;JWv^*`cYlaobVGN|=wZsx+v
z64Z9qfcQoX4D`V%8`Af|r|}ocv$=;h1e#(Ms9qHq$h}}&mm>{=28)`IUfsG4m+Z6+
zNI?lx23wQno$Im+UfU*UDfD5RY-S2uQUMlaDb+c87c|QSIL?x6G;wN%TLa*P_Kl<4
zjF&z#6``|=gBC7Hb<RgjaQ8)qJQ&e(BfP!+na&k>318<vZoiMTZQuZ#05nhrnzA}D
zlkzHxFw-2=`RY$p-Xu%pQsReF>X<$`n%HWZezwT_E70h=-W%38Q2_POX!oHRXR$5W
zANxg=w%Ta`QNG8_J4pSRLHXrMl;gzvTEmII=yDxG5_y|wPToe*&&q^Hk#=jj7hAPg
z&}SD;@$JfEL!+mIRmB{wE1yA$Em_`jRi=E9qXVvXAHY;8i*3LP6(PD`6l&FNd*;66
z@2@AUsddUzGRQ&jJ}@_nGwTH+tTVTUEl|H#*XpvBwa<~O>rn$xpf2O<&e`gW$!Yb5
zjhdtEdCpAkl&CHTFO#T)piwh}#wi|WC9LUS1&AX^(*hO$2ghbzV1s$@?eYlx+}9nQ
zSf@PZH@SKFD`2w{a?3RUqPpRpxsLY9S{`Wb^#}@di1#blCD<cS;MCp3AXUd1W7YSi
zy=zT-@=ycKR0}}y7gO88=p&1&>0NC=iqYaBP&2B*VekLFMN<O9VnkIK%$>>OwU-kS
zWW(B{Uv|i)^tbWFE~S=(q)=Uago*(mfJ9LhU%0M#0o4be2h5w0?p{#GHf>4^(11DA
z&pj?x$D+c_%w?xoi)1S6r$65Q7=Q~Hi<v9GCgfVidw9R!pWpvpbx)qVRk<lqlSYPy
zoQL3R&!1B}Kc_XApSSxksNtn|L?$yCvYbcQ_x`391#!%)D!UHVAjxEj3!niLe2S>K
zsyG2e>|Up7IC9g|-<-IVRI;#Lbg$B6#1<%G72$Og&{;%~z8OCB$1vpI*bm{{+vb95
z=$fT+x{Nozj$u_-9D3eHL0m#f#-#!?5ghtxH#_pQ%E|TA;Pz9K7ZIt027%3T0Om~j
zZ)`Wy{(b`BXPQ3QhuPozzwb}u{qOj<d?G$oIsV<#`|oy|=44)XonR_aw9v4JDV}-E
zZu+jwRu$ZFAu%bp*mJKNW%>NQ^Dwn=T@lXcL8Dq_En^dLs&MywDgvFr{P{V%Li0tM
zCOq~Z(212^9@6q?jKvw(^?}Lv6hMug7@v2LeisHMm&EpVeCk={|E&bPU-qA_CbjLJ
znnpK0I^{cbcP}d+x5_M?5<tvD6ty*jW~xv(q*|qOZg9WjmqElg^;e8~h8OpHKjD*c
z?g>1kN68UYs0YO%SEX>N2Oz&n#1|PYTG^nVpEc>e?Daj?;HPPRPeC__HP4|M!2CQa
zR8g>hjx%1PPvg_e=-+a-&qIEGjdyiD**byPBqtle#($LOKzUZe`5Z)7?1?bXx$t2W
zY5EKwJ@ZGG?=DSA<RZ0=@S@uct|PwijPE9ZoEqAaRg7r$Ih!!-66L_v!A(S?G<AZS
zU%aD`-t#M@EY6D;M+4LwBwZ?kG}k9g1N9_no3*QXiAwopG77v>uc-YV;QZt!{Wbp9
zS*)ushPID@$_9z=<dPWlNyqPdEBF@WKAxjdSmNcWF8=*P$`8AFlLGsGWm37@hPt6Y
z@CZYq!|X}<mTgm>G7%=Z_f?#AvhU3pblY^XN75AZ`GG{eW`k<}GI=>`yH{ZT+`3;M
z>KDmJe~j6T9$cIXMzwnewR<=I)^~#TNJsSPK`^IT7jJzUIjWnpE+zbkJyR7Gt?|7!
zBnGb-P|1PHOv+S&i3!sI6`r1A<aqVbz)p`jNEB0*b@6}Yav603DyfM^U}$MM^$^K%
zg+H2WCM8g!xg^H~<8naytlsUP@HP!YnPK<?h{Nyr_5OQsZ-1s5dVJ;6Ywt>jRBj#8
ziRfPtF?i@PSi0;|UPCuBJ)w1*{@kWa&d@x@(WpJ=lbSHvJw(SAU-QLRnKrBj^MNu{
zB@)RANZAKs3S}dPPV-kO5Z$u4)d2pGk7swMi`)^$iVQ)H<`vbUl?9;$??owa`Y_#0
zb)5b98gWhxvAd7{BSaDsNI}kocgflVgIY6<#MFVhQOCb7F!Hof+2T#o5JiLb43Cq|
z`mQJbs0zwYByvn)#`-f~CtF4u4~0A-dkZ1)j-?%pD4$MT^>vhw4C^+L1WvLZ7=T9R
zsVg;_3%HbbA~&xAZ_e9<<{7hAN=r?GQ%)n9$J#p`Cyv8FFJ_IZFl%M!p?}@P%XOO2
ziF;ETWW{~&8PWQLQ_qDlsZw|6IndiAiFQ1AusZlHvS?lqF^i$(C=SyClTW~UktwdK
z=1lhN)9Mqm*<57q4}dlf>JuNEKv!3Moq{ZoE~zJ?5$U>Sr1|Lg!*)ty$E5YKavz9b
z-}k|wM~0y^H!o(CdKx#WflSH!A*GlPV!V5aGMq&rYI>O<_|kiHb=!_#qvq!3Xvvaz
zylaN3h?XPd;X=o|(Bei|!4#?iC+Q7=*(zA*3-aU668AxbB&kt_+0<sTMmk;iIrCuD
z(bGh4XKz7yqWB$JyrD!DX>LtWhiPhSpgxG@<Mxrtl4RZJ9al7Eo(v{n16ns6Z!(Ec
zoyk_E5R?W>>rjE!4RfZxd_?E$^U87FPmr#`5*-)xa056ay;UV#KQrFY=qA`ie(ILB
zPcP4Eu!FTj_-&lDK}P@Hg+c!bL$HGhYs=={s<LHwvd)Qf7o2f1$V2UpWlM}FyXMf|
zCO4|ZtiwF?USqesVAafg5bvs>WzXc4oC(q8<0VF@HrHY9>R|u#@eigo)5PX7v^nG4
z^-s1qEeT<3jPE*@%#F9!OtT+UNb-GEoH>oJPILKdpu3J~0d%4-r(RC1ebHwGsB_*~
z+RP+VhsxG?hfZK*jT<x6-Ke?SIQy78fdd`G=i*XDdK`UF|DXPUg-UQGwMw7i{03py
zsfU^`gkgOF?7yeoHija7Va&$yr^@<U48nv1SiQLYpysNA{l1yOoaRio2Wvm#TVGcs
zU?F1|1+GI;c&GsEIp6>aC*OuQ8*;4;F;HE%ABKCp2v>M_{nYINlhFaRL>#QSF1m|R
zI6BiU;L_ejJ9W}mtHO=Dp>(MBDv;~VyPKWZu`l`#du>U&j$H@=G~5hs&^a;Lg0}pu
zgTVWC@9tN1H)zRgrE-2WwJ*?uR+OOy`BadPpq)Q%0&dr6P4!|Rxh>?+b#G(XB8nWk
zBi)S%G@>Qnz!EP?yvOQj>228Q;yJx?t6O1WoghIt%+H7tZnQ6hYkX!Mslok;a2+Jf
zrl3{v3{VQey3Mt_ZkaHLn?a)LV_n?CmTg^_S8=>(oIq-|#kvQ3VY+jX-dJ0Aw`F>p
zuY~$a{C86+`Cn`XY!0AxX)}l^=;Clccj4L(=N7Wgr<ToN%0R*Ixn4eTN~3e$#x21I
zKLHrcNzA;UC6M9Yn3r7@etqxO&)TlY`|O0pt3|8Az=G&A5H!^Ae%;2mu6uh^v&*33
zmD{BteEQWIaD6{a^?!l+UvyKJ9!^9|LrgQ`Ks3Ix3Ge}_)6}Fi_~H6Xkx0fpv1B5Z
zt)wXIOF<9YzyLr%zrS{3ac{8ib_cBAi|bz5BylRKN3}1FE6KPDaDBQRQsJczww5*=
z#_)h~6SrrYQ&1XR$}DdbFBhPdKpT>ACs}nuOP?=lwA!HkywRGwH`w#-ZQTsAZ``u-
zw(1GtQ6waL;%)#n010?31~%vm5nY5V1sT{l8V{>k7abQU5(Q45H|Y3tNc_%0dH>|r
zuf_RxnDZVoKw2)IIo?<6h=dppy3~tLHj&evA!YSfcg9J3{VAUKKB0vO8X&`pTh7C_
zj^S1LsI|b&35Bv>aj$o=(YA~nf@Z=X@4$PmB3c(1+TbKT6c=o}g-J>YVcfaKb_)sX
zcUa?I=v42c*8r^q_7Hn8<}YXskx~nN9qz~2bF)70+EzCO>}h-<y2n28K`pk$9P<<f
z$1O6+cx^C#rC;U=(wljAqk`;!K!a#7<J;2{{vMzHm_Oo=aO1BP&{eKR>^Kam$=)X6
zYXMzhwq?V@^KD3p1T9Uc)=Wr2n+z)^qI{EAUU6QNNBmq%Nl>sbAWtv5eWX_Se6U)O
z?)0kv+iPG6eiahS@gDH3aR5-wy!x7|GfhZ0Cu$lEP7*r9gZVGaiOU#9s<;gd2O1r*
zS<NXBSE;zwaUMUkTWabH5#%(#LOKNKzGErHME;tFS8U<{lr2c<{{U^_kiNCl9emFT
zWtwQ%-3F^x=6tf;Iukzg>_L+NgaN#Lz_X^tKy0<$tp)9|57SQ;UiY@iRmOgu2VaE8
zD6@oji)TJKQu4hiW4A#4JcK%J&JhYP=O=5`M=&Lp%gaj4?QKb58jXX+5VPl1Pn;`E
zTQ%79elNCXkK8@JHz~z*-{5R4EkHLT{{MJtYIU4UouE#or2uv?DO#b@5)XNN5&pJF
z!veOq(cK#KejnZ$)c>Y!szd}SsQa1&<$W&J-;yS7^-$gJd#hb?mGAQ|95oEilXk+O
z{kC_U&w!Ja1``s`UiIJ?@yo#zaGlI(Au%)5dF>20C$H4$T&b%AZE1P1kJ0EOlC=R2
ztHMOk25X)O!~nYzO;~?^HUps(aY(+U^4I9VUy)5J;w^=Yax|~BEZ*YVz(S8mcBX9~
z^c~K+&Ub&k@1<_siGK&pIR+!Fhd4uGh-kv8i9r2*x~axRANv4ji&h%74aVh%IA+l}
z$T@3mH)(e_>{c&MJMngKw#)_5uub#;(R^=soQ>Z0#rJb*-RI}&^BCvzYxJVWyGSA*
z{mfu-h;!ns&DW{ywlCB&A|U&KVLQb-p14JW6X`nrA2|N*0a9fm1Tlm&MylTPJWGMo
znlv7A0Vlgnld2!F7?AkSr8dlE5HLE@60eiz*FK-GL!JUE=qr$)+xzLZzU`j|qF0!I
zYmOX~QXaE-n*tG3$2U}Wgp{G=4dO-;w@Sk$x~ap!iF#sJ)6l<P9a_uYlMC@>!ky2~
zI;YgNaw7kzU-f9rFm3VEm0@a(SrDa$3+NMPS(E~(@OcQZ1t{H?6Q$z7FZqN&JV2&E
zBFOQuOX|cEjI`2m2H1Sdr>?Q%3yHclZvN>{DzDtR`p}6*ZRt15y)o4(s~R0=7k8=9
zynGzg(l%y^rX*UN8}xG*>1W*B5uqDjF%k9@&rNQ2;uq8Y1$uIl$jgI>TQsOCO7hX$
z_wz}LR}^Y7HBThDdAVWGFZl-={u3w(d>{q5@|x>@2u@Pkp~_(No2|R%CrPFa+5&S(
zje^5k3i0?PHPPsxxS~Wyl!cn<WfVZ^k|%bG7IU6`XpG@1&S@yX!K8s?@fLd;ZcmjD
zd=(~@-J?n0Q-b$@hO01fzgf~BQ5pnr)-r?gFvRAGwJOe9U2WKo0@Ww|Cz3i9Lp~Xo
zoj>kj{kSJ};R?#{TWVsokn{+2pltjuiQ?vu*gxzw3T+Vk_dbx?+`R6-D_$g#(z!K|
zbdG#$Mz47mWL80jJKq_w(u+0HoPMIXLnfp<&Id<|Uw_@yYu~@Rak%#(r5&Od`qD?A
z>~-jKzMQhBrx&T%n^Z;{101}#2?X*bj9bxnf?GFC3XWCCi0qG*J$9{IBHft>%Qav}
z10tTTewHmSpyF7;fjZp=UkRy9+d!td#nI6=jX%+!zg}_u?~m1<Os?sqG7G&FP-;U`
zoB`ztsEMnzvRgkZW9q|RPOpVjmE2|@50{EwGk@)Ur`H#%AZ^6jR=o{w><=80{iSd4
zNT+(FNAo2Tdur(nDv&0Fc5Oi6-(^ABF@K%J#)rW|)$8hr-Hxt2vnYR};+CKWg_g4)
z4>zmlHH<Jdji_hdtawKl^ffbk8XkV?6fiZx-1+qX3ML=8!6t?3B$^Z0K)vT=s<sQM
z1Gf$rc&Cm#+b2$4QNi1Cppt5eL{XQ@5A5I<VLbv->PV=-{HgG0d|<+Mg|j8ZLjhV_
z5GKWk-yXrFi&Xv_nEl7cfaWFVAE0?j`Eew2;*Bp7i~a(X2lnW458bVfv+kL0^SO1;
zG>f-N6>q5yawO^qC?b-Xp7vh=9dLQAGNWx1Z0Ng&ECDik5RvzLIIYg(^esR`;J9`_
zmt51O+T=SlEFz!7?$N-CO(0T0Yfl~JB9Eqvdx>?54_ly3<MfnkMrT@xzMnD7qJdjf
zrW|eq)LX8Fbzw}5p2L*HJ(3`5ctmq+1hjpO29Zi}>UsVN)SkgM0sY@NOdwJK2eG9&
zU}=_U)N}S((RFAxakrMG&v;gu+pH%)TC3OIQyZtNBi9)Uj`4Z&VIOGlepaMt!+C1b
z%hixcx6>tAamj@7yx<-_=_cUytl~_fW75vd`NhW-ZK=)k-{2xgJ0AutA&|y=5~0@S
z>~2+zyOZ?)tbcKt?_uI1_XcDif_e|<%P9fPQJ$LS!RknfK@Ys%-qPjd^YK|L{0uvo
z8!oBbo=g&X0#i(G-?s5gndwYDVBB%-rmoIZ^ZR75{;pS=KlsHaApUXnD9-ug(*1um
zbCSd+4M}##qc5#wM@nxleRpfv0k<52jJ@Y|hLg?O;y|YYW57^7sL~z3^f}VtyEJFj
z1a$8+14hTeW0w@=%KCr{HE<6~@Q?(pr_gh&AE}1d=}{JE0iV=quu%nm{OxA#!>!@$
z-!P<3d7PPQ)Xraa*S<bfpOHG`OXKs?gR^lt^gIg*QoqcNT*G;8(H)$pOE0HuU*R^z
z5U6s+iFfODCr__-kyb;|hiQ`}RK<H6W4mA+2O4gE9<KDMd{@8kKX3m(_8G_c<^Ljl
z|JP&YESrkZs0Sa4i%kM(u=8avkS#GyNQ*<0K8~5x_eZ@tb=C(|MRg3H7)PSdtk|gq
znrQNbxK&ZctLZlwF~`GRvQ3xP?iDH%N;_>&8Qs?2d$9I$W6JX7M3ZJJj+{aK+3e5t
zsNYU;M4qJ{B+unLY)xpMduB|9zUO=^S&Eo^6V3tT8J2)0eF3)_tX**w?4Bc)D|fy)
zNS!1ZFJU`wQd1^L(zIs<XN|`Z=r27<vfx2BhFGw5r<h3%3qrL@bXg5Re~=nyyK_HN
zdV%Q0Own?DlR}JI3}}<*Yo4<;!*8a0sSdKRXW`RjPEY#j0?Uu$TNI7%E>U$a>u!sq
zB(jHJH|K)0-}|=TShJ2Zr<*OjCS&2~7dw<uDC9$*hfpgf9S%Mr{$&S;&+0JkBF%S`
zG)uR;q1kCKb&MWZY!|Npr{-gC(y{-VYW0UezvHn-AIT)o(cp-Ra-P!^Hg(JyK?iAX
zCAiX9Up|n|ovzlIG&Ef|=~NU=eXjc>|Lgny?AV)S5-!a;U=pqvA@Shuoo$-frTwjn
zGfxmu_JL&FT5NjuehN;C3zlI6(yX{CSY1SkTl_JfeNcY|(iyA(jZb}%BpHZ6DINQe
zI^IKA3pVXz%{w(L0aN1ebWrh9+`m^c2FtTI>T@7cM>^y=p>ZTa+*H8n#*rEzN9{K6
zs5tYQX9}HEW7HZ7p4^~JT$q%xg1gm0Rwc$#3hJuY44j%q$PBVOPB5H_-zi`EfhhVL
zU>><8<u5PbRD9UYqRSPEh`bgWeb4ycFngnh!jS7ow<zPSS^_;k_DjM!Iwjs%Oq?@&
zvPU%j9~hE7se@d4HZlhV?<VVqe6wDbKAQ%dkR$U{VdgHWD&o43AN(p*LMrD~Us{L9
zr&nr-@}s%{)Uex0U<9l8ogg)Nc9SyZW}65{k<O_aH9k*n@NW26a*vSh9Oty-PVTX8
z)g;fMsb?p?Kt05R7Oa|QNxjY*lNFrb2uEHPOQvt;%k@A=q-l15w3%c82Vp=6Ge4x9
zZq^cQ5>ZanaQ@s(>LBXZ=-B5f`yemg3hLFv5P?8<UIwe^fr?<CZ`}>EO*NR)!58VR
z5A>%FVGPy^8}GR=<3t*}cJSxcVbhb?Vf=%!A$v~WYJHcwU1@Ci6eFrP_wjEB=N_KE
zyX%@UZAtBB53|myHxVe6ngqW{H5<c-PN}08v}R1C`o6xtPSRwHLLVtkKGIl7%1oGU
z!v+YNS(ncZSeqJ5YNn-@=k>^4b*q+o4ZGyslWMOb<x7Fo9zXvD);#0TVBOnjr{=73
zVE7Pgqed68%tNsOEr!k?ST$V$whighpm)SUdzpTe`bxp*YkC#$RJ0MXEgN{mNBpDa
z*05i1>q00#npH|R$-M>^N^0AXA(}rx+I2x0Lq8*yXfGrV=l>AM|0IzA`OnUwy7hli
z8RsvY8`J;wSO4Z*-z(+4w~CUnYnlU7&WfH92Vm*JGq2fs;(We&<w-xVcKmPIxj*hF
zzRo{7aR#9KP5F2Q6hL_&RIsVdpmIfh5p%=``k+4Ou^&F(<4+SS&d0d>>^Ib!Cs`2`
za1CM;t}X$nh)c5Pr)(m*AXKMNe{Rpu&rdHeFZbWiFVEBG{`}|pcl<N{^ZWPnpWiJp
zbTQUCNy$cE8h!7&<E00~o&cMO-B5X~S@nko4Q@SBhqY^Udt0?A_M~xq<Rs(U<wSA1
z!V>q3PRGDc>RSWgsoBV{E6?_$cjOTEtF0iL#IM$B=*+t8wt_bLE0O#)Aa&k{kXTMO
zA5VX;8xQJ4{**zDP8Gsv^zfO*r;3iWHO!}KSp1uR_M`cGbR>2(5E-p|9p}bK(0rCY
zm2FveXa}NSFFH5Ts)1#DY;jYmM3LPB^d9DPR2CPi?azA0-ZF_Bz3Bjp@`pb|O+iXf
zi6p)0K<!7*r(heV+yu21tLOqoUs`hy5TyQkzN*0F7bBPkpnHrFGfoR}F$_MguBryX
zo+QkNVy@`9Ocw!NDo|Ra(jy1@wF8t0SkeGye-}g+;8Typ@CLaaZ+;KIR~qfA9BUVH
z=SrP8I5d%kfa%K4M@h!>^Bhs8@4kNk3_f)Pn_cVgudi>y{e^GKD#Wkokzi`Ux$(Dq
z>DBN4N%p_H3^B-q|M}?7E&uy7u||f_vYK>61d?I7ha~k5lT@Ik6P_Sg-4Y;8Q%n}j
zV73EG1&fd@kC4K|d0xYaFfMy*vw-2Bas9d4ESg%HHT8|5gCM@<eAk)6+jO@?0aXLK
zUsx);T&{VW%S$5>6AOoR@A&O#S5YU)D_4Pj-l&2=H~8RE5Bq&c^VKd88PG;DrtNmL
zcxvMF?K=l2WloD($4!bxah+Pn&T(!hnL;jtLHb~zSa}_3Ha*@cDFQBWkNnLF>EdRn
z)Oy{BbiNViNEio955Lk=^mcjiCq4xj6@4zYz!}$QisGX+r*voqXZ-c9R@CMVg0lIA
z#Aq=jX3(6YHEBb#2hwrBXkrCy+yup(GPI6Lz*n2HCi-g(i^1n8wt(lGRhg_}S+(q@
zt$oTlE<(%e^IvZq%`y^*;^U2~JnzW@P&WgOk0s_;TYxThOLuFpNmatc6+`xB(06@x
zug+Jh>7aJvwJHqimNKa6Zj)YhD^5fkykv*Qr!<5mFYeIE*_4Pe0=n>v9m;#KgG{tf
z=Q!!>4P(E<=twl`H+<rv7V5S;bfvZ_*ZxMHTZUKcPzZrqIa_&G^fv3<Gh9FVb&<GD
zH%W^ExFx<HuoJr88<t|?I2{HVNm0E8C)YU`ay7C1Tup|j9qJM}A?0vcqQMD%H~Ccm
zZn#Qs!SuOS+bG3zdX$*Vsw9_KfijwFF?m<`QB(^_vJ9s;KHBj#%`KthvR`!Si^abb
z5zShgf}{b#^~Zw@S5(5(;zgaJ{H_)3!m!iwr0esxWK&0Kb`nQQ@QB@UKPvcCUuwX5
zdx=n9;yOid+2+es@ILHeGZES*slFE0W>EPfDzQ<0ZW^yx6{!ySkteK3Ga~(!WF+XD
z%QII~l8I#4qBF%)cL`{6Q0PRZ9Ag#e7hR}uO?&@jj2U0fQuvR=@L(p47eDmC4UG;g
zo7NZisu7v;t%~@BMvtT!$M@lV{{g6`8zjj``Dka56)zjXi{*T|#IGW5O}4dncaL%L
zJav7Kf5D#)6Twp0WO3^gL(`-K%t}ffj>Ucn>$DeK$~U%Pr4m`ZVD5jON6kX1RqkPT
zFSNSX7;}pE0a$B0KAw??fpU_RE(K`YwqcGDmbG^qZMx|+I?wA=vt?=O!S#?B>0(f5
zH0wU_l5bzoK*qPBWlJ2wo8}DM`fX^^VUG7LoS1l#X%@A5#S>*dMXfD%#sBMqZ}tz=
z0(yABEuhX~PgF~2=P2l_J*O*-7IoCN+XD6QLRdENL!1#JHJM6ic4@=9^jAm)^&0+r
zq&u6mtwVxricV-Wn<~kG#>ZBcKp`erzX$dX5;lPxrNyH=4A&y;+_3FvkOW;^;QMCh
zzewY|tj@Uy@`oM#iD!KcwNz}`$2k?9TQ%+H6BStM8T$59ne6n6!uPAx^T3lA2Vk3Y
zZi$0MBA#?IK6QLfBfe5BXw_NDpaJTRQRh!8o2)~Z7GAI<(&#`|2=Pm|Lb|$5yEddR
zV6-WC;9VGBOb9Pye5E7Wl>nM;k!OvI?y2i2o-ElN;&dXXf-YG!(7-EIN-TBus2-%S
z_Q8$=4L?9g1<4T4)ME;)$t%QvcQe*y&DX<@+kIO1w5L89Yl%`_d)62kn9CB4DsL&Z
zS_S$;j6nxGl}~R(Uuiv)UwQrKj#CNgTN*FoyFl}JE5aw~(+0FR4lNB8j<fJaD(Y>3
zR-M;$vZD9u1Wo3bnLy5K<>bs)dcm$CH*!~ZLEWS&%@RChgFtS>ZCYdkJ!q9lT;Uw$
z&&H$$5vYj2F#dAQB6S^t84ig7d|`^R1yV;GaWn(fh_s(%R(9KY9?7jHxhqLsr-lj2
z`cS;8sFEslpVe@F-cp6i4HKFAxb;;ZqlH95prcV|7uY5ZoGuJ4o~e<m3iRv7Nr?~o
zqaK_jbs*sp3c@>9I!tWLP-kIT{Lr=G-2E(%*4)S2UjEyKDHOw*%B1PL|I|OXzv`c|
zOgaUkXdscKxyEkEz?s#O^NI0+y0H$2nQgLE{(zr3y1b8&*Jz}tJXDq9jR~b6wi8)c
zhh{@!F=>Dt9WCXYoqI_7zU(Pc4>XiaXNKloJbdPkBUK!!&W>o^wzv2Mzw~oknOGTF
zZqH1Gb7M|UD&H%G>B0F53Z3TOvdfJcE$i<=N_s}8xq(?97EYaym}u^z`E?(J;0D#)
zpi^IQ9qUcHK>{iXfDOfcx!+CRFxEU3PQC0y^hO1n8x5QZEZ(IlsBf{U@g_f!jV5i*
zOQQV+&3YvJB^gxK{pN@VQX2G2RFHmu{d~u8xqpI}{5)=Cp7bf`vai@2SQi&pKtCBW
zXk2G6bVUW-CkE}Rb!SPnvmEL=wbK&dk#na>+#$NpiW!$XB5E4dsGIio;Xj7w5A*Rc
z3=<hxDxof%F^}*qLEsuZwGn3shM%ke@vI4`NI<`-933P2<1KI#qyFW^`1Y1=snlZJ
zz0rR7oj4ix!~3bXXiteEn8a%kq7NGQ3~4B7j6!PGYS^ju4AjP}KR0Mjmw2?48V8b)
zMKQWWDdH=e=AQly%DiRQo^nGen&t;>^65K-cYy(w1yGkLAgjl|NRfJuZ%Ex!eskm-
zfKFFGO49I_FwD$WzMu@4n2f4Ow~4YDsUM`L8AyNi*(ahcB=lqq06dA7&PSdzuDHZw
zMEncqD8Gp59df1oemZyQFEB0{)oZ*_LP>5?!`#1kn&Rr~7O4Nh#jw9uhjH=${P>s9
zNA90EW!}+fd`4W4gT)GdrFD(SJo+ux?HR&-QNl-vO_K8$sOsx^1|r0Z&CU~=A|5mx
ztr|{NJe#~KSko@Q>bs}0J589DujwS~geyRy$p@DKo6QaJzeO;dmNF&#Fu%Tje!j?@
zpET@D<(z^32%||4Kzy<9_c1*G(!;YUaf*ynC!G2~yI2zC8uELXE8<mjU}aeypSZPa
zOaGa&I^9Y!rJU7{w}QR2{<C%_R$rt3U<JDb7}KINliWicXFgS-*a4M|dP>W%^r3?N
zl?e5<SKOqbcmc%JjnrV#)6cN+q~z&X{=x`}k&+PKR3D#yu|n#Lr(cLmNQV`^>C<ge
z<!3(Mj1<8w7~wGZWclM?;DkE3H~-qAFsECqJ=85aGc5{qy=$5#C@kuh+S5e7bP?&E
zFTEKgYgqeP(@`rT<%pYcIZx5a)00h|r9O|eOKZnzNNR3<YCav(tgd5(EDx}QrKSl<
zoim|>fc?_l7i8i3#s5Sq--NVsp2C)S>n!pUlSd_~%?AopCT>%9MM=3Z2slh;VlJl&
zlUnutT8VlcuC};-azh0hLHg+L`z~%%{*sntQ9%-~b{%Mq#BPVQwjn=3LNAg`DpAc7
z?Bq*|{P>eE{|(S#TKHCzZi$sOnN%tW<rOA1n_C}FY6WMNz;H)_s)fKXAS{)g1Z!nn
zf6XjfkQ;@h`B8^?LjA3e;^(H(%*1rz(0>|liV`Sm>J2WnN;LpI+?~sry4cj4QzsC7
z&;flhp7rqnsF?$O8!{n715h*^IYZSEKzrSRygE$vJAYz?q!uYOAF*y(GU<Y}ON<Py
zCOs9$d!UGuY#NVK2T6B`4i-@TqK=GFq}No%uK=Y>pY@(LO}RJq8=&M2!7Qg?TmFuQ
zg&fcsB46unEjU$#XA;W&+~n0)bZ(1?4=6N00?+fRqS%DGxN^5DO3)gGaZp%&cbMaQ
zXg~&6emwHlf!9ozNN1)n=Q5>w3sSHfs9YLLP&amcsGAc6;Fur>T~xd*mrkRgL~-ZK
zEJ#&D8W(>Xh&WPvNxxT*QSl_bG)d{`o*$-&nH{F7zEg$yd7IzLq2S`-8{zU{A`c3a
zCSX&KE;Q?i2{BHd#|%@J4|V%0&iT$~Ju$2=(nsTlU%PkyiNsHqQ9$pOwxr|97GAHD
ze4W2lVSewpq(foOVYrx$%>4y|`p#`UQ=AhtZ~~_2JelM@)j-ynb%(7gX4|2cxJ9+a
zs#Ao=pz`vA5FxW0)R6wL2D1SSH|nm^0SZs!d;jj;_`c3XwnBwwZ&JuqMTuAWbv!~^
z=ryEO)wFHev_+pFjhTUaO+yvi@e+h$(P(<_(H30=`gzkq2Y$#cKmN~q=f0&E!Vy)N
zNkUXk@$<wU(P(f%wC@)z>Zv_*K1>D&$-ntcrNq`lQHq=3;NzVKPYhpr-EDR$bkl0~
z{JtgAZC$7#I{`z#o;Ya}q=5s4K4>Bh@g>qF2iZaUCn!StH6#80`rboID~y78Kk9g*
z(D|CC_sGz4So1A*1yhVVNI}w!u|e?U(G155CR(g7sraAPO-dsic~~bkXU0cA4E>OQ
z4guXZ<?1&{_kzfbDkg%bpmY)5Nz_XGt%C5UE|nPJ_UlRLCJ8bHc?#U87PGtDe~L)w
zH<ZE=1fnfSqb*Ytar8&u_TfI#qmOG;!Nu>vufcXG<J?Q3JvwD-D8DNj3MmP-k)LZ4
zm!>(`7>&TqS8fY+dhTr^UaCV)GuvW%;6sK--@{eWUALx*Xo^K?d<zN45Iv}({!}7W
zGQCX=NYj8;n%c^L%oXzf!}k5ARkuMu!_%-f`c6D7EA_EW6)}T0*>v6a))#)h*=*CM
zf-Vj1n&|MwH;Sjo3uZ7O<t=F<pMw0%!vu~rx{(~k>@NKkFM9_E<^9+2zHW`GYB@4~
z7(+b=ZjpDIfTsG0pX(#F!$e5YS#lrGnm8^8PV+Mwk)pE8L|%d^U(v({+=f&Y&T{?+
z(C>Js)cZAFxP0T2!bh~fNTet8+MA-3Ppoh>r+<!;vmfN{=K1@NA>R0=4;%WzbN6Xi
zu6?=jIZg;ay-q}{Rh&I^+fHxO%WqSz;7Uy`?@F@1D1+4f9EOHzVw1s)ul^S1_s#V8
zz8jvOpYCJVJrR+m86{2Rj9%^FoFodSj+j0Ex{0ma#s><uoFYMPRIx=dT)cw1Yti*{
zUmU2O=G>flFkNEc=pj+2iVLblswc&yt4Oz>j|T?G{yQt>LAy6vl=Va!-e>4l=7OxE
zj4(CYVeVxz=S&$>ww0TxFs6L(p_u1+H*ADDC!W(bO&Pr^)L|HRMzi{3sBN5pME|cf
zIr);y_Xblqr9zRDUk5tV|9c|n@r(q{qnX#)4gG%`5uBm9=eR>5_fp!_GN+V?gy57`
zfkvDD0)fzlE2?#HRALkp0ZHMMn!+iSXx~QnRt@G}W@q(L578eGbD5$Gzf_)_kY<b0
z_`7aFdh0VuJ=>z^7OjZoP4y<-6T4c(Y57}UI;1dYXo6X@=XT*+Srwt60s%tA%p$NW
z%CUw`2tObN)dKY?9+%XiMYR7QxS`;gYUD`^;L>TzNj)(CBkvfubg}S!z{T6db5`G=
z>ru=Wi?+-*r)K@G4<IVH#T#0<&KPghvI_Kb-CZ)Ichl0%+S}LuF7BJDf;y@4%*!LY
zc<?~VWZ5PykZMIKh!&Hcm@>7-uc`m*n{<-=&`9c88{JZWXw#Xb^j{%8Sp3#!v`|Ez
z50hnXiBEht-K_!=-`}Rn4gS>oTlRXAbgeMGz|<V3o63wel+##S!Xw^WU^XT6aN229
z+=HT=iIA<3irLApBBfS$k0$pRiOG)_=|4Vv{73r4S{q;eOKGw>vZo7QC>x@4(iK4U
za*YyaU|>Lh&P__6o!Q$xlTNB`6UUX>j<qME088~drAgA!vb&TU)dia-b()v|sHb>$
z`Rf@dYSz}##qXjI8@DT0eF&U0(gmIU=PJ-t8JlMA4_<()=P40gA#{>RnRw4ijRj8c
zCMAtFoR!bjaAaZ&MDsh&{SnOIriEX_{MvyTPv-cpM(>^OAvO4iQch%A>Ly-q(y}|-
z@6BeQQ46vJDtb}HxSO>+se`FoNgetXW5UBf;IKKir>qXVZNxDbb22XvLYc2<&}@Nu
zKI*oE@`5-fQ*S*0YEsu@3(&*vr{!~bdT1)(fW8LpkCMOi1*0j&^)x=jk$xYTlz)n=
zG`&|Fa<j@cnpYnDMVKE&M8seg#ZaXvV5-}5SJZEqAtF_7uKV}@O(#i-=-vq$9b*g6
z$Hu(QtadFpYvJe7t5o$z97*K$MaAB3Jkb;`RZxEI?#~ea7SJPrH2Ru^zM*29dtIf@
z`W~ztR^n8FSZq1Y&jXy(o{W!1)|<>kjCtlst+kz2M3$>m9&dC85ril18W;G=v*z)X
zd6J^Zq$zuE%yM!nD}UOO&8LSfBwU4|vRK$I|1>_Nk_vVM*7#C=@AbTf<#Z)BqaG*Y
z@<yQdUQ*a>Os`q-w%`d(w-oh4)bJj6y`~y|7{j$}$^S{h@|xJzqI06bWW*eO4%Bxc
za|1sN^kf$_`A=xqApa?Vflu*ytb!>tc~F?hE`k1Rcff7!<L%sA=NG@-@t=teF5aab
zHEt-)9(@kf$H(avFKxqCeZZtG<y#VYA?{V=BiNfCH@KS(>U;`F8cr1M)Oa7wqr!`_
z%i)_}r(33_2f-3`KB|s&6S^{!Z&RtUe()RYxvDwT%Q)%CQyE7K(+93U5?m3Anv{BM
zx}b)cQ*E<Bhuwv5;$0=pocJ|{p-@X<7A^AQ;4tryf#NtncW{2*N^tKJzD!?c^?QFm
zf2Gz8Yx4+p+<T-nZ9JQBad!2Zk|uV|oS$XL>{EY%2l)>@rq5&aDW6v>-<B)^H*h>i
zXwDBTYU13mlr2fGehuc9xV-K?Hx(M!(%t(&Wp?vhfCloKJ{TuJ@fkNjTZgbURPy?`
zL5<=$(g@VrB}eLLti1*3vhLWGJ`aja?*9Ae^okF*p|K>(&S3~gfo{2Q^j#5;m?@Fp
zj&$FRcH^c2s*wZ^FQ^UZd{khsT`zv7-=GA!5-ms*jq#tMPvTSk8Zb2%tLdi;+dxdU
z97WPz5Jx7G%K7;F&Bae->ovS&T6gr|n{lv5Oc7@=k=P582y1hJF{5M+CJ|+zX7?7O
zwhWD3(Jh(AFfAxTY<xsb+^azsNaK4o+0$VN-?Sh-0ZKha(9K%SU!Mi}V(Ht^0)nH|
zwJ{&4f(lw#O+|-)Z%)+@W<A!}+^VH9dKrithOPVxDK05lLyAVO1*yqAL5}?<j8DJZ
zTF5sPeH-BjsqX`53_t<tn3K<edZE*NQ<F*eQ@O`;4h<yxQ@=X|$7~Erc$=OtdR~p8
z%e3KoTDO-h?iBSNsQ0J)Dw*F>yv+}(v(Y_JJ?fx~MP+<HXNxYaMb&E*UUR>GJIBI_
zzI^1VgFKoNG>T|4VsU7ueAdjM?$!X4*>YWUcpAr-VfcOZC?%0WNt~ZCBq1`0W!=px
zy+d8u6_B4_`{|u?2dasjkmThBbaZSVirzP0xTGKqia*wYKH};%HNm8Q9TM*jebyJ9
zkup{g55gTwHswgaf%Nlh`o+%|-S*U4()GO`Uu{2jR_fR1@#!_F*B5{P{WLe{PO7N3
zq}#Bim(L3KRJ^mj;nU#}nJ+6i5(*$SgMt;r&7%X5ei8BSXO=b>n><wga>~O^XwNk9
zC>f&)pk?Wnh8!Az4&I1~x@ZoU%1-j@gdhr0eTbf(fl9y<gnC9?;*&gqzturvV(4C+
zb%XA*es%R<;sePEa?IV~cIuMm!y4(5Cqa&^0qG6H466(OoZbJ_o+Wz)3M24E{f<pD
z(#);af;5ys(fE)a{3E2Tjwt4uOb0$N-tZbh&-#*H=`2lAU;6KOmbhNRuuJrMecC>A
ztdK&<stG1+d?yiorv{`9WAV)}rl6@0Yu~BSXO3IcE8<s_iaE`pXm&O(6>@4Yx9;dr
zeouctl5n1w7OX+7(H#MG>Xx+HF9TiU-txOQD14-je3bAE#dXqfW`GDa{ygnd>e+C6
zP`_7DX_nb#VVZ7?VVu2_n#yvSZX{Ghnatgx_@9(L;v*_Fm}?|Um4!0-7}7XK)u^Ah
zy*;`dt6}h)@V<!!Ds{nV%!!sJW!$P>soA6znBP-Q&rmwgu5YJDkN(7~tMKN>r%D%d
zL!lTGCdzc#0yg#MGG5*Iy@`L3pr%EC?)$nwx3$`AG2JF;yiHpkl7zxFMYmpVFln&s
z{OOBtQ0E$)UI)$1B>z+*m4XE7`4+JJm9h(O_BqfQ=@Y0nUdJgKb-V-N5h_TUal-g@
zj7x2?{6)$P{EE>}spCwF;%?7#&cCz|_|TaTNYN!l{QewinSqM`kpl-{4QS;~?I|eT
zPn6f<QH>*IyZ8qW^Gcj#QF9Gv%HQdHhsq&n@LQM|^<HgwpL<{99c8Is%O3%S<C@Bi
z|IVPhd$fhV#8U22Kz;zK;>_8PK<zY><Du+AQ<w)pzd6t_256w$EbW00Zv7ArfC^t$
z8+S`2E*cbk$|Aaao>#_0*sMH8*=zPZ%jr!Dkn0xRy1KJY_3(I&(kQz%Yz~hD^|Ss!
z6VM*JGtJ&M=?~OJGRWam6q9%Y8xDY~96+@IwL_RxlBZ8{s@(z5=6BSfF6vRt@v3<A
z3!5Tz2JvSDR3ASwl%(`4j{3`UF5dq)cBze5lj%VJzwA=cb=~F_6}LLjgLB_A<F5M(
z>#IMtKVk~cq4jxQd*T6R1u~mr(mG5`4(XwIKDT9^2e>q8*g@KOnF+AMEK47e%G)X?
z1vNw!=;zJJesEN~?Z;QQ_$GZc=)K=T+E5i2{~ebdaY-PVB*jC$5~{$6cq`PyE6M>i
zX-_?pGmDXlA`&=>*$lUbHCJ<jO<cNltOq-Fz-v0Bq|b&{v7w4jhYG!#x)0!|x9Zri
z;EkdB_yxlr_3&+EavBtpFh&CUg}7@Cb50^m#cfJ&l(^MU#$TV&DkgQrr|s~VEMC)H
zzJK}ON>9Z+S@iD_R?ps3opcAcSpRdd{EHZYwSVPrk}n<HzQ<S6Q?I*AC0gRVhwM<`
z_Y`k6k(4_F!&YsRe%(;8>rNJaJQ^9UK2joeM}S(BtE81!sh|gztro0<J9wK?z~?s8
zm~B!TG&dSRuO=vCURc$e*{UwdkhdUpUO5~0WT3nbDN@@Zh57LH-!~VPsNeo{^y8h1
zst(A1wJWZF!>@mpuKzkxYS@06<0gwQf6nU=4Iwl*zYR_01tW=CksYWa*h5+hFMkbY
zeej{*zWlBI5$H0Cr(dD<#PH>34H914TqdnUB?--2f5)J30{3zP{ILiX1W{lBH$CHr
z65vT$c$KNhBQWh+MSNi0#TVG@*gD@D^+$oC8vhi>D%{L8<ctGH;zSKry)B4b@yDSx
zE4&uVNN#jbDUoJe@t~F>6-Z2xIqQpG*{nNAn+m4HZGi@v_@);6xNE3Bk#PJpK)r8}
z6{+bhoBFyPb}geaM}CO+W26%8+wo%+&)7`=?3KxRsS3<|102wU9T>GeFO5-%PV|O9
z{aNtAE@!FpEjSBx#2M{WPqFFmbzkK96zM$uBp^{>&}b@d!Yn4W#9|hqKHo@nE`j_k
zK@vPvcLDPAX3Xnr{O>ZhQ^x$4$)7jloN@o<@cs0jA{$GuR9V+G<&ai4CC~q#FBOd^
zlK6+vU9Y7mwrWu`CoKhiyJXka4bVubj+qEhF(9&1+^vd7EZF%~1;vFjw=K+{0%opD
zjBZopDSmPJP^DtDBIjR0`$ZLQl`QWnKND*o+VgW}U|K#+6uXV=HlhccokAZ%vHsD8
zv@?4tP)eB^;?IEssrSPek0F9694ISo7=j+3bSGY|l~3})j7nVi=u#Qm+cNws2;k`f
z=-Qxi&vcbo8A32yUsbpBgc<Z8QJzw<>uLp6(Wp_IRj;0^dMY)S&lS`Z;y>t>;8xY^
z1VD!|$QshKtZ$m&*?1|Z4uD*!_s|{fY59w!ieyg!<^Cv-Ud8z}X^&?C-#G{zPHuqH
zy{f19&k_kdDicN(u1W;P+I!%}c+~>6!p$&jxf%9>ssvOmQxTd{RTwQbLbFR%^1c~s
za%mDcKhI1Wk;0C@E^JhF#$LS0xm&c(xcFKj^{dOB%D7JQPL;RPrr*0*KlBIw{_r0k
zA$&Z2_<Q&e;r=Gy?BDS>cjRi%u$J_oieIWyX=1ihrf>jd3{{pEd9Vt3BDOo-r(cKi
zv(pwB!t3jbg-x8&>(BZh7UwGZD$0Q#R`DIc^VB*_9$+2nKqPT}d6XVLRD#8Th6rK<
z(tyQVrMUdK$m$AJph*?%AZ8720*$u1=%cJz?oL%p<U)QTF>Yp1_w{m%dET^=ifRp*
zuXldzCxbFlp_g#g(9EgqNQ0U~1y{@L3J^0qy#zJu?m}Je+#NL#xtk5hx0bNX7f?Ns
zF=B|*`!(3bZ|MvL1TA29vQ?Q)%oCXy8nsHWyU*4H)^kQ{NEs+l+y?$|DV`3pJ_)-w
zY}rH>yw5fu_fJ?84X>oxGo|np^v>|-AU><8tfn{CWICya!}XPFEUeXUkI3OdnoSgx
zEt2wqMwPb#k6hpirsv?ni=g~mZ~RNN`vDboQUhW(W%O*&2Basp!K9z@I$lmv+cfv7
zH^}GC!Q5cuU1g9FDmJTICeP6sLgRAa;QstfEZj#M;QcvXgumW?578@7C<haGZ)YLU
zBdbm%+&h!J>0Ak_U13HZt(|-agklFI;j+42_r2^{)L?KBkhrh|B%{64aI!!t$2I`w
zkLSSEiuA%(LB{oWjD>Z+1cp4&nSQ&1*+kd_avAIUfLpcGENY2BUAM%7n(ovVpta*P
zYzh~x>hCrN<Iq-&*MKgH*1)P4tcr#wTCkG9`_acc7uLaGL)<!z_JP(M1wqSCl$H=M
z%9(3AQE;%TK#_q(<{F>En0nX57n5zN&K@5E2pQ=B<@(}z)oOLQlKM&=^X(}yWtiUh
zZW#OS^`CM0C%)bPgz+DT^46Lo;ndd_(|7`!z(~3<Xvuf1VNz?7*p9=k!LUa=Hs=Qe
z$Q)W+7BPhcxdRkQLDdHdnMM8v;D@>So2~%awcyNBA<r4!yNeJ1GzYNkgVnoQ53@aq
z?!=R&(E_YIN6Opj-6U0B-KX&m2=B`q<;FdFxs^rFk}Ps*R7`=3GjX$uZiRGysSE-a
z5{juK{m?dsgXUx5NQmFKsZUXeO_l9*?I)y3#>(j4Ko3xf71Aki^<mNlYq6<S*{AB!
zRpAE4BcJAduk2E}WP@C<Ff<q4KUv<YVL>X;UW2==4s^}a=@T}3s?uAvjaKTbPYv}p
zrGo?wsqm9uy!|rCz_uXekJ9$j99MCqDH&|28WbS7W$X&+V$%*K_U;<e-H#vob*$QE
zQ%VNKU^Qs8t?`%2pj^S@6ylQo{!DJ)SBdj$!ufgA@wU|B*=hG2w`qu*6>pZ4!yPjp
zv{QGVr%3_2%&)VCT7*&pRI<G+_o)#yq%f_1m#P+L76nk|`^%Lz=}7&?pD!z$mK~sO
zXZ0uLC5b*&XK0ejbEwOON-WaoQLioj=cE~dHq}DKju+gnwkan~^?3ZLcojWs9GYdP
z8cDBIwyH9u7gI<2K-RkAPqThi49=n$EDS&W(ZXu+RF%)~eg)@zrb4pf8=~v*ge1@q
zf>Nprwli0eW@a={VEV2jbzQzw`G^9P&L(EjbD*a)srLt`XVWkZQdBWeL>7pRD6Nr_
z!i8)?%5Kl~s!vhN<-Q8KQ5^=7J>05Lo_^l80-buQuGMfJ$0>oKzwG-{D-enn9p!#Z
zSqL<|r(R-0O+kFChV&7>sk%@UxqUWq4r$3FKid@+O%s1rkae1jM^1jqr=p({;c5J0
z-62vf(j;9g(lU|?B&HrxaX1XG58P3n^p_&s4(gDW-TRPUbj52LpOUYn3weU``({Sj
z{}&3X%Fny9$)x#-QU{e4;!Dt&Pzq(IDh9sI)LS8Sd`yM8)h)W3Q<x?bFxvf;GZZi5
zaOFDXb5v+mi|0NXSDT`_&KB>fPJMt4Xm79MFRLbm<G7ioa}U=mAaf9}Ul8=`^%j`w
z6`{-0#Su-_u;1T5F_P||dgAkOmNu>(_dolSAjdtnQ|K?yyNWq=CD-<y*)x3ZvTw2P
zpE)Ac)8GH%DiHJ28n*$&%cBz*6p#I}iM!%zmC)cTVJ3v1+_txM)6&z^yd7}jL_hYs
z?rJ7@k$Y-^a|XyGvyOgCp-Zew<tlkra)Wh$AFy7l^=X@P>{(Fo(#$Hmzc5)D--8}0
zG`4&B$YKzA+@fLQUe;`=tD8<^z)<1sob8P+S_2BGL^++^!5UPHj^N}@nUCJ?_*!!R
zc%2&6(eXkp5PB_wH!)>~WW{bT8U#NFO2TYz+%P^IUIL92gdQvv0WG1{6!v8Fvq6MJ
zja}cUU$;@+*EQgC_J=zCY-+U>a9YvB+!gLs1ZsVG2dLX7>e~VOhR+jyeR<pe>{qKM
zvP7}dXh{hRM@=?F=~=w8oOv|9vMIS<wP>YWHS{~l^?IXiLW*8XOax&h!0+PB$KEvI
ze4-~*v>*FMa_o3NfxtI<E}A=nPOe>6;(n;XJol_EEV|)-sD~?4?>Y#S;uD~~;KwOJ
z%@qJLsT3_xzlc<$k1t>^W@`Mxz{g|Yjnh`tZWGPxp&DHj0t>=aDqCoIYk+*~(QIS-
z51wCL{qW#yeSD=9*m46}ZcK5w^yYAxCa{M);V8S_-W&|M%fwQ|HZ5;sM&z2h?d#j1
z&PTYJU^7jlwuacWqWc=LYFqSMxiWQO$fylq53w@V$4B^T3{`<38xOCQcZbLK&MloQ
znTz&cSTzIfx69weoT?$-qKW<~>s&54nMFaLi0A6xul|;=rbYBJpn=V~p6I`Hk+mw=
zJ*CZZ@zuMkA#Jf445~Aa=aO~2-%0pV`tBGih-!m+7%JPLejLPAH#Of=1`X^Y1WK~<
z<qxO?&j%`1SL#UD%fI2*+Ae>?d4j-gk0wh@-1022m+MU`h^^)n;^*G6AHqyX>r%9R
z*_ocGzaVxB`k%mEm56j6uu5)fB6{2+{oWwm`q59tPy24_L6LgtSCG96@y^Gby)JEP
zmpV1LN`h&!sc?wx?x`>p6s9eye?d_9rE@O6br&zFxg9)74280?60PfOi4-qX4d{VQ
zi*5SI(3*#qpd?;zZqDMlCthb1RBIT|uczE=^T^Nw27%(U6kTeNQ5W4`p1t_s)g;lT
zI@0f4Qk^pu!)_zsBO|s72`kL2YvRo9wqVnN>aq}41K1mROAEZd%KFz3p(d!VSLpr&
zTT$`kN2qL{8lQWW>QGBme!`po^joOygj$xm6hhfa<BMX2Ni(+f<^Q><`_RR|2Kpy{
z%rw0c@C)ON?JmcC)Cq1V;bSls4o(fW{Ol<W6X|0eX$<?_?-BLhQ4Z3LyXxGRnTjYx
zm0;7d1}UndRoVNMYnp0R4M<N9{|?g11+a9}#1myO4qkMAc-~wes0cvSiV940e#}m2
z9$ia<(#Hp?VY(JBH271g?^C+sfhyW5e4-z30($z<+urXMd3$2~Bfc+8S224A!Fp+l
zk5(n^!&8u-=uw|;(pS2f++epH^jFDhCVnzdJ_g@`Gn->&epDbD2%4;HfI8CCJ`Be%
z>Ld3)F{=ou-V$84_<{4PNY~M1bpz6`Tgc65xt(GX69@0#)VRDFu5I9_Oo0hFV3fv7
zrFmL{(-Q`n)i3KvX`}ug{nHrMtsjRcs>PL}_!-iIy6b&81Xd6!N|$;R8j)^3=Z|TV
ziZnf@AXdO6nBZa#Ypm82w|fg#y!rFHU-~D5Pu!K4tU7DX#2YGDa8_{tg!4Uy#H-ZZ
z>L~H(`@SD1vY*{j+STI8%%H2ls}!GoBUVU@3_4mAT96K7H}p><(Uh$Gz5Q!SHVuHL
zxk`s-&t9)ilv|P~`9zUM#yQZq_HX{3KyH+8kJ$9u^alb>e<3j;RMDCm4LTS6g6@)b
z*9zz77Uw4kb|za2{P`4cw~83w6DzSh44;o%k}>`1(Mn+)G#z;HL~uH_yh%xB@YI-a
z_~T=V_j2~0HkbdUsRXs;PSBbFin8ePvjQK0x@m+eD3#`tDAY?Si9?|G@#!Az_mZgH
zGdS^*uK;<t@YAG6$)c#{Oh0O;(cL$3hboR!aD#?ClvK4RagmS1l6PkENW*?6x}h4C
zwa|$B!4szryYmaE@y>U7?!OMTT<AL=pBm^F)~-D0y1z#~REdyUD%Wf7NtH&GCS6cX
zNCz1q>9c76(kwO0nW`>s(dhSiTiA4+kHY*^9cZmLX(zdRAt*&hBDhIqS9DJlU<25*
zahd*n{#3yZ_rJ`lWdamx1A7DZ&6>LsM2Xfe<Me1u6sV)gP?(hyze$_wXLYL{C8Jj&
zG%LfNpR1Wp4Py6!7z$_CWWt%3KO};R2AuyJBA7GlL)@pHI@2{jQVb~=!HD6%1t~I*
zszl~i-(J1==bS{`qrsumH<X7eI($p>45-sRG*Ih{&ltWJ>bLoKJG^hFSk01=lypgX
zgUKPm*WcXyk~DXM@+WnmxTrMQZqRt8^w>Ygw<(@__vS+A<0}=KM!oq(mFv8Y^u(bv
zTyUv~#QT=Jd=zGqb=Ly*2krDN`?c@;uxd!DD>qLH3b+P3%qSN=fZgm_eQQ8`1Dom|
zqt0WA6w4?zG_I-+1K)*hi!_=~J--c>J?l=MetX%gqLvh!ay}iZ{-JmS9QRjbV*YJ`
zYKjh@Vc_ZC>rmNfiX~xrZ6V()4xuU%AM;I6PjV9vEI*r}GF0O;P@&?rGZAsI*#x3g
zL*rESU>&?x#hm)8)90+h@5{E&=wA`}kCOaMF+psQ7`~$CyX6YNkWF-2`y~XJw!<dG
zx}N@<+N#k}Z9oAK0yi%s*NRt{O{x!d%-2VM#aU(AEp3TN{OfqVp`ym$1Iz$27h99a
z*OmKqm7QLG?5~K^%ohRKv=<J7p1hJKgjlCkAxu=?1pAX8OvZV*Btxf}CrIM$@hxyX
zyb>(?M^8Xv)35K<MS*Z;fvIlOI#oHPRWzx5dDN&2YE{9R6+@z{bS+?Si$^t$P;Mqv
zFR@k433ZS0bfSLnMSvBN&KyudxbzA%Ax)o*cde$igS6plrKlA(G)YRiAgCyfI!{rj
zOf^l?)}hXVAA^ZIbhIBIVl6m+`N5j!{xzk5B-1H~NV$t`OqOqN*sAfi3*dJN{V>{K
z$Kh=zQlRGP(O)OkM+-%^Dg4(?6{nwPR8=QPf3)TPuUx0w_o`8gkf?PHIRTLCWfy9Z
z`wgbhrg|Ld&PHv~^$d;@0T};@-*FkGh%~-XdOlKqsr~@x*M#$Dg%ds?>ZaNZF`j;U
z8;$j9@ES>$;Z8|vzs{_QRm;EHw*0541(lTEBgbaS%fIOK3LPcXt0t{<r-rc);Zlc!
zTmpdPSSd8pgyNp!E}$WCs&K0V#qtli)a%-)DjNnAvyb9IRSG5zzp`o(1UQp=y_&FA
zKJs5YR1vWNt6E{zz~ibe>rp10pNBZ(`u{y@u<A)kMAdO`>eU0_)`zMb_z`<#rIW0$
zjLx5hJef5|Sd~lp)v8T@Ao6*S7@uBLh*&3>)J8RQ<EXY*Ri%S2J1U@2uxOxySH*Bj
zR{VwF?<+D^Rg`F-e2sAwDk{U#4Apk8)|+Q@xDsOkkW>f`lshRzsh6Xw8cq|h_h5?A
zwB8ISFl|%Dk}AKrQw``pH4m#~kp-NHYO6?bq^8((;N-ePz7Gbu^H4RZ6~v7y!Tg9(
z53s552Y}P628mOpd<$A4eS15i&e=0;16lbr_78P34ErcP`&U9$K9wq4paM%cpQE_+
z{^t%Ak>uW@$HKKLgya>~M5?N9_0^9S?S{2)n!8kvU^W%0sf7|J-+PQw6nT{bs}_06
zs{yO`%AucPPpKk*E*+~(_vJvLL8^qs^pudijY?B##S>5I8A#;R2K7Yczdnf-`JG2~
zzsvhXP$iKQV8p{f|Ai~)l}H5!)u=tGVoo64=Os9C{Of7Tnfnvn{b*1U#guZ!I*m|b
zz=ry~t5`8?zvBRG!m0qGtyMFvt7<5lkP?AN0-~9|cn0{pYGj&JZUB7ipEgj}`L>59
zXaaR8f=UB<fWYqo)v=tF4fCjSgKguT^#t<F$*3>T)am8`_L#4vf;w9_`YrK20MmZ4
zbqiiq;)kH#?<(Gt4!~hlV?Tt!UvRLwZUwUJ(HS(h$XIZo*=q@nK$jxW`%Xjc1G^y2
zrm7ULdQZ)YM^IJBx;0Hu`$bx<MWxm@X#t@1n_B;a>G~qYstOOw<=3^oS#KXti3%F?
zXv4>c9alqY+u^jT@-NC3Xb9+uRJY?Oxet(tU(v2qzpcV_=+UJ}G1jK_`=L=vL!LG1
zD2JjLtkf!3iPGgm_v(C{@GtO+*!t?qC+43d1LMfMW@Dd16{B7uje6Cus7mYquw&E4
znY6g{G5Q0Ws^$^^Lb>8?)rnH2igRnyVf(`)Z3{%EOr3uir?%x~>NxQy8PzpmJ)zP>
zHQ3wWA2O=SDWK@E@MuL9{)!YtP*sO{viM8faw!2kIK#(PHMc2Z7}aQqRuzE?XQmA<
z>y*}U#>Iahhr1cQF+=K7%mCRdx;jrSIW=<sT~7lwrB?-omGa=zr2h`kF9!4nT6MZ7
zt_)zik9RTt8r{=Vcy(eF?vESaSH6@|Wxq<;8K9?bHSqo;sZQd5M>b6^$^q21O$D?{
z?sq&=eWp`fovW)vY7=t?m_A~lqJ`wUl=sq8EiF$cRe|PD3RAQ9zXSpCmRfZzv@}z1
zCA60ZO7mwF;|V08ond|-z@*kH=f>g1j4zxztK_NMH|Ys|LZYMqeL`ZWLehjSe}UDR
z)UEhcF)6U#6&ea{8U4Rzz~dkMaE?_ThE?%}^6`=?B5JKR+cexDr*sbU^Va*6J9BK+
zed$f@Kl;bf-=y>oS^`wmg+Bu7b1kKAPh`_aZ4P+pPl48xY0-^~PAORQZL#)6*(apP
zzXfVfgA~*C8S13;4NxiXZ%OViUYPL;;MHlZ*AX?bC)I#{?X{_Jm|=^wal9YXa}@vU
zBAKFjG9~f+qC8o{`Ewh3WGVNZ{PbCS(2YPr_Z7{lZ1niJBc%%(f{?{@8jzkq++X~*
zVNYsMS6#d&KucPjC~i~hX71!VQc!%&H~qw<)*mBnNT(#E2Ceie1}Cb>P!~weq)^&|
z6j`*%Gg-zaPYzU5Dov3dg^Q0A=2G}J<^5?ckIyDhXE;A^zs#!~o@{ZDPKRh#{j}kj
zhycMTC}~9zLg}L7hZ#_ZHszW{3*U62`yUptJK4#$v3dA>UP44G{;SDjo2pcvXhHf*
zYhS8;KCNGYt1J|gc#ej58a*mLmsXloiKOsx(WE<{ozCxiWjYPfpph2r-W|#OFr-DW
z2R>S9Z*`yzg~@)s_;r$BGO13DUh(C(wz#AG`YDzARtNf(n-jSeH|gmW(gvm+F-d)k
z9_<`_lLl7ob75+vufdG5o=kJHb9w4sO`J7l_>hSVaP{LGC3!}oU?Rsgi}Gm$(u1{M
z+CRe>uHO0+PagVbDa|AOj#0C}uTqD43)F!AbabALAJbPf{P6y?N780Gwa|umOL01<
z%=J7Ue3Q7n&SBzSxQ%b65%GC`#*HeGOM7~jPx}Xde~N-R(Wvq1c=g*p6iI##HF%rb
zCmOctZe!Q!d8*%0nKK)jq|xc~UGYG1BJ#LTH6>5#K&u?T!{&}^=t(D}N!h_@lF>Vc
z3NJdo`fX6xK?x7~z@^|XCr*V#o!3$0oPxSjk>ll1Asv&#y#}b7$IPz{G`}t75^A16
z_YA71ohxjrS*4$HXYiydC~3(1XxY=()SrC$Ii*8|@HxIuUCHb^L1nM02rW>nWy#R5
zW0OWYPfI-X4(*;r6va8ygC|$YC326NO@EIvdQX4G)d20=(dth#{<}H^ydgn@+b2F$
zGVn}K2~^}NRLkP`(c6dk2#V)p?)Es_rVUMg2`EBn-z$xIzhX8iVx4K7;^`MujA+3+
zZAofFNcB7#QKoq19bJBpr1<?zkw~fD0%_r(8hMi{%_f}#-8!g6<_x3Bn+dw6AX-p0
zB1Gr3i|!ZCOPI16AxVU!4)i#Pz9-m^tp7BSgo`SG<VES?O!0=qAXC+>395nJ0yTQ9
zO}e{PTdHPR=G8}!1}d+`HB~r%5T8m3>2sJ<DJpEy{fje|9#l`FPS!if5EeEq96uy_
zgKdI3i1F{gwIB4KrODq8Cs5S|Is!pw$~oSRT=4rVY4f;W4d_YC*B>BlZb))H={k6{
zr4(f|VD-y8l~fa2unvC)t=>#GDB^U}#OW4Z-P`aS|Frhmt!VQr6^XeaDY{Y$T<P3H
zwVW}91Za>vJx7RN-`x1#_sbx{{N-@%+jwX+9jLfdC75$K_izDKbe+aNHBDHbwG|GQ
z6vN=c@a~>oG_hs5ne^i_R#Nsrb5D#>!I#EG9ntc|p~_t1t0Kkd-1~HO4!e_Gk2?My
zBc8#<4QqKPA}2>r5x>qf52rYPZuh&!Jx@2iM@w>AeR&Af>auHIAEqTOViZBWbK1%y
zu)(CTzi!l@n|~YUlq7Kv<pv72WiKCp+5jNRix)tLeuM*{as4N8hX$ms3%r|W*eRnt
zMnr|OevJC`di{&{v`cMh_2vJ({qp0-Jypuy^B!1V0sDEBU5R(*Psu*%LnfYVyNgsn
zQ5{+7x<yxaf033Vw6=eMd6cN~kf0FYb~0*ZQfrH#Pnv09>J;c*psoK2Am5@V{hn@t
zH;(v>IYsWc{SzmFMz?ezO2;CRRxCeV|57#?6{3^bN$ES*?R~ubb<Sa$Mn>0!AnlPL
zil^W68PxdiidpnJ&@_pXTz-a8dZKge(|Fg4=cRi2Pl+_Bc?a1`sJQZ-S~Q}dsWyaj
zsQ*b&=Lya^Z#E^-+{f_O=(frJvdDYZFBQN&szI8@p}NAM(l&MY*KA6Mio2-u;U7rf
zl@!y%BMFME=Irc>FAcLEEI|RKG!phXM;ZSv5>$Q$y3pt0tG#rkW~iwm>2$nW%YLa9
zR5NKgG(lZX5RoIl@e6CO+S8OtwZ!g)4iBTan-Y=EQ>dZ?&kw4n4Nz}HND|-vV2gu)
z+%>bGMVCc~M*#D%FvOeGn{v9(sp3VQ29X<}{z7Sge)M+-kxviR&O?>xM-zFHHKA9|
z9bru|YP`w52-rDPF%Y?v9ju*P>UnI!@39fM@=e_&sB<S;Tz{~qh<LgR0Xk8T_@h@L
zY30>6YLz}GY&L2e6A9f^GTu@aGL*+i!JQQ&s{a9`F!5uYDbe-o;tSw=9f}Nv3pYK_
zp;B#1k)Zn-O6rq$sT*Z~i|%byv3vM!rXaAhN8OPCCC1PpmQq+XEOK5Hqo#@JI@X#|
zH!4};%&KC(sT`V69Z8Ne0$SWkZi%I#<byo=ImXZL!|DVCym6?)ID}Or;=VriWK&W2
z(?SxrqbrKNAg5b4s<o+*(gO9o<g?YumAhA}$+Nwt$WSR<R!C9tX0-U7w#)TTN=~h2
zJsXzW+p1^FALdGmo+`d_-QVdoGi%qWtc_KLN_LIU$;S(I^)H^)$uv@95gpX?J=wP_
zM&=9tdAh2kZ$n!Dw9Fyuf`x)g(;y_YG9+@(<@kfq-4-ELJXUp}wEnqij=mJx$`};>
zaew}_JpUP~Hj`8R|9q)V7?H?x1PPJyWR)+{D#9k#<Tk)ZQ|Je#ZBg>U!x{q75ouYJ
z(|nQoC!GWoDQYl}?o~p5k0D`vbPghbTKJE#9z|`5<y4P%tenzyKFy-nk^bC<J4>I0
zDIPe909(ZpG>AT37x#c!Azj?+j+m3snN&ymee3!=GX7ykm-zIC-0ZC`FaPqgTIxzg
zrrMis1$SY1A*D=Zo9r6yW*iV+%NST=%=5YH2<P);)>W5+Z6Hlnr0LNcY}c|#naT1k
zyIA9yYq09)Ep*Fr-;+JrmaycrJ~GtjsdHX<g33-mZ%zuiIjLp&?KFIrPZRH<-rcEf
zbotn+(XF;!{A1UqOF;!0pr(!54gDPchxx4v?A5wTL^E}H(|~vMTCF=YsY?n3EkIX!
z$vuvbN7B@&QV4W>qSsdxiyw-sUrx%tpVeLZG~SU&K~Hjw|HIT-b3BW#-FZ;86sf_Q
z5&^po{K>`rA2u#Pq9v_?-CX^w*)#v*f!V%kU`{~Wr(n$pVry1?tKy7E$$px{{GY1-
z*tq<CFg9*(dk8g2Ca5;3pm#!F`q0Pa?*sk>bH=b-SgRe-3f18Pa3J~91#u?F8j=!)
zY=Y|IDGU?qRHjeu(L*)%KHGS|s#0&FHSzj~>a6D#6)r;&-2{<qzg3{0%%kcj*0l8%
zunJe|ZuLZ<Yar|a=0si0%>}jH0p?c6`T*Ont{19_mReK35<01>D653BRw<`x=^E0X
z+jNYkA+%xJ3V8WurD}zu0+2xyP%$=HK!r+uJJR({HOv<Q*fchKwRL<3u1X-G+~D1z
zQKg>NzZGiFbH3+BZDV*z0;zHF4W#n)3afPsRyXnI6V~rjtVX-%@aO5t-68GfrhTA}
zu&xSgJAwqDc=4n~;UZAq72LU@$gDxlk%lO0HvL)G)s|QQeKynwa;*fw`G}zJB2la^
zp8#9<stb8;1?p$+{+f@{&dkTA2cSMUokl(^ru$=H-FsCus)eFn8UNjrURE9IW^201
zxtinbtAiBNSeiso6=MP3p<d!d);+xdS8z^U6vK391$tQ5>t8=q6U_@mJ_2?D5C0R=
zO!CrA{P_(~@yj0?N5?B_h%#YA@L4%s^|~cmL6q}7_9O%Ci%UypErmK}dI|i(NLq)g
z8whDowv3SgL=%0{RxPtKiDmDAytP=i-U7Zs+Hkfg8o&%)^vC0_EtK75SClPn8Ubj)
zdYj`Y%K!DFOmWumfp(w|64HBcQ~^FKn6z|h)S_42BM=a`t2VWzDbsn|R013OKroXI
zDw5vt7GOM1!7PrTOmAz;v69eB1E`nAvW09)S^23Jj2=*-R#2JGnw4E!pt?SFR_Qfu
z;^T)u`K-FPn50G8QgEb&!%75dT|QH%K!0xYp!aSE*keegAo|Q0t)hUA{%U`SRjbrp
z*M!wkgN4(in>0ExroKAX4F1@aYA6>FAf#gW+$WO0VR5cB?SM1_bPn}9I#;+;&HG8M
z<!Up?OrJuW)EKKP?Hb)Hu{v0^TPsk%Zw|D?{acbto%o2;EihzvW`(uEX{92IjUfPr
zDy0j>WyQ1B8VVH(xquy@-yG=56CC=?pO8N>61cl{jnxz`tK6d{McM*$v>5)=Wj#<9
zdJih7bgkAb`g6dZzI>c>>lu(%{$yl>1hoD#x6g#M$OL(vyuf{&pEJ&5Qkvh{sL{b%
zm#s>h?wB-rfv#7IOzOBk4Pb4~RBo<3G3XbytkTAOb!!^BWlKDArjjD1i+eT30=m%q
zAh3(C;u48U1J?7ge+s#`!xc?8E80Oo`r;u>R~{=-IxCinK`n+o!@AG))Wl1Sf-smg
z6>1jzxr%<N6=i-!rpdaRRiEW^w+0wW*`f<iz_eM3_5s86!N;rBiNdI|Flr!;-#XIM
zB5R*vU-AH_s0B`d@>C3rKHv#fkKsD~KZ12**7_(&YF$Jcu-6v8V9~l%LhTZV{w2Hi
zYk>4|doe&0i59FG!1UWoYYL;P;t4P`SY5~2Lj17`TQ4N8HL3yV3D(8cs%}m%SF4x?
z0qa;p^wC_PWLi%iK%a5|NmGlyyKt^7=c$N6V1T#M>0<pg<9f?bsd=G-^b<T<e!c9g
z&joK~)+LbuicSEdS(gpg8sSQQ0En5|-D()82f*{@&-6|F)ho(FGMe>uHLohp!fLzB
z0>r@Dft9#-g+w9xf3#YTzoKCE80yyybxK#EBV4E_Vts~v|GYoF|M@fiZC}@2HtIAJ
zI7e3TR+S<`MR?6Os_bsYFQ;E*RqC2eLHUcSrK2yiGc|lHwakyTw<6C9?1Cj&(PGoZ
z#|lgUg;@dCF5ar5Myn>Qthzb@G}HI*^v9LnH72F%67>s6ii$ovWKx)C9WPlFgsVvN
zbajOa;KBtk3F=uA{fcgt7noh<eG5_+JvKT_`KoiEOYL&10_~!{R1un60@1vtF%4e$
zfYRQaEp3{}>Ca)-9{|0nTeM_?(FhptwCJ+Hrh&<40V$8G&=P1aAkDGfFo%8AxHyht
zaPMns@>X93dQ`|;3(O>QqZXqJK@fpz6=+`k6UsNeZL&WPR&&(?+=TJpO7}s_VwZ+R
zl!#_p65V?h=g-ZXU-yBE6yfM-T}@Y|%!<Z*r2&n0!K@)U(RW;Zcz<pO>GuZdn&e-P
zul;zp$4C#T35!ZlNMt1zdSt!lyHqg&jfm5eB3ad*oA7`B%)@qZ>Fp!kAjJ76Y@oRn
zTjo^@f^XE`r4}t<>$&QShks^yk7zYI!8q9`s{WSUQJUP8W|Pte=zBp0pkSf!6Mxnt
zGI-*0b6q<4;8p0+_}#0ZN~Eh7N|<(}N1ufEGV@a^R|?;I68z;bRRPo8(y_0hWH9?_
zM70c1#CqLL)4URHP{gJNbv1rLDLw?$Ig_+g8<2k9Ua5RO&U!SKk?HLpd~^1YmLfl3
zfB|W~E{(cerYg8;UaiHb2=OT-NwGe@4KFAF4o}a6dU;#jQ1uMuG*{iZ7)8Z@eDUb7
z)*sun$*SdO)&A##O<9uKfj`y9YZWpBejjK@%>F_&A&pCI=%GJuv40?!V$fCzaX(F+
z^=$DCbE8y{lt`)X(jq%0hq>}gO?Gk|q#XUz?iF<+N0O?)=&1qYBWA%w5uGY7q1;(D
zO)J;HBB+Xp`<^HZ%6(8D2WuDy_5Y2N<*3mfEPz6+U@z*lUDEpSEwFFnA3FMHY#>)l
zD^vV05mlkK<~zP^i@|m0vxZ{bz&DT;dCo^&gW4eX<w@}+5kBL};x^?KfX?Q@^@fdb
zV;luXc71>V5D}FGk8X4V3sKS)QkN-fT96Vy1)u-i+V*;}f(k&M+Ch=4^u?H630BKg
zSZT8=`%IXV^cM!~CDi$@-dfaQr;=ICN|ZttZ2!8`L<Q4u58UundF<6L8Ils$?{`1i
z=v0kE^aNH&O(S8QBpNXE*otBl08rJER5LmSE8pMLyPQ4$?*;4kg7r=P>hI(DJUnC5
zicvhjJd;_`ZvVu00OVu2Su-bqDsw<(lTC<!YfX)7>^<O0N;q@~Uz_gN&r>$QdC&Gk
z->;eK>(drgw?H%qsj~P_FolH!)BWfsNreW`x4-|kzbtiF%+Kwg)7zkRiUk}th_wPI
zhLJj6wsYRHvm{eSRracJZ^7K@#xm3J`tbK`SeLJ|t4C}mTyd8CLSZdy4F%dVbVW(9
z<)z(S{&_~6W|}YO*i-ysK)52HSwSqy%NB@O-BQ1FxqkT=*v;obVmKZ8fcmvBnwNAL
zyXrI_*sue(hsn<d_5O7nl)Ci9{+~rt)lwxOektUsN~rmjwHL8A8nwZ5nVW+ZssVi{
za@SfiKeqEK1zZi6*6sd3x&DIHgCI^HA6XCt6~0S^YM11dmlKgy9cu8w`@vBs<D#U~
z&p(EBOGuMZK?Y8$Dl*e9u)SKuTgs-r+V~KZLF<ryWqXaJhVAX8czaG<J8BG%NKr{)
z%U|?{yQy$h1qz+|FidS$!v^rq@;CH`Pf1p-VAO!wYjX^CYGiK!n+`m_4uc^Y8}@PZ
zFkBsEOlFmkE3w9c$d)55ssY~TdT<pei9aP`sL>n+-WM)_S*n^U!ZAK{4r=A@5NgtZ
zF-=hG=l|NnI+N`}`U`~%&S5CErE*}-v&Lr&{layWb4w790;b^*5?Z48`{MCW?EsJ_
z1dF9Gy6~~%m9@O7S|!S@cBK2C)C=G)KdICVpah0(yXdd*#+8C(ow^o~b+5t%<4U5c
zU-ej@3^8Beg@iPr!>!5{)d}ky3<zQ|^j#BH=Y(_dvXVVr_rG!7kMsTSwr9Wcq~;C_
z56TVngBOGgvS8Wnyv_qGK>2fmGAXM#_t#%8j{>wGC%K2y7dC}dk&g})@31sQs?AT-
zK<C!{ivJcj_3ECT8U~D;hG<PUzWz?V)%_6qXx4S#Vx#bLhe#zJe(+cX$(VL!L0(PK
z74E1Vq+bjvwO24sz{!d}{eYC#a6e+%&PI){;kFqdVO3PN^cttCa8wHdV=d6QhMK^+
zQNOm@I^^$r#H@YvQH?h*_f-HmNh+@dz%~~EM3ul`Dv1-E@7CzHE+IB$2ICD-K>f~8
zf4)ep?&pV0d-L)ahU{Nas(5~;-c>*Td6(~F|Gv9h%K%%;od6r88xdvz>7MAzZa`CJ
zSh`eUJ+~<VJ-JdtzE&R_xdz)FRJ8-At_M!L^n#F7nN$^^)Vxyk*bX1-&k(l#6{S$M
zv^!gs$^wddK^zh8uCVg^9|QO3T>f`PJ-Gjy<TNp9*JD83BBm~L6%if}R5Fi69qZRE
zZvVcIC+VF%RTTL@jlt|C09tbu=(ANZT6E|M<eSy<K3g=kq3fm!^ygT*Cf!{UT)dk!
z+u7Z^w1h)P8^jRNU*#@<RqMocf?C1(6J6?$0b0ME-R{-tb2-ZjQO)?SI!J((ge$US
zG4@mpK<io!XAJ(rJPX|CsDJ7QnVy74cuX-45Kq5FHqgjpsK~0}3TS~Uo%;nLz(1>n
zKRi|qBa9iUiZ_6jkOknY&ykA6x7=n_I!gVTwftXR0F+}#FCfN(@aFR)Rd@i9uK?%L
z@ngCm&F6jkGIdu!kuF5M82VqQ?I3&Ifa&;)L=F<eYRieNCa87^{&7*&jk=>+)=#M_
ze2{4k*J+GXP%Xe=98hV~MLS?YxlJ7*UEBfJu+qn1OOy(Rmm3%$t4FblKw(G*zEwGm
zks!XEG`n~X6OD^u45u4)2kLq0Vxb2`w!+0bH9~O<RDWA^pXrAdloJ<BC&$q~C3fid
zdw382c?{ORtM?2tZm)({Y8N9U$vMd91>)B?c_H69qKeKl{`yH+@pC*%y|_OOgN2tb
z{P1TO{`4;*yoLVH+n@NvzCEem*^WULTu-3UjPK4$lSCGTzgmVgLfv31*vZdf{6zOK
z=-i_mJ=4p>D6{(^L7+jB-i2sl3AQ70k}fo%HP3JTo8d$sJv%48+-|>?us%=#5*=v}
zRPcthf}2=J*l4P#qMX_$=V}9eUC0UUz1a-T)n(@wM+q$YxPgk|gX4Zu7cz}b#`<-F
zH9B>k+Pv<=fCy<}QTI!jcd!Re_=z}|9yGLikh}IFO(N}@SEqizX7e5mrV5%^zk0W)
z)IM6LUGJg#<qMw)nrd!&-c6>{2h^`Ks9(2tDwO__gSEpuqU0Ft$IBzWoeJ5L5Z3R|
zKAFZgWA1xTGYS1FW#)?Fj#l&x)s4D1%uAgVUtTF4;YMhITxWhtSrfP-LTPXnnPaH*
zymw7dhn)|6Q=h(b@v|S^x5#P44U2A%qB+r(RR~Gk?^T?YJY+Wa)k<^{cSm%md+ku-
zDYq);DUEwIOa2km&l9LPR&RgIb9|T3hA_UEQK|kl5C7*2_v%)is(a9U(-3<0hL_9=
zPGvM`P_s|fsuVU`cRHMYeKIlm_lB}25`wQ~7qu&LQKloexsG&`i^^60Ak#}5>>w_)
z)TA3AhD#lhs#>o5v<8}+{jUREu7SgDpL%td8q*Z`)P!~Dnvnj)sESDSFJNV?Z?G#-
z6di=3jYbtjRckzP!1Jx@iU3cfcFH<ZuxiJ<sY_Abo^<vGs%;>DBsrxBG@!sM%8gq1
zf-3ofb)2<nUtFx~u9+q+HA7=50SX|VlmC}MlOGuWP1=w?pMrg4^@*YkQfgrR!R@LF
zqZTt1Rwh#IDpGn?H*1b=2~ZXb^(UzHt6J|+PVDarEorDlSW$!mzVND?qJnuH=*jX=
z3JaSC^@g$3EKUltpH72MuYg*-|IjDwNcOFQ^lYc1lD5Z9Q_q-9J!$Ce@*Z;^e6J>`
zxhXCFm|v<o)=!jHZmBzB%xp^iwvw{HYnyUXXsSo+RKRyUd*bCPApwq(2E?Zq!LSRo
zI!T=)XpLk<2Zj=ETUA`Gxl_HtvNuf1(O;GeDxMTi+q;uZmqsZCPn~eKTGD@i74coD
zF8^2bS6+tb1EK9?0@<%AOD|KE{ddxUiEb5XX@>$Szps$2AT8}W(CAy=V?Y4`sDmxK
zLD~?zBE|+!VNd1W@?Zt-t`e7$H{=4=fd2Vo%%4AZ`4oNC7*fA<*0|{k*mqY|QIHh&
zsLRtLb%8qo&NOqaOA270l{E-x1*^P2OZP!}{^(`LwVw#Uj#B(nEh4+tVZU?+*IHBQ
z#0!16`huJl)wKHcA(iVVDi6}3*B=evp=_JP2GGmt_gSo_0jqe`q929H?Q<q<yXjYD
zzItp$F#v?LL8Aoi*J3y|-k-}Ly(U&=^di-eGS_;6_>%rWBlz3!Hmp=Y8ha6u28ss?
zjJ16ILszW7O&PWppg!IvgPTS^cE=O0ws_#`2T7%&QMw`%STHK5R5a>4RhU1w5M%vi
z*rTDvqt#bUqFM?Q^rD!jE4Y7!&QeuXq%l?zgZ_O-MqiKXiP{Hjs-}rg4ACJ3Fh!hF
zP}GXVr;O&_8p`<Uzx#fsAo|xZB})b4+(*|0L-ghHm*Kyjj(!ble58I4EM?t*^^})J
zF0BYQvHr%RUpAr_9HlzGP>D~yf-`Q^bC7<^`r6PlpIp^=`A4TugRH!mpeaa7k;cMI
zRRh$o&r{#er+?tVYRp9Lj;Ku;;hpztPGBqWf56DPcn1QPfe^&~lxa#Uut8IDkGtV*
zsbdtNlld<vgeTcTr{mobw}UA9(gUCO5Gv1mw1CZhRP8i1O$8U?|D#$9`e|%&BLv2%
z18z7fO}9V|#Q!%W`MN%rh8P3r15n`!q9X-W>3D^8%oS7xRcX2oWNP?3h54C!(>8OJ
zXEtpk)zf$mB2=%Dxq`@=6b$TQRs7U7A@wf)MU?tOZu#t!2tX!PoUenn+T<Zn&w=Lh
zWcv1}{<-~C|CHs)bR%J4I-0n1l<1i+&%ChHjf8TA_bt}K?{;sb$za(*fu_cAIX9IX
zdZmw%X@{O}p72ox8!6TPI;?ON@gBkBF!!O2AMRt5MQh4UNt*79p|ggLqF+Jvvx1Ti
z@JhoG)E^Et8B@)7<oJgil&4JjuHV<SDP2Y-I}IIh_KDylfG)>Ta*Az)WK^ABQIRTs
z6{BKyp6B`foJ02n-i%Mb@2im16_K7)hxr~<`=VWC+%JM(MSRXX)%$dJYuT=p^pkQ3
zVOUC(3vX(1N3Bq=WMB3;NmK2|l01>OXzyi@k}Bv^DV8Qy%BC}>d$2q_fh}VOeRhF1
za8jny4`#YdeE3}mYpiy0{X<eHdGJ0szwAX5E4d-vPxQ;1P87$B_si%I(gae&Mrc90
z&QsR;Gc(t_N8D%Q|AXGaIt8dcXH*NUKRZ|#c8cvAYW<#38#ZfVB_aZ~7>FdIzayw1
zCKsl~tmoNDa;?@EY`wE9k&}R$CVHqyR&y;C75&gfqD~}EN^z^+1D}&f^;7bdTuM=z
zA=iTT%-Rz7BSRfc8sZ-4dRH_msDeII7&j49S8gnj=uQgd{pj^Lu02$<-2oK}<$SJ?
zlqQKf!{R9=I6I9dO2VX2d}`0*0EJ~L`pu!LbCPC-B2;}NWLxysD52I_-*m0kO!|3C
zbCTnnklaxzyJk~Hper!j1$x3lqDYA{{&$(7q{*<uP#*u9qFR-hDk~ENP$$Mm+3+(}
z#QvsBU5HhaPUm08zbIAV_O7hUMok3D5>x2H$#}9#XHf9F%78zQg#XSB`t!(JrJBgT
zJh_32+(E%lRrFchf>ThHvSQEGu=Ejyp1XYgXa8Cl8kT_W6JP!!LoIz4{%e4WRsUyl
zt^Q$O50}5et;%IIq$Vt;=ETaX%fK8ZR8lhaf>k%4^dk6k{=5SZ&?43+vuYAnJ~V6M
zPgTcQKNqYqDqXMru=@6qh6>84IYVW2$h-5Ks=%r#``0yLg@)dJ!nL1Bx#R)N<FXQc
zO3|-&9h>V_lJgP@Vo!-itu8+T7av1Y3`V)>VC@{Q2Va1cNFzbD;<b`R7Df5Q_0)hi
z{=eCPuq6yj%YyqKzzVuw5NE_4s}!H_E3xX38UU*6U-JR+I&69snzXbm;z7`83RD1=
zMsy|Rl<s@Jb@8t{#Mz}@zo=7Nu%9u;i}4bJVm$bmK5+C36q$eslimM9CF-1peOg*@
zQ}zj)rSIw&RXqJ9O4SKcCx(z3-=;;ZO2i(ZPdc~h)KT*q-~HsP4!dXSa-V7rs-hbZ
zooe(cmslN-Rjp@KiZj&jl}S@ijn=<j^=~r{;r*o_K_ou*RJQVZ3b#tElQP9L+$r{k
zJCg&5m>Ox{Vq{9m@&yQ!jMDMqPtK|yhUepf=?9Ph#6u!U`djxd@O)6oOpCfO7*i@6
z;FA<T#$o;|$&`)@bjn*_9q-CQ^#NIx_*O}t?pFodt4N&G?pC$w=l5+8`$y{7s!g61
zjrTe<{-C+4a8{LMr3oWUYE`RP^s}x@u&tsrk~=u0t5k@apwrw3EEr$&u|ndqjx;^~
z#{znXSHQ`{!bG744LxIVWpSu_<5%Ioy0Z9FQ>9{ibFyUf`tQvmr)eONQ3SlKl5@K#
zFz7AWio`>uZs!)ok0}LkX#<?{(ywHS5e0a(*2xuCU%mn$QLGphSe<eqhSv&qwMqTa
z_h79ZdU_RxbqnB{o7(S(0q6lmrP@oH?-gdj>Zb|+<O5`5eoY|1=sMsaKX1S9gLwKK
zo_}@Y^Y7=U@!7n*-^Z6H%uHIo`jZbJ@|m&;rB&PeKQ;a!y&k>4ZpA~O3RXffQT=^Z
z7aua^)z5y5+WDl4>JzV%jyo2Me~49Q_4oQl{k(OH-`_79Km7^Th6n(SUm;MBl9oIG
z(8WdNEzkcQV$40Yq(l}Lc$<gd{GyWkp-SpfaXJPqyw<BS1yK%FoXR9r6_`Zsf+LjP
zs{1&s@+w2RzE(Ma23?|=i=+cNx$m>vx1e2QY3QX&YT7lh4J413S;(exe|)CE_OAGB
zztV-X>N+k-ac<QJMG>Ze)!%)wE;*}t{j)Nqvt>)Y$~;7T{c)dGP@>`LHaXM-hk0HD
zYRaoin?>qF8mhA#>_YHiluA_U3)asAthezWJ^m9v!$1E#{qy=y_$LfH{uw_nI{x%B
zihovKwcER?iYGOd10{ND1dS^e73>Jrxv3s#gdn-MbF+SZX1<p?y;mmu>03gq?aZ1;
z6H7>2*fr+T4~zj;!&cb9UB_4cx*wSJDO6(l)A9Cn>peTatB*ijed3ezC~Ha*rXdfY
zV=U#$bVUj{{>p=itbVQzmi**cHKc$|9tHQ{Bj8Lls$k6$L_(LYstQg9^td%?-&3vA
zkRn-$9DvdMvNC0@3Uw8gZ`D(z1oaNq<ddlCfdOEP6ad(>LHKO~U;HwpQnNyN1JKj8
z-*C`0GP}m3B#49r4U@}JTFAY!i&_I%xt!uj4|enHH+6iY^p8A+2d`5x_6wA5R(Uj_
z_$FSre+AX`<BoQAQ*W|E-<%^oV&3hqjJvrdHt{MqQCC2c*PkhznNrSEPf;L#^9KiP
z6-Twqd{Hi4B9E%T2+olfespk&dAJS~`*Y!*%@v7p`C~8Z-!_{_Is`PEsy(ky;8ZP{
zv_w4>Zig@Z4D|EX{|4#9J2F(%=l`0}afyo4;pDyNla7)^2axeykA9>QDN9aJY~q@b
ze*8&7zEk&?|4{ft9n(3Lag^V=@RJ@5ST$t4^gA{^r3&6D@Z(W+q5?AU&K4eiOAmd$
z<U>u=Q1y$iL)@yI&`lKtIjHth$+wCVCIw|%ms;tZ8>3B!HMiU4(1f(;(e@|fY4W`*
zLsRsBg!1r<97vr)O=Ql$(&;CY+7-3u#;US`O=Cyjm)Vs^Co)a^lhR8SE5o!!xw``z
zl?dWXDx*&t|2B;ALs?Geih^*e`PXpnyrXRA-f5C<O+4N@?ZMT3nMp%oQkhtXt2psS
z(bf5u#^;hf-&WYKZq-zswouE9w`q*{cyE%E>${)yy(=(ho4(@)eR8Eq@ey034LS5Z
z)og~<vgnjHD!u%lr#R`Cw+Qq;>ZjC4wcP}Jc3mASeOONe)k9{<R%PEu1kCm;s0%#C
zJ*#;C7a0B1e$~{Zs37yo(>^6R0XhB3-Rd|$Z<s>!X%y|q?Z52wr)D~|)k2Io-2q2f
zeWvwn!HOr}yiTfqX$PEdfkyQKun&{73IziG3aqfwM1pU^dQXQ7pY`$I$J)&4UW$A#
zb}feK>B@q1?sv|m9^m|;U^-=fUnXEXRe=4%-)fL{OP5**5;LfZjZxxV%?xzxvj;TZ
zS^tHle-#Pj^Ap^bmt0>_Dt%db*5%BvTsv1w;4i5I7zGtoIM-emUQ3uS*u%w7SoahN
zicf!N*YieY2SF;(W&5J_#}}6?c~?@-kLw}I)KNS2_#PAL?B^(^<kSvbaw?{PQVlIC
z$p05@D!xdm68Q)D*A=5uLgi_C0q(OQPr+7Gs<}@w?t&0KZJ`Uhx?=rN`jg^T$2p&?
z@kE#QK?=%3Ja>UwjMK_QQjwh26PR2>bHZuHzeoE%&McHW#CnYpE0_FZs~@;l3Wn&D
ze^mvVyq4VjvFcBof4n}EMjZ;!$uWAJ1t`XTC6QB*%JXLu&pHJ9NrOPo6r0#<w7v&3
z|2gr8iyz`c82964-(r{+b2ZeWP_rl9Q^V8x9G&x$v=F~+)IPaQ9h3W%iJXGe5{`gU
zN_bHuPH8_SMn?_S4iA9559u8^omiTJtFlQQoxkYK(L|_MAoaIk7LV0sqgE_R3{@nB
zk`GkR{y8?QX!5T&YFP_hT})77Vu>e$j<HUv#DM4X6Obr2-E#gf>t*eTQqQ|}g;Y|T
zl50b%S*~KxtFh0Wl$CBRQ)wc1mvFDRV21A0%`BId>~crtjPz$8X~e1%BsHq1_(jC)
zG;Z37Hd*37df9KNI2o|pY7#P}g|9-rpPjhXO!^J=bOU~Z|B8nR*S>J>$u0b)dhZTs
zhy#d)+Cqa!$9upPNXuN1O@#S%pouTr;RX=&g>Hl72dK%im%OTEQ)GZhg~AAn+In%S
ziu{VSNir`{)SN;6jCa7FKW_A36s4wAH2%49lBFD<)h5K&)l}eGKb-^RBlK_$UMEJ?
zQ<y*+gNi8!Sl<2Yr$;!ST(3a;fNm9*3BcSUd;iI#cvTb1i0LOG`~EIa;{AUL_vrpT
zJ#|l`9pAc>?$1e(a`IJmK2zlrz%nc@F+PF%gEv&^Vx|gCbp&f}HIpW1#qXw751)Vr
zrPn<bb+!jeuu5iq$0o4V&sD0@JuU4cyDF79i`$^)lWx}t-9FTcmUbSiiSH`fRJwSy
zWMNZ%vvy_E#Ho7<Gaf4gh0K@n>17J9YrV>C?AmL|9A?wvt}-?$`&EIal$FP;pGGJn
zP*<{PY=}xKoVrWdLc|M@xPJ<py6xhFPql_KWmr#oly?@t-v7PP%ULWcyTYPwV){5o
zNqPxl@%Rq#UT3A)QBP)Ai(p0NV5J5W0~K!$q)%sE%GYnIJs~Q#17zW>n5;oq$NG8G
zuh9d5MRmWZQ^QG7(+uln7hq44c!M?f|0Kr>lNEKWKa^pMPfNe%f#2H~m+#j-Spc>j
ziYi8Gu(r2J;`t2lICTB5u@7V3!D-HF=n`DS1}pK*RknRE*}g-t%`99rgM)9IW-Mx!
zO>OJpkXP$byKd!k7poS%;1H;#q;M4{!hskMpRE2CX<a^dPd$}EEnWkyCa4vl-^8eM
z26GFw;ga3cdf_{uNMKr>WvS|5zi;VOO>8SeKg_%<<58}qDJWcX*B@Cnsl9ehmem+e
z8S~J}&>QMmAMy-K`g-Y|-DiR2b}e4oB=#GjHkj2eeamgdp(epMB7+n*P@k>kxz-g_
z3Si>-YT5zIPHlu*ZwTCx`#FMYO-f+2KwZJg8FzC6wc#Du2tF~SzCb$3tyF<dS>YiI
zCoxy%h_tpL)v{qOXF!YR%K0gKRl}J>mvt;Kmon@l8+AvQwm`bng{9`rK`lt9L)EfR
z8x&MmLci?j>y9b?N@A{1sdjY)^M_#Oae(w#y~#Ahu!1cg8wp(rmn=3dgue5Q>TVT#
zQjkU6K_6~z2Lj9P+!{3QUH+|aRVhozu<GKe>XO!99qP}|ExS_({Z@NBpk5h17Te)_
zp|}Rp3<D*HPVs(YIM^j&?{OXk**LKe)v?xuaU5)5{x<mIUO^dG#|pu^&#xN>d3rke
z-nSdN;q}^)2w8ST(IPJ0#JXF@eAD6YgwhX3q3!h@yrte}Pz!I{(_yo>2IJm_uu``A
z_g38%31L{(#;hXK@0{-gx@=EF+DAn~3gs2HZH?VRHMi=1#Tn~I*H1J-dqsHnKcZ-{
zSk;#CR>(z-Ey&Lq<WEX!KaKB+$jGG{(Or$N>E{Ehu4`~|`!eisF*2z9=2DYj;ejO+
zNYs&{RfT6y*S=*hpR0{t99w_8y<?SXLOLjChmJOE-m$L(Yi{0--qu<7cVt2qn-+pr
z&<uG^tafK0rvm)Iynu$T;d-TTk9D0vb-Y_BOpPrQ%+Fnz`198H_QU#*@%g`o=Q$26
zi}7bJ%${KRmVJ)_E%&L*m+GlG>2%HPdlT&n+opckru8noJt9uFQ;Tk~bf)5&s{w^K
zpgeKc7HxQ|jj&I~dEV8;Y2r9e&J(AotAT7<L5bB5FrD1npNFTvQLnrt3Fl~wswOq#
zqPMY%75D$2`K9tLlt%UZ>kj?mvR8|)rnFb_Qq^!qq*m~<z{F)(R1P0ZwUjI&+^ogH
zk|E^=>sZ0R<U=CiOA0(YpX)p!8%CqWtU(q?vtLy~nan!aAvkNPlpP;z3S7?asy0TW
zHLDr3Dmt4M25S*mUZ8~fSwj7~{YncTpMRU1M*DR<yPp@xU6@Mj+^jpK_ROw|=iljI
zDSNA%^(z6!>h~{wVum;=9zE8XUlKDj(2nskr#n`LX%zFwxM?%IeZ0Nh@7Mq??tv$^
z2}0Q043mpBb>yzeEp|C>8B#tro^Atp169M~=?YXF=u=uqx0tozJy=s5F54EsQJYc!
zpZLF}WP3OM*B_d7LQ7QAln!#L+LSr<51bS79M2AO%*C|;vP+sCt{E48AqB9*&@9SP
z&ui{+UCvv<6wNM*I^JaFx;lnQn|8dzvQKBFm=>-}xfv$1<QT%6DF|5*d~-e)K>04f
zl>mQ;*nYH2p71H|18RNj7d=6-%gHvikC${DLKb~aG5(`g7irZple<B=F=K)oKlAZl
zo9(Xe`sx1k`|0UfKKZAor$0}-D07F449BEl3L}|asv9pDMYqCu0CD+Yr&|QV@J~A4
z1o!*+`)O_$*tev%I~eyk7}zNS4f9OGpme4;-vXWNSblE3`I`dP=^VYIhvZ2=t?z+`
z{A={{2(~u|qvIU4S~To2NKQ%Z2FSO2QG$EI4dDILzk{0dn$WWEku7xbE@CGtVolwy
zndOjxe9r6t^HrRtY}UU;|4TzLe!NE?<|*9mu;w`_cY-;ixKqbOho0kHcn($S$g;Py
z+i61k=KHcGqgI05HENh`I!7-G6o<k0A-?+Dq8h7j+3DXb`)1*_#ZhuPo>O;!a$Rsw
zzn=6g$kq27i|*?%%VOoB7Kr(APj{3n)J!tWiQho4k?z_uK2AZt4<>qnf9|9CW8L^3
zMxwSNZ;;gZ63$aG6IsRSOPtRO%5QWCe&zHU0`2dw_?x73@r@1p70=QAx9Xq%m@#z9
zjOtJSIvO?JpPpXte<02NH1{%OF_ZtMRxl~CsBrLi7Sx<9=-0+fiJmZ}c!^@s`N1*4
z%JZvImH9H|I0tMZRztotR1tosh5Xzu<g)ouhWt$f%&$ul!dDPVz0XpHkPbv@LF$y?
zSFXj3<aTDYl3kGT^CxbuKkB{j91cZ5@qbQ{u0ZB{M(y=|lAH#HzhjgNf;!u})L)+m
zCnDo}b7T1XSM-Jc$i57X&Q5M=M`3uzSt?1gbSUm0+VQi%IG@_@pXGhI-8mYY?!frR
z)6@8p2I*d2&g{6?Ka1WT)I~_zE8f}hnLgc*Ut5TU8O87#w_v6td>TzWu?V>@no)J-
zHSH+=L5r7zq*q}^bGX#>=Iz|lbSj3?5B~i<UNJAf@Avyt{O0}Mb>91K7~ap$jL<tV
z@Rrw;y?KWD5vd2&Z?B&npyc0#^mfoJYUs+Ocw5Q2xTPnC7a1L2iMgqcFE}Ap2gcm`
z5X;Ga8V5N%y^b&AeX2iGsr1y4WjyAV)VC#VT#MqZz*uyF?|EXyLf}A82a4Z0Z1_x%
z3WNC9UoJjFe?>pfzV?5;3afs<ieE8|TPR41M`RB{bj5boGGqApo=?R7q4>Eyzx^`m
zt^XtLVNfc#LAYQ{=k7|G;qA$YKW6v2UF|rz_FYX}$QbFxE1C#9_zie=ET-k0KlnG8
z9y|KrI{EWDVCLmdh|gW%L$3#M!5-o99*ZFNu=&Z*CiRiFoD4l?9$}5jD@h36={NH%
zMAOL-h5xd{$yQ=rn51$HBX$kD2AeMG>73w;dmwU=8`m|id!RAxF7PvKkNno}v_-cO
z7InGyDC*<n{v8hn12|PSPTh!A*2*m#lqQHU_hhyp_qON3rA=0x;(thpM2A)XU%PB<
z2fkQY6SAC{K?Vy)LZi!>dDfmEb)WJSrz`!F@)X%-Z*OD2TMyVh>g5Bt4O7=ReTnm#
zq5P)w)c2ALAd4=Z1dR+l)9p9(V>iKy2Pn{37HoR&*Pb;201q%27lP%Bxyh4Mj1>R(
zyD_|t=FOxRPLTWR4LPwPnQwP7PlI{#TcfLBd(x^l#mf;EBZZVGS<b`P{QZRZi$I3I
z<Fo71yq|L^?KuCz`>29E0KguOz;{!a?eukOCrh7a`y}EYcdPC6d6Lt2XS*pJ?Iu*0
zX$p~;DE`e#4nuVC;)?x_izY<8-|_}x(k?A4DT~Vu_gXx$MCWa}oP&|;d{+(N?4zva
zxuy$l7}S$$K!`WE%*JTsH{(0u(Z=mu;9ltg%-zsqHss+azHon>orv-GJ?Aysc;W1E
zI={s)jZP}q{8UbG>65G+`+Vp3j7o=3yKMy5^KiM|C8F;f&_xn`0p`raC{_AZCN`hk
zjWH>skcfuw!|*~+#%9)RZ(mAZ21`4s*kn$)_(&B8BE@zbWk(8`9OTY=r%U<!Yi`x|
zaWneFgOGORAe|hXdV2bEw&E-InP)UHxEXzWkc<|)QkOjFKfBK0e5NMC=9#i5_m9j=
z{`U6gPjtP`VovP}*HyY=_(eL`M|YT*(AS3CqWs)FIH?Ki<kS1#FWs--rzqu)gT2o&
z#g30>t;8Ul%B7*73Gn9RAX=Ha+g`F*>%r%&yG3MATk*JQOWwwOSD1H!N|cD*p`P#r
zlUl~^fB*eA<%C^7+@rg&BQn2B-?qvxt)=Bx)Y6MlqnoxcOD8W|gd?O3=;uwnyu9SX
eI|}e)>LY$VF2l=xso6QK|NjFqE)HdPaR>l(AaSh#

diff --git a/tests/refactoring.txt b/tests/refactoring.txt
index 75db3c7..bc7d086 100644
--- a/tests/refactoring.txt
+++ b/tests/refactoring.txt
@@ -57,7 +57,7 @@ overlap() -> getOverlap()
 Backwards incompatible changes:
 ================================
 
-1. Empty cigarstring now returns None (intstead of '')
+1. Empty cigarstring now returns None (instead of '')
 
 2. Empty cigar now returns None (instead of [])
 
diff --git a/tests/samtools_test.py b/tests/samtools_test.py
index f0d52c9..7c40237 100644
--- a/tests/samtools_test.py
+++ b/tests/samtools_test.py
@@ -18,12 +18,16 @@ import pysam.samtools
 import pysam.bcftools
 from TestUtils import checkBinaryEqual, check_lines_equal, \
     check_samtools_view_equal, get_temp_filename, force_bytes, WORKDIR, \
-    BAM_DATADIR
+    make_data_files, BAM_DATADIR
 
 
 IS_PYTHON3 = sys.version_info[0] >= 3
 
 
+def setUpModule():
+    make_data_files(BAM_DATADIR)
+
+
 def run_command(cmd):
     '''run a samtools command'''
     try:
@@ -93,7 +97,7 @@ class SamtoolsTest(unittest.TestCase):
         # Samtools-htslib-API: bam_get_library() not yet implemented
         # causes downstream problems
         # TODO: The following cause subsequent commands to fail
-        # unknow option
+        # unknown option
         # "rmdup -s ex1.bam %(out)s_ex1.rmdup.bam",
         # "merge -f %(out)s_ex1.merge.bam ex1.bam ex1.bam",
         "reheader ex2.sam ex1.bam > %(out)s_ex1.reheader.bam",
@@ -242,9 +246,10 @@ class SamtoolsTest(unittest.TestCase):
     def testStatements(self):
         for statement in self.statements:
             command = self.get_command(statement, map_to_internal=False)
-            # bam2fq differs between version 1.5 and 1.6 - reenable if
+            # bam2fq differs between version 1.5 and 1.6 - re-enable if
             # bioconda samtools will be available.
-            if command in ("bedcov", "stats", "dict", "bam2fq"):
+            # flagstat differs between version <=1.12 and >=1.13
+            if command in ("bedcov", "stats", "dict", "bam2fq", "flagstat"):
                 continue
 
             if (command == "calmd" and
@@ -401,7 +406,7 @@ if sys.platform != "darwin":
 #         # "filter -s A ex1.vcf.gz  > %(out)s_ex1.filter",
 #         # exit
 #         # "gtcheck -s A ex1.vcf.gz  > %(out)s_ex1.gtcheck",
-#         # segfauld, used to work wit bcftools 1.3
+#         # segfault, used to work with bcftools 1.3
 #         # "roh -s A ex1.vcf.gz > %(out)s_ex1.roh",
 #         "stats ex1.vcf.gz > %(out)s_ex1.stats",
 #     ]
@@ -415,9 +420,6 @@ if sys.platform != "darwin":
 
 
 if __name__ == "__main__":
-    # build data files
-    print("building data files")
-    subprocess.call("make -C %s" % BAM_DATADIR, shell=True)
     print("starting tests")
     unittest.main()
     print("completed tests")
diff --git a/tests/tabix_data/Makefile b/tests/tabix_data/Makefile
new file mode 100644
index 0000000..22e5f55
--- /dev/null
+++ b/tests/tabix_data/Makefile
@@ -0,0 +1,7 @@
+all: all.stamp
+
+all.stamp:
+	touch $@
+
+clean:
+	-rm -f all.stamp
diff --git a/tests/tabix_data/example.bed.gz.tbi b/tests/tabix_data/example.bed.gz.tbi
index a529607bb4551fa61b3ed359b8e33c2d9e92bf5d..cf79b95ae232e068968aa9d863a03164350253bd 100644
GIT binary patch
delta 40
tcmX@WxQ|g#zMF%E0R;ZDF*rFhF)W-Y*u?F$xj9IRjX~6Z|CNag6ac@<3f2Gs

delta 42
vcmdnTcz{t*zMF%E0R;ZDF*rFhF)W@a*u=X?XfL0oK?4JWtJ+@OiSrZy(P;{_

diff --git a/tests/tabix_data/example.gff3.gz.tbi b/tests/tabix_data/example.gff3.gz.tbi
index 855e13926d35975f3b8d38b5cba65228dd4ecd00..d23afbb6fc39c07e57879c620a1b4b113dd33f9c 100644
GIT binary patch
delta 1388
zcmV-y1(W)*3$6<hABzYC000000RIL6LPG)o9FY+@f1^Yf5k@y5mf}Ssc+thK!~#PE
zA$L)>_5FNcI}Bv=ob8;m_kr=+_j{f_&+q@)&Ytr;-7OvY`FZge@zLS0g5j^#%8h)#
z|G<&Dyq=M0d2DpOBA$G2Z8)ns^0!+ezOW<waAoAnH$;3-&MA!i(42@bJ{Hd26#1`S
zium0<f8ocakw4!U@v)|Gp0@XPdtTP_iOBz?T70r5T=0D4@3uvJsXaWtQ006?+ZTkN
zslKjfS7lhZF7k(WM%=b5ELs(L*YniIu&+GwwOiF*+rkq%u8+5OcKEsSj;qv8{mMIb
z`<+-4`9n)0eqm|&h1$9A@rd`&4NoqO{OOX2e~+$IzBuxZ+gF8OsvTValBzJFc5wYm
z)UKsEuKU%;FRqDve`~~Uzth^k_j^Z{EmQlb|6VH#zg9oG|4wy;6-AM6S`hL4r^0X6
zM1J3vh}{nZi&UQZ5g&L`<yjH=hV>E8tqrR*4*zV3*yFLfB=YY+7jbueSh-Q{*%|Tf
ze;32EY9IGU-4o$@mEYwalr`Fqf4+3LeootayKkRXeV>i|HI1_~+r#fPP6~@7z9Ba#
zf4L~)lZ(T8^{@NY^QNvc@(Im%&-=65kH?>n^Ml&i?d$$&&^TQ8OvHIB^t|Tpt<s1+
zkAKwuUGDD_%4__1-27Y}Zr1qmxT!DKe{q#}T%h%=S><s#uDU#0XS}|Yy`uVRz8~2X
zaqH%=adqUqPI=r7tc`qcbHr}Hi>jCF^U<=fUH$8R{a|7E>*B~)EQ)wYc4|NFKd%Ru
zw7s`;KVMe)U2c!-POUS399Ph#{kR;j7KR;aAGgc!i2Q#<cYZp&*5!?UME;Guf3c6>
zG#@+<?#o|PAGb%(@~~UwbGdqSkO8d=UJpF)e%HF@^(>)rwO8Z7{qNVcL9N$br#+7b
zHLpCM+>fuT9lhRrJiMuK@A_Ps7rv!-bo;p<Z)zNS{Pxv^e`q~AR~NC@nVZUcee&}M
z^?K~rVXwCbwT^iG_*?#^yyGiff8imuv)k9NkGIvHZb#44x3vy>{qyVoJ1VE^<8}3@
z=9}l4Uypj#Zf-BH&;MvVcpP}$_*dn3x&8Y?zUKYthsR8+blc>b?(3PZe>a<~$-l!5
zA!6F~-yXT9|CU0;?APxBWQzV9499vO5i!H{dnVJZ-?PEY(C@uZmwwM5e-YEK|A#nP
z2b1*wpt4Q>uL=<n5iwbxi;*k(oDxJF=iE0$d}w{HX}0KdViBi8p9`NY`keg-c_T&o
zocC0le9ky#wmz2|b5nn&W};rjtkR$1z=`S4Tp}VO;>^*XO~#p_KRcbN-a<^HzNQCa
zCg^JnAtGj*zQ$FinS6~ce;nWIh=@2wUwai1F;n!lkkh5Fh5R7zWVyc9DW+CmYZVa@
z5fKqF)%u<eIJNrTABdw(zE=$5c=}#Fh!fHGc0xo%M4WE#AtFw(zV|O;I`zG!5!0dX
z1)uBsUiXMI(esUnGtXm9h=@2H?%RThh=_=Y>9Te!A|fIpA|fKDf67_g5L4$hL_|bH
zL`1|1cxF3hqrJ_Dh=>nwq6HCga^6NnL_|bHM4Un|>_EiH+TV(Zh*Rm|ors8tnC-Uh
znw%wVx#3+zL_|bHL_|bHM8wSXKc;m(NdW%<03VA81ONa4009360763o0CWKD#X$;0
zPyhf>XHjz&jE`}dPARuhZll~r*(zl-*=WiP-rKxrOrnKt+qP}nwr$(CZQH)yeQMk1
uy3MwKKW*RrkZ&Ic-{Hx&KOTzzxGTTAsrFsH{EHv;v9D(G5}Z|&kp&<d$?S#z

delta 1391
zcmZ9MSx}P)6ou0&E;u?wDpXkl!L2H&MFeCOBD5kXi-=0dR0@&Zm_PzqeoK``0W%H-
zL?x+9Rknmh0wY;~M4+MsWC?o|2%x`6NCF{yQKv8c=FWY&=iW2t<uW(aY=FXY*R6(p
z#<&LJ=l?Cl>K4=;h(%aJ16=KkCXT@gJujHT)&5uQ@S*HQJ<0E6b|xE}(KV!PCaCpv
zv`C=^rrkxE^l>(`wKbcNr+zzovS(IwS=wgc5=Fn|Wb<k+Vv=!Xd+4O3IJc_9a`6b<
zo4bRAPng;(Mv%h0cHh)><WWVQ--Vzo)gpTC@@pI5omP6wr{8+ve$k~OwkpGeE>>Lc
z09fZp)$n@?+!QcRT~0<52+Wu2EA0KIK4D~}i2k9PaC%CWmfskAKB{(~oH<id|IRmV
z3Un>!9=;Zci-x7mC}puy8}ZZ9HHG+(OCGg$a;*hI_o`K9Uax2tK<DXOZIR!rC*z<1
zEV`VOFlB>x+Te5AqtOuO@{8ZxTTO8pvNJteRY60YvsAY%QO79l^}xI;r!wl%QmBy|
z0i`f0IHL_{T5%jM`El385bYYOlB#2Fgf_u)rpr9*9L5z-U)F7x2Uj@I&%y>STF0c^
z7@VwO?~|wbY{8_(MsXwOhHMs=0JNYW5vB?hIJ`?Nlt^uh(uZjPpVUM-IYMk=*`6UY
z@^r`Y>&)*8SS}$qf*g4J#Jd6J_KAlILBJwQ+pl<h|A>73YrIHXFidM_&=gg}Lvx@Z
z;f-7LB0^{qq~uJ`*UPD1G=8rE`6+Gl%Oo9PRCmgXDPCS^OsMPwFopQ&M~juGr5Jao
zM$dC-4ljc9mnM-1i?YOMHPxA!Q{0fu(T!DkBp^wc%6Bd=1{LEEs>Aglg^W&J?$vdi
zd7Vo@WF+}~@?ot-*K-@NK8$cRGL;6VRyk9wBfB5!8?E<S{DDk_HFVBWb!Y@0aC<(m
zWz;~`?KJ`L-jo57%(HO^5J4WbCFcra3PsWny+?DNIT5^M9PKgij=1h>TYpIYDit3k
z&Z&<fmM(d28{o)EmwkAZ8Op5r1zC9;7sOH`m+uVs%*{|Z4=^7WJWg2}$`@xlbEw>|
z=+JSnfFY?6$MQE3xnmXSLa-esca_>O=7II9veGdgh0I|t;XJbFJZ5?~$Uvzir`(Y+
zR>fKuiKj@H)yK-f5;Mt$@X40RX0Q#lhu=BJhnc#}_b$i=J6*@b;YiwIM6ls*7f@$^
zL1X<-xE}Zc@rtac|HPhE!DS(OAPgZ)UW_c7RV5$rP!iDve@hgBLL~@@2EzIoUORy`
zB}p1*X9NNv{FUq!#GAz)H(TJ(bl6WU>s8iotb1mJ@OVE4v2?HIo}Ty~t7!+Nhe}AB
zyy6?Yi>(yljE4YU<KcGODyI2QB|gRs3dMYwO>%tO*DX0Jiqw~;3UP#u2XmG6ab{3#
zFv`kt<OFZpVjKyeh^A1eAc#sm`<P`?>O{_$65l87m??KX;HVj{p?<-<YX1F|l73aU
zFK29u*kU&xEb(8rKv8l_SA3wm&P$RuLl1_JBVJwMH5|HdMCsM3KuSPc%$}_J^_UNt
z<y%)o0)NLZw{AP(Q+!uqeolJ5mI63T2;Ec6;a`|As^HU)b{%$r!*5CqkqBYmiD&j-
zNAX=C^>*!(20QqLoi@pVE6dS%<3QiX!6H~uy4_dJ=N``mR)x7Y;~+)|<p2MU0GWGn
z)HK@{0%_L&Y^JoR8en^eIw-ALB&+oSvTeBYgSqSCk6=qKJ#8&c8Dald<6Xa}7>8KT
vNq9C_H4>yIEZTYpjC8@y$eT*=z}+QhwdE6SEyLDc@eSO#<x{~;6T5!_w{`AA

diff --git a/tests/tabix_data/example.gtf.gz.tbi b/tests/tabix_data/example.gtf.gz.tbi
index 6e4fb0bfea07421dacec207d3f59c87e0a3a76c4..aa5009d670651dcf2d21e9c62982396099705b52 100644
GIT binary patch
delta 21
dcmX@Yc!Y7nOzu^(Q!W`WFwD`9TR3rv0sveN2s;1(

delta 21
dcmX@Yc!Y7nOzzT*me~pn41R0P&rMvS003G22rvKu

diff --git a/tests/tabix_data/example.vcf.gz.tbi b/tests/tabix_data/example.vcf.gz.tbi
index ddb120e44132d17fd5b151c9c6af8351dbd22398..97c80efec315040f37a7d0c230ce868d17739b1b 100644
GIT binary patch
delta 46
zcmdnSxP?(rzMF%E0R;ZDF*rFhF-)H*SR{Dyt;|yQOo17mioDDW^Ov&RnK(@W01jjg
AH~;_u

delta 48
zcmdnOxQ$UzzMF%E0R;ZDF*rFhG0dDOSS0NJGEiz}+fv1%moqauWf?jzoDZHjMF9Xg
C><_{K

diff --git a/tests/tabix_data/example_badcomments.bed.gz.tbi b/tests/tabix_data/example_badcomments.bed.gz.tbi
index 04631805ed33127ce32fccbbfe15d90115e442f4..0ab947f612fe56e7947ef2af0285085eebdba6dc 100644
GIT binary patch
delta 50
ycmX@ac!+VrbnS(=95s2F8NTk^es>=u1A}}w2MYrT{AXiua%N(XW?%+$K?DGiPYW*q

delta 50
ycmX@ac!+VrbnU=)w;h~J44%8b9Nx#sz#!kv!NLFn|JfLvoS7J;8JNLb5CH&rR|=B=

diff --git a/tests/tabix_data/example_badcomments.gtf.gz.tbi b/tests/tabix_data/example_badcomments.gtf.gz.tbi
index c7731fc26fc6d9d607c81bbae0a274f1e775cc29..16fb1355db063d4ecb95544d85244910b42d1f89 100644
GIT binary patch
delta 21
dcmX@cc#LtvZ0=REQ!W`WFwC{xTQzZ+0svkd2#Npz

delta 21
dcmX@cc#LtvZ0^#Gme~pn3_A~Z@J(E%003Nf2uc6|

diff --git a/tests/tabix_data/example_badcomments.vcf.gz.tbi b/tests/tabix_data/example_badcomments.vcf.gz.tbi
index 366004b49e71da8b701f63febfc6b5e4a5930b21..38f4b591f25cea7ada5d617a324070f784b2cfd2 100644
GIT binary patch
delta 46
zcmdnRxPwtpzMF%E0R;ZDF*rFhG0dJQSSEP!t;|yQOo0{4C$up#`2Bk|d*Unw03wbL
AO#lD@

delta 48
zcmdnNxQkIxzMF%E0R;ZDF*rFhG0dGPSSIZLGEiz}+fv1%OqZx;28P=~+A0%gC;$K}
CI}OnQ

diff --git a/tests/tabix_data/example_comments.bed.gz.tbi b/tests/tabix_data/example_comments.bed.gz.tbi
index 42544b2390d8b8a725b4fd64161c47dff4d6ff36..89b1bb3384da9062c4055b64eb115855b3170981 100644
GIT binary patch
delta 51
zcmX@ac!+VrG@Y(A_N68~3=DTU*FW6H$iN`q&B4L|0{__<oSc~$q#2mOTo3^Obbks;

delta 51
zcmX@ac!+VrG#xKhsfTJD3=GO!Td(h9WMGi*=3rp}f&Xj_PR>jW(hSUCE{FgCUjPZj

diff --git a/tests/tabix_data/example_comments.gtf.gz.tbi b/tests/tabix_data/example_comments.gtf.gz.tbi
index 2f33d40ff588ca01095f089f0d184bd4aca2cd9d..54f5389f960f8bb24e6375befeb007509d2bc953 100644
GIT binary patch
delta 21
dcmX@cc#LtvZ0=REQ!W`WF!cQGPoB6;0RUj*2=o8|

delta 21
dcmX@cc#LtvZ0^#Gme~pn42KpoY@N7F0RUU-2vPt5

diff --git a/tests/tabix_data/example_comments.vcf.gz.tbi b/tests/tabix_data/example_comments.vcf.gz.tbi
index 366004b49e71da8b701f63febfc6b5e4a5930b21..38f4b591f25cea7ada5d617a324070f784b2cfd2 100644
GIT binary patch
delta 46
zcmdnRxPwtpzMF%E0R;ZDF*rFhG0dJQSSEP!t;|yQOo0{4C$up#`2Bk|d*Unw03wbL
AO#lD@

delta 48
zcmdnNxQkIxzMF%E0R;ZDF*rFhG0dGPSSIZLGEiz}+fv1%OqZx;28P=~+A0%gC;$K}
CI}OnQ

diff --git a/tests/tabix_test.py b/tests/tabix_test.py
index c17f7ff..7546175 100644
--- a/tests/tabix_test.py
+++ b/tests/tabix_test.py
@@ -11,15 +11,18 @@ import shutil
 import gzip
 import pysam
 import unittest
-import subprocess
 import glob
 import re
 from TestUtils import checkBinaryEqual, checkGZBinaryEqual, check_url, \
-    load_and_convert, TABIX_DATADIR, get_temp_filename
+    load_and_convert, make_data_files, TABIX_DATADIR, get_temp_filename
 
 IS_PYTHON3 = sys.version_info[0] >= 3
 
 
+def setUpModule():
+    make_data_files(TABIX_DATADIR)
+
+
 def myzip_open(infile, mode="r"):
     '''open compressed file and decode.'''
 
@@ -1239,5 +1242,4 @@ class TestMultithreadTabixFile(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    subprocess.call("make -C %s" % TABIX_DATADIR, shell=True)
     unittest.main()
diff --git a/tests/tabixproxies_test.py b/tests/tabixproxies_test.py
index 7ad7db0..1806909 100644
--- a/tests/tabixproxies_test.py
+++ b/tests/tabixproxies_test.py
@@ -5,7 +5,11 @@ import sys
 import re
 import copy
 import gzip
-from TestUtils import load_and_convert, TABIX_DATADIR
+from TestUtils import load_and_convert, make_data_files, TABIX_DATADIR
+
+
+def setUpModule():
+    make_data_files(TABIX_DATADIR)
 
 
 class TestParser(unittest.TestCase):
diff --git a/tests/test_samtools_python.py b/tests/test_samtools_python.py
index f30ff9c..da4d332 100644
--- a/tests/test_samtools_python.py
+++ b/tests/test_samtools_python.py
@@ -1,7 +1,11 @@
 import pysam
 import os
 import pytest
-from TestUtils import BAM_DATADIR
+from TestUtils import make_data_files, BAM_DATADIR
+
+
+def setUpModule():
+    make_data_files(BAM_DATADIR)
 
 
 def test_idxstats_parse_split_lines():
-- 
2.30.2