New upstream version 0.15.4+ds

author Michael R. Crusoe <michael.crusoe@gmail.com>

Thu, 23 Jan 2020 12:46:18 +0000 (13:46 +0100)

committer Michael R. Crusoe <michael.crusoe@gmail.com>

Thu, 23 Jan 2020 12:46:18 +0000 (13:46 +0100)
author Michael R. Crusoe <michael.crusoe@gmail.com>
Thu, 23 Jan 2020 12:46:18 +0000 (13:46 +0100)
committer Michael R. Crusoe <michael.crusoe@gmail.com>
Thu, 23 Jan 2020 12:46:18 +0000 (13:46 +0100)
diff --git a/.travis.yml b/.travis.yml

index b30d9b8cfdea0c11e829231d84aace2015cd5d4a..30c7c9777397dc093587fe0c72b3a7e7523ad4e5 100644 (file)
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,11 @@ os:
  
  language: c
  
+stages:
+  - test
+  - name: deploy
+    if: tag IS present
+
  env:
    matrix:
      - CONDA_PY=2.7
@@ -12,12 +17,26 @@ env:
    global:
      - PYSAM_LINKING_TEST=1
      - TWINE_USERNAME=grepall
-    - secure: 'OcwwP8/o21+SGW0UVAnnCQwllhGSCq2HJzpI9EhX3kh6J9RTkyx/+drkg45bx1Z5u8zymuAFappEYzlpzqZE886XezkjOYGVa/u+Coqr1oT/BEJHFCkCA4o26yESp7Zy8aNj/juhB7Rfa77pIDXBayqTzbALz/AURMtZapasB18='
+    - secure: bTbky3Un19NAl62lix8bMLmBv9IGNhFkRXlZH+B253nYub7jwQwPQKum3ct9ea+XHJT5//uM0B8WAF6eyugpNkPQ7+S7SEH5BJuCt30nv6qvGhSO2AffZKeHEDnfW2kqGrivn87TqeomlSBlO742CD/V0wOIUwkTT9tutd+E7FU=
  
-_deploy_common: &deploy_common
-  if: tag IS present
+_cibw_common: &cibw_common
+  addons: {}
    install:
-    - python3 -m pip install cibuildwheel twine
+    - python3 -m pip install 'cibuildwheel>=1.1.0' twine  # quoted so the shell does not treat > as a redirect
+  script:
+    - set -e
+    - cibuildwheel --output-dir dist
+    - twine check dist/*
+    - twine upload --skip-existing dist/*
+
+_cibw_linux: &cibw_linux
+  stage: deploy
+  os: linux
+  language: python
+  python: '3.5'
+  services:
+    - docker
+  <<: *cibw_common
  
  matrix:
    include:
@@ -25,11 +44,6 @@ matrix:
        os: linux
        language: python
        python: '3.5'
-      services:
-        - docker
-      env:
-        - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && pip install -r requirements.txt"
-        - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
        addons:
          apt:
            packages:
@@ -37,28 +51,36 @@ matrix:
              - g++
              - libcurl4-openssl-dev  # for libcurl support in sdist
              - libssl-dev  # for s3 support in sdist
-      <<: *deploy_common
+      install:
+        - python3 -m pip install Cython twine
        script:
          - set -e
-        - cibuildwheel --output-dir dist
-        - python3 -m pip install Cython
          - python3 setup.py build_ext --inplace
          - python3 setup.py sdist
          - twine check dist/*
          - twine upload --skip-existing dist/*
+    - <<: *cibw_linux
+      env:
+        - CIBW_BUILD="*_x86_64"
+        - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && python -m pip install -r requirements.txt"
+        - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
+        - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}'
+        - CIBW_TEST_COMMAND='python -c "import pysam"'
+    - <<: *cibw_linux
+      env:
+        - CIBW_BUILD="*_i686"
+        - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && python -m pip install -r requirements.txt"
+        - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
+        - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}'
+        - CIBW_TEST_COMMAND='python -c "import pysam"'
      - stage: deploy
        os: osx
        language: generic
        env:
-        - CIBW_BEFORE_BUILD="pip install -r requirements.txt"
+        - CIBW_BEFORE_BUILD="python -m pip install -r requirements.txt"
          - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
-      addons: {}
-      <<: *deploy_common
-      script:
-        - set -e
-        - cibuildwheel --output-dir dist
-        - twine check dist/*
-        - twine upload --skip-existing dist/*
+        - CIBW_TEST_COMMAND='python -c "import pysam"'
+      <<: *cibw_common
  
  addons:
    apt:
diff --git a/doc/release.rst b/doc/release.rst

index ee1875b1b4241bc37305172b5928e827dc5cd9d5..07d099daacf0882635fda54610c1512a45f48351 100644 (file)
--- a/doc/release.rst
+++ b/doc/release.rst
@@ -2,6 +2,26 @@
  Release notes
  =============
  
+Release 0.15.4
+==============
+
+Bugfix release. The principal reason for this release is to update the
+Cython version in order to fix ``pip install pysam`` with Python 3.8.
+
+* [#879] Fix the add_meta function in libcbcf.pyx so that meta-information
+  header lines added with this function follow the double-quoting rules
+  specified in the VCF4.2 and VCF4.3 specifications
+* [#863] Force arg to bytes to support non-ASCII encoding
+* [#875] Bump minimum Cython version
+* [#868] Prevent segfault on Python 2.7 AlignedSegment.compare(other=None)
+* [#867] Fix wheel building on TravisCI
+* [#863] Force arg to bytes to support non-ASCII encoding
+* [#799] Disambiguate interpretation of bcf_read return code
+* [#841] Fix silent truncation of FASTQ with bad q strings
+* [#846] Prevent segmentation fault on ID, when handling malformed records
+* [#829] Run configure with the correct CC/CFLAGS/LDFLAGS env vars
+
+
  Release 0.15.3
  ==============
  
diff --git a/pysam/bcftools.py b/pysam/bcftools.py

index ab891d4baf1704c43f9035f7cbe8e14240daf998..9f4ce0e2aacee378adf61279da5121ee95eae4df 100644 (file)
--- a/pysam/bcftools.py
+++ b/pysam/bcftools.py
@@ -1,4 +1,4 @@
-from utils import PysamDispatcher
+from pysam.utils import PysamDispatcher
  
  BCFTOOLS_DISPATCH = [
      "index",
diff --git a/pysam/libcalignedsegment.pyx b/pysam/libcalignedsegment.pyx

index 02ef5b679902d24e017e8bc776d0e926ab5bb0bd..9e298f1c1096387ddb66593bfc219f4395208fb6 100644 (file)
--- a/pysam/libcalignedsegment.pyx
+++ b/pysam/libcalignedsegment.pyx
@@ -133,7 +133,7 @@ cdef inline uint8_t strand_mark_char(uint8_t ch, bam1_t *b):
          else:
              return toupper(ch)
  
-    
+
  cdef inline bint pileup_base_qual_skip(bam_pileup1_t * p, uint32_t threshold):
      cdef uint32_t c
      if p.qpos < p.b.core.l_qseq:
@@ -143,7 +143,7 @@ cdef inline bint pileup_base_qual_skip(bam_pileup1_t * p, uint32_t threshold):
      if c < threshold:
          return True
      return False
-    
+
  
  cdef inline char map_typecode_htslib_to_python(uint8_t s):
      """map an htslib typecode to the corresponding python typecode
@@ -223,7 +223,7 @@ cdef inline uint8_t get_tag_typecode(value, value_type=None):
      """
      # 0 is unknown typecode
      cdef char typecode = 0
-    
+
      if value_type is None:
          if isinstance(value, int):
              if value < 0:
@@ -341,7 +341,7 @@ cdef inline pack_tags(tags):
      """
      fmts, args = ["<"], []
  
-    # htslib typecode 
+    # htslib typecode
      cdef uint8_t typecode
      for tag in tags:
  
@@ -394,7 +394,7 @@ cdef inline pack_tags(tags):
  
              if typecode not in DATATYPE2FORMAT:
                  raise ValueError("invalid value type '{}' ({})".format(chr(typecode), array.typecode))
-            
+
              # use array.tostring() to retrieve byte representation and
              # save as bytes
              datafmt = "2sBBI%is" % (len(value) * DATATYPE2FORMAT[typecode][1])
@@ -416,7 +416,7 @@ cdef inline pack_tags(tags):
                  typecode = get_tag_typecode(value)
                  if typecode == 0:
                      raise ValueError("could not deduce typecode for value {}".format(value))
-                
+
              if typecode == 'a' or typecode == 'A' or typecode == 'Z' or typecode == 'H':
                  value = force_bytes(value)
  
@@ -427,7 +427,7 @@ cdef inline pack_tags(tags):
                  datafmt = "2sB%is" % (len(value)+1)
              else:
                  datafmt = "2sB%s" % DATATYPE2FORMAT[typecode][0]
-                
+
              args.extend([pytag[:2],
                           typecode,
                           value])
@@ -924,9 +924,10 @@ cdef class AlignedSegment:
      Parameters
      ----------
  
-    header -- :class:`~pysam.AlignmentHeader` object to map numerical
-              identifiers to chromosome names. If not given, an empty
-              header is created.
+    header:
+         :class:`~pysam.AlignmentHeader` object to map numerical
+         identifiers to chromosome names. If not given, an empty
+         header is created.
      '''
  
      # Now only called when instances are created from Python
@@ -957,9 +958,9 @@ cdef class AlignedSegment:
          self.cache_query_alignment_qualities = None
          self.cache_query_sequence = None
          self.cache_query_alignment_sequence = None
-        
+
          self.header = header
-        
+
      def __dealloc__(self):
          bam_destroy1(self._delegate)
  
@@ -999,6 +1000,10 @@ cdef class AlignedSegment:
          <,=,> to *other*
          '''
  
+        # avoid segfault when other equals None
+        if other is None:
+            return -1
+
          cdef int retval, x
          cdef bam1_t *t
          cdef bam1_t *o
@@ -1021,7 +1026,7 @@ cdef class AlignedSegment:
  
          cdef uint8_t *a = <uint8_t*>&t.core
          cdef uint8_t *b = <uint8_t*>&o.core
-        
+
          retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t))
          if retval:
              return retval
@@ -1072,7 +1077,7 @@ cdef class AlignedSegment:
                  raise ValueError('sam_format failed')
          else:
              raise NotImplementedError("todo")
-        
+
          ret = force_str(line.s[:line.l])
  
          if line.m:
@@ -1088,7 +1093,8 @@ cdef class AlignedSegment:
  
          Parameters
          ----------
-        sam -- :term:`SAM` formatted string
+        sam:
+            :term:`SAM` formatted string
  
          """
          cdef AlignedSegment dest = cls.__new__(cls)
@@ -1101,7 +1107,7 @@ cdef class AlignedSegment:
          line.s = _sam
  
          sam_parse1(&line, dest.header.ptr, dest._delegate)
-        
+
          return dest
  
      cpdef tostring(self, htsfile=None):
@@ -1110,13 +1116,14 @@ cdef class AlignedSegment:
          Parameters
          ----------
  
-        htsfile -- (deprecated) AlignmentFile object to map numerical
-                   identifiers to chromosome names. This parameter is present
-                   for backwards compatibility and ignored.
+        htsfile:
+            (deprecated) AlignmentFile object to map numerical
+            identifiers to chromosome names. This parameter is present
+            for backwards compatibility and ignored.
          """
  
          return self.to_string()
-    
+
      def to_dict(self):
          """returns a json representation of the aligned segment.
  
@@ -1133,8 +1140,9 @@ cdef class AlignedSegment:
  
          Parameters
          ----------
-        sam_dict -- dictionary of alignment values, keys corresponding to output from
-                    :meth:`todict()`.
+        sam_dict:
+            dictionary of alignment values, keys corresponding to output from
+            :meth:`to_dict()`.
  
          """
          # let htslib do the parsing
@@ -1143,7 +1151,7 @@ cdef class AlignedSegment:
              "\t".join((sam_dict[x] for x in KEY_NAMES[:-1])) +
              "\t" +
              "\t".join(sam_dict.get(KEY_NAMES[-1], [])), header)
-    
+
      ########################################################
      ## Basic attributes in order of appearance in SAM format
      property query_name:
@@ -1187,7 +1195,7 @@ cdef class AlignedSegment:
  
              src.core.l_extranul = l_extranul
              src.core.l_qname = l + l_extranul
-            
+
              # re-acquire pointer to location in memory
              # as it might have moved
              p = pysam_bam_get_qname(src)
@@ -1321,7 +1329,7 @@ cdef class AlignedSegment:
                  return self.header.get_reference_name(self._delegate.core.mtid)
              else:
                  raise ValueError("next_reference_name unknown if no header associated with record")
-            
+
          def __set__(self, reference):
              cdef int mtid
              if reference is None or reference == "*":
@@ -1434,7 +1442,7 @@ cdef class AlignedSegment:
                                                      nbytes_old,
                                                      nbytes_new,
                                                      p)
-            
+
              if retval == NULL:
                  raise MemoryError("could not allocate memory")
  
@@ -1564,7 +1572,7 @@ cdef class AlignedSegment:
              # setting the unmapped flag requires recalculation of
              # bin as alignment length is now implicitely 1
              update_bin(self._delegate)
-                
+
      property mate_is_unmapped:
          """true if the mate is unmapped"""
          def __get__(self):
@@ -1834,7 +1842,7 @@ cdef class AlignedSegment:
  
      def get_forward_sequence(self):
          """return the original read sequence.
-        
+
          Reads mapping to the reverse strand will be reverse
          complemented.
  
@@ -1849,7 +1857,7 @@ cdef class AlignedSegment:
  
      def get_forward_qualities(self):
          """return base qualities of the read sequence.
-        
+
          Reads mapping to the reverse strand will be reversed.
          """
          if self.is_reverse:
@@ -1857,7 +1865,7 @@ cdef class AlignedSegment:
          else:
              return self.query_qualities
  
-    
+
      def get_aligned_pairs(self, matches_only=False, with_seq=False):
          """a list of aligned read (query) and reference positions.
  
@@ -2075,15 +2083,11 @@ cdef class AlignedSegment:
  
          If no cigar string is present, empty arrays will be returned.
  
-        Parameters
-        ----------
-
-        Returns
-        -------
-
-        arrays : two arrays. The first contains the nucleotide counts within
-           each cigar operation, the second contains the number of blocks for
-           each cigar operation.
+        Returns:
+            arrays :
+                two arrays. The first contains the nucleotide counts within
+                each cigar operation, the second contains the number of blocks
+                for each cigar operation.
  
          """
  
@@ -2199,7 +2203,7 @@ cdef class AlignedSegment:
                  values = []
  
              cdef uint32_t ncigar = len(values)
-            
+
              cdef bam1_t * retval = pysam_bam_update(src,
                                                      pysam_get_n_cigar(src) * 4,
                                                      ncigar * 4,
@@ -2245,7 +2249,7 @@ cdef class AlignedSegment:
  
          This method accepts valid SAM specification value types, which
          are::
-        
+
             A: printable char
             i: signed int
             f: float
@@ -2260,12 +2264,12 @@ cdef class AlignedSegment:
  
          When deducing the type code by the python type of *value*, the
          following mapping is applied::
-        
+
              i: python int
              f: python float
              Z: python str or bytes
              B: python array.array, list or tuple
-            
+
          Note that a single character string will be output as 'Z' and
          not 'A' as the former is the more general type.
          """
@@ -2406,27 +2410,24 @@ cdef class AlignedSegment:
          specification) as well as additional value type 'd' as
          implemented in htslib.
  
-        Parameters
-        ----------
+        Parameters:
  
-        tag :
-            data tag.
+            tag :
+                data tag.
  
-        with_value_type : Optional[bool]
-            if set to True, the return value is a tuple of (tag value, type code).
-            (default False)
+            with_value_type : Optional[bool]
+                if set to True, the return value is a tuple of (tag value, type
+                code). (default False)
  
-        Returns
-        -------
+        Returns:
  
-        A python object with the value of the `tag`. The type of the
-        object depends on the data type in the data record.
+            A python object with the value of the `tag`. The type of the
+            object depends on the data type in the data record.
  
-        Raises
-        ------
+        Raises:
  
-        KeyError
-            If `tag` is not present, a KeyError is raised.
+            KeyError
+                If `tag` is not present, a KeyError is raised.
  
          """
          cdef uint8_t * v
@@ -2799,14 +2800,14 @@ cdef class PileupColumn:
          """set the minimum base quality for this pileup column.
          """
          self.min_base_quality = min_base_quality
-    
+
      def __len__(self):
          """return number of reads aligned to this column.
  
          see :meth:`get_num_aligned`
          """
          return self.get_num_aligned()
-    
+
      property reference_id:
          '''the reference sequence number as defined in the header'''
          def __get__(self):
@@ -2883,7 +2884,7 @@ cdef class PileupColumn:
  
      def get_num_aligned(self):
          """return number of aligned bases at pileup column position.
-        
+
          This method applies a base quality filter and the number is
          equal to the size of :meth:`get_query_sequences`,
          :meth:`get_mapping_qualities`, etc.
@@ -2895,7 +2896,7 @@ cdef class PileupColumn:
          cdef bam_pileup1_t * p = NULL
          if self.plp == NULL or self.plp[0] == NULL:
              raise ValueError("PileupColumn accessed after iterator finished")
-        
+
          for x from 0 <= x < self.n_pu:
              p = &(self.plp[0][x])
              if p == NULL:
@@ -2912,7 +2913,7 @@ cdef class PileupColumn:
  
          Optionally, the bases/sequences can be annotated according to the samtools
          mpileup format. This is the format description from the samtools mpileup tool::
-        
+
             Information on match, mismatch, indel, strand, mapping
             quality and start and end of a read are all encoded at the
             read base column. At this column, a dot stands for a match
@@ -2934,7 +2935,7 @@ cdef class PileupColumn:
  
          To reproduce samtools mpileup format, set all of mark_matches,
          mark_ends and add_indels to True.
-        
+
          Parameters
          ----------
  
@@ -2954,7 +2955,7 @@ cdef class PileupColumn:
            If True, add bases for bases inserted into the reference and
            'N's for base skipped from the reference. If a reference sequence
            is given, add the actual bases.
- 
+
          Returns
          -------
  
@@ -2968,7 +2969,7 @@ cdef class PileupColumn:
          cdef uint8_t rb = 0
          cdef kstring_t * buf = &self.buf
          cdef bam_pileup1_t * p = NULL
-        
+
          if self.plp == NULL or self.plp[0] == NULL:
              raise ValueError("PileupColumn accessed after iterator finished")
  
@@ -2987,7 +2988,7 @@ cdef class PileupColumn:
              # see samtools pileup_seq
              if mark_ends and p.is_head:
                  kputc('^', buf)
-                
+
                  if p.b.core.qual > 93:
                      kputc(126, buf)
                  else:
@@ -3059,7 +3060,7 @@ cdef class PileupColumn:
                  raise ValueError(
                      "pileup buffer out of sync - most likely use of iterator "
                      "outside loop")
-            
+
              if p.qpos < p.b.core.l_qseq:
                  c = bam_get_qual(p.b)[p.qpos]
              else:
@@ -3079,7 +3080,7 @@ cdef class PileupColumn:
          """
          if self.plp == NULL or self.plp[0] == NULL:
              raise ValueError("PileupColumn accessed after iterator finished")
-        
+
          cdef uint32_t x = 0
          cdef bam_pileup1_t * p = NULL
          result = []
@@ -3089,7 +3090,7 @@ cdef class PileupColumn:
                  raise ValueError(
                      "pileup buffer out of sync - most likely use of iterator "
                      "outside loop")
-            
+
              if pileup_base_qual_skip(p, self.min_base_quality):
                  continue
              result.append(p.b.core.qual)
@@ -3115,7 +3116,7 @@ cdef class PileupColumn:
                  raise ValueError(
                      "pileup buffer out of sync - most likely use of iterator "
                      "outside loop")
-            
+
              if pileup_base_qual_skip(p, self.min_base_quality):
                  continue
              result.append(p.qpos)
@@ -3131,7 +3132,7 @@ cdef class PileupColumn:
          """
          if self.plp == NULL or self.plp[0] == NULL:
              raise ValueError("PileupColumn accessed after iterator finished")
-        
+
          cdef uint32_t x = 0
          cdef bam_pileup1_t * p = NULL
          result = []
@@ -3141,12 +3142,12 @@ cdef class PileupColumn:
                  raise ValueError(
                      "pileup buffer out of sync - most likely use of iterator "
                      "outside loop")
-            
+
              if pileup_base_qual_skip(p, self.min_base_quality):
                  continue
              result.append(charptr_to_str(pysam_bam_get_qname(p.b)))
          return result
-            
+
  
  cdef class PileupRead:
      '''Representation of a read aligned to a particular position in the
@@ -3230,7 +3231,7 @@ cdef class PileupRead:
          def __get__(self):
              return self._is_refskip
  
-        
+
  
  cpdef enum CIGAR_OPS:
      CMATCH = 0
@@ -3246,30 +3247,30 @@ cpdef enum CIGAR_OPS:
  
  
  cpdef enum SAM_FLAGS:
-    # the read is paired in sequencing, no matter whether it is mapped in a pair 
+    # the read is paired in sequencing, no matter whether it is mapped in a pair
      FPAIRED = 1
-    # the read is mapped in a proper pair 
+    # the read is mapped in a proper pair
      FPROPER_PAIR = 2
-    # the read itself is unmapped; conflictive with FPROPER_PAIR 
+    # the read itself is unmapped; conflictive with FPROPER_PAIR
      FUNMAP = 4
-    # the mate is unmapped 
+    # the mate is unmapped
      FMUNMAP = 8
-    # the read is mapped to the reverse strand 
+    # the read is mapped to the reverse strand
      FREVERSE = 16
-    # the mate is mapped to the reverse strand 
+    # the mate is mapped to the reverse strand
      FMREVERSE = 32
-    # this is read1 
+    # this is read1
      FREAD1 = 64
-    # this is read2 
+    # this is read2
      FREAD2 = 128
-    # not primary alignment 
+    # not primary alignment
      FSECONDARY = 256
-    # QC failure 
+    # QC failure
      FQCFAIL = 512
-    # optical or PCR duplicate 
+    # optical or PCR duplicate
      FDUP = 1024
-    # supplementary alignment 
-    FSUPPLEMENTARY = 2048      
+    # supplementary alignment
+    FSUPPLEMENTARY = 2048
  
  
  __all__ = [
diff --git a/pysam/libcalignmentfile.pyx b/pysam/libcalignmentfile.pyx

index d35b0db3ff295541390344239dae28fda4a3bf40..0c69a4fe2e61551e536b92dce83a826a2e578dff 100644 (file)
--- a/pysam/libcalignmentfile.pyx
+++ b/pysam/libcalignmentfile.pyx
@@ -265,7 +265,7 @@ cdef class AlignmentHeader(object):
                  if self.ptr.target_name[x] == NULL:
                      raise MemoryError("could not allocate {} bytes".format(len(name) + 1, sizeof(char)))
                  strncpy(self.ptr.target_name[x], name, len(name))
-        
+
          return self
  
      @classmethod
@@ -282,9 +282,9 @@ cdef class AlignmentHeader(object):
                      raise KeyError("incomplete sequence information in '%s'" % str(fields))
                  except ValueError:
                      raise ValueError("wrong sequence information in '%s'" % str(fields))
-                
+
          return cls._from_text_and_lengths(text, reference_names, reference_lengths)
-        
+
      @classmethod
      def from_dict(cls, header_dict):
  
@@ -380,12 +380,12 @@ cdef class AlignmentHeader(object):
  
      def _build_sequence_section(self):
          """return sequence section of header.
-    
+
          The sequence section is built from the list of reference names and
          lengths stored in the BAM-file and not from any @SQ entries that
          are part of the header's text section.
          """
-        
+
          cdef int x
          text = []
          for x in range(self.ptr.n_targets):
@@ -393,7 +393,7 @@ cdef class AlignmentHeader(object):
                  force_str(self.ptr.target_name[x]),
                  self.ptr.target_len[x]))
          return "".join(text)
-        
+
      def to_dict(self):
          """return two-level dictionary with header information from the file.
  
@@ -504,7 +504,7 @@ cdef class AlignmentHeader(object):
              raise KeyError("unknown reference {}".format(reference))
          else:
              return self.ptr.target_len[tid]
-    
+
      def is_valid_tid(self, int tid):
          """
          return True if the numerical :term:`tid` is valid; False otherwise.
@@ -522,7 +522,7 @@ cdef class AlignmentHeader(object):
          """
          reference = force_bytes(reference)
          return bam_name2id(self.ptr, reference)
-        
+
      def __str__(self):
          '''string with the full contents of the :term:`sam file` header as a
          string.
@@ -561,7 +561,7 @@ cdef class AlignmentHeader(object):
  
      def get(self, *args):
          return self.to_dict().get(*args)
-    
+
      def __len__(self):
          return self.to_dict().__len__()
  
@@ -887,7 +887,7 @@ cdef class AlignmentFile(HTSFile):
                  raise ValueError(
                      "either supply options `template`, `header`, `text` or  both `reference_names` "
                      "and `reference_lengths` for writing")
-            
+
              if template:
                  # header is copied, though at the moment not strictly
                  # necessary as AlignmentHeader is immutable.
@@ -978,7 +978,7 @@ cdef class AlignmentFile(HTSFile):
                              "SAM? file does not have a valid header (mode='%s'), "
                              "please provide reference_names and reference_lengths")
                      self.header = makeAlignmentHeader(hdr)
-                
+
              # set filename with reference sequences
              if self.is_cram and reference_filename:
                  creference_filename = self.reference_filename
@@ -1283,7 +1283,7 @@ cdef class AlignmentFile(HTSFile):
  
              ignore orphans (paired reads that are not in a proper pair).
              The default is to ignore orphans.
-   
+
          min_base_quality: int
  
             Minimum base quality. Bases below the minimum quality will
@@ -1323,7 +1323,7 @@ cdef class AlignmentFile(HTSFile):
          """
          cdef int rtid, has_coord
          cdef int32_t rstart, rstop
-        
+
          if not self.is_open:
              raise ValueError("I/O operation on closed file")
  
@@ -1534,7 +1534,7 @@ cdef class AlignmentFile(HTSFile):
              raise ValueError("interval of size 0")
          if _stop < _start:
              raise ValueError("interval of size less than 0")
-        
+
          cdef int length = _stop - _start
          cdef c_array.array int_array_template = array.array('L', [])
          cdef c_array.array count_a
@@ -1582,7 +1582,7 @@ cdef class AlignmentFile(HTSFile):
              # count
              seq = read.seq
              quality = read.query_qualities
-            
+
              for qpos, refpos in read.get_aligned_pairs(True):
                  if qpos is not None and refpos is not None and \
                     _start <= refpos < _stop:
@@ -1646,14 +1646,14 @@ cdef class AlignmentFile(HTSFile):
              base_position = r.pos
  
              for op, nt in r.cigartuples:
-                if op in match_or_deletion: 
+                if op in match_or_deletion:
                      base_position += nt
-                elif op == BAM_CREF_SKIP: 
+                elif op == BAM_CREF_SKIP:
                      junc_start = base_position
                      base_position += nt
                      res[(junc_start, base_position)] += 1
          return res
- 
+
  
      def close(self):
          '''closes the :class:`pysam.AlignmentFile`.'''
@@ -1705,16 +1705,14 @@ cdef class AlignmentFile(HTSFile):
          '''
          write a single :class:`pysam.AlignedSegment` to disk.
  
-        Raises
-        ------
-        ValueError
-            if the writing failed
-
-        Returns
-        -------
+        Raises:
+            ValueError
+                if the writing failed
  
-        int : the number of bytes written. If the file is closed,
-              this will be 0.
+        Returns:
+            int :
+                the number of bytes written. If the file is closed,
+                this will be 0.
          '''
          if not self.is_open:
              return 0
@@ -1724,7 +1722,7 @@ cdef class AlignmentFile(HTSFile):
                  "AlignedSegment refers to reference number {} that "
                  "is larger than the number of references ({}) in the header".format(
                      read._delegate.core.tid, self.header.ptr.n_targets))
-        
+
          cdef int ret
          with nogil:
              ret = sam_write1(self.htsfile,
@@ -1800,12 +1798,12 @@ cdef class AlignmentFile(HTSFile):
          """return statistics about mapped/unmapped reads per chromosome as
          they are stored in the index.
  
-        Returns
-        -------
-        list : a list of records for each chromosome. Each record has the attributes 'contig',
-               'mapped', 'unmapped' and 'total'.
+        Returns:
+            list :
+                a list of records for each chromosome. Each record has the
+                attributes 'contig', 'mapped', 'unmapped' and 'total'.
          """
-        
+
          self.check_index()
          cdef int tid
          cdef uint64_t mapped, unmapped
@@ -1820,7 +1818,7 @@ cdef class AlignmentFile(HTSFile):
                      mapped,
                      unmapped,
                      mapped + unmapped)))
-                
+
          return results
  
      ###############################################################
@@ -1899,7 +1897,7 @@ cdef class AlignmentFile(HTSFile):
          if self.header is None:
              raise ValueError("header not available in closed files")
          return self.header.get_reference_length(reference)
-    
+
      property nreferences:
          """int with the number of :term:`reference` sequences in the file.
          This is a read-only attribute."""
@@ -1978,7 +1976,7 @@ cdef class IteratorRow:
          cdef char *cfilename
          cdef char *creference_filename
          cdef char *cindexname = NULL
-        
+
          if not samfile.is_open:
              raise ValueError("I/O operation on closed file")
  
@@ -1989,7 +1987,7 @@ cdef class IteratorRow:
          # reopen the file - note that this makes the iterator
          # slow and causes pileup to slow down significantly.
          if multiple_iterators:
-            
+
              cfilename = samfile.filename
              with nogil:
                  self.htsfile = hts_open(cfilename, 'r')
@@ -2002,7 +2000,7 @@ cdef class IteratorRow:
                      self.index = sam_index_load2(self.htsfile, cfilename, cindexname)
              else:
                  self.index = NULL
-                
+
              # need to advance in newly opened file to position after header
              # better: use seek/tell?
              with nogil:
@@ -2012,7 +2010,7 @@ cdef class IteratorRow:
              self.header = makeAlignmentHeader(hdr)
  
              self.owns_samfile = True
-            
+
              # options specific to CRAM files
              if samfile.is_cram and samfile.reference_filename:
                  creference_filename = samfile.reference_filename
@@ -2094,7 +2092,7 @@ cdef class IteratorRowRegion(IteratorRow):
              raise IOError('truncated file')
          else:
              raise IOError("error while reading file {}: {}".format(self.samfile.filename, self.retval))
-        
+
      def __dealloc__(self):
          hts_itr_destroy(self.iter)
  
@@ -2326,7 +2324,7 @@ cdef int __advance_nofilter(void *data, bam1_t *b):
  
  
  cdef int __advance_all(void *data, bam1_t *b):
-    '''only use reads for pileup passing basic filters such as 
+    '''only use reads for pileup passing basic filters such as
  
      BAM_FUNMAP, BAM_FSECONDARY, BAM_FQCFAIL, BAM_FDUP
      '''
@@ -2362,7 +2360,7 @@ cdef int __advance_samtools(void * data, bam1_t * b):
              continue
          if d.flag_require and not (b.core.flag & d.flag_require):
              continue
-        
+
          # reload sequence
          if d.fastafile != NULL and b.core.tid != d.tid:
              if d.seq != NULL:
@@ -2388,21 +2386,21 @@ cdef int __advance_samtools(void * data, bam1_t * b):
                  sam_prob_realn(b, d.seq, d.seq_len, 7)
              else:
                  sam_prob_realn(b, d.seq, d.seq_len, 3)
-                
+
          if d.seq != NULL and d.adjust_capq_threshold > 10:
              q = sam_cap_mapq(b, d.seq, d.seq_len, d.adjust_capq_threshold)
              if q < 0:
                  continue
              elif b.core.qual > q:
                  b.core.qual = q
-                
+
          if b.core.qual < d.min_mapping_quality:
              continue
          if d.ignore_orphans and b.core.flag & BAM_FPAIRED and not (b.core.flag & BAM_FPROPER_PAIR):
              continue
-        
+
          break
-        
+
      return ret
  
  
@@ -2453,7 +2451,7 @@ cdef class IteratorColumn:
          self.iterdata.compute_baq = kwargs.get("compute_baq", True)
          self.iterdata.redo_baq = kwargs.get("redo_baq", False)
          self.iterdata.ignore_orphans = kwargs.get("ignore_orphans", True)
-        
+
          self.tid = 0
          self.pos = 0
          self.n_plp = 0
@@ -2497,7 +2495,7 @@ cdef class IteratorColumn:
          '''
          return true if iterator is associated with a reference'''
          return self.fastafile
-            
+
      cdef _setup_iterator(self,
                           int tid,
                           int start,
@@ -2523,7 +2521,7 @@ cdef class IteratorColumn:
  
          cdef void * data[1]
          data[0] = <void*>&self.iterdata
-        
+
          if self.stepper is None or self.stepper == "all":
              with nogil:
                  self.pileup_iter = bam_mplp_init(1,
@@ -2550,7 +2548,7 @@ cdef class IteratorColumn:
          if self.ignore_overlaps:
              with nogil:
                  bam_mplp_init_overlaps(self.pileup_iter)
-                
+
      cdef reset(self, tid, start, stop):
          '''reset iterator position.
  
@@ -2572,7 +2570,7 @@ cdef class IteratorColumn:
          #                                  &self.iterdata)
          with nogil:
              bam_mplp_reset(self.pileup_iter)
-        
+
      cdef _free_pileup_iter(self):
          '''free the memory alloc'd by bam_plp_init.
  
@@ -2593,9 +2591,9 @@ cdef class IteratorColumn:
          if self.iterdata.seq != NULL:
              free(self.iterdata.seq)
              self.iterdata.seq = NULL
-        
+
      # backwards compatibility
-    
+
      def hasReference(self):
          return self.has_reference()
      cdef char * getSequence(self):
@@ -2603,7 +2601,7 @@ cdef class IteratorColumn:
      def addReference(self, FastaFile fastafile):
          return self.add_reference(fastafile)
  
-            
+
  cdef class IteratorColumnRegion(IteratorColumn):
      '''iterates over a region only.
      '''
@@ -2630,7 +2628,7 @@ cdef class IteratorColumnRegion(IteratorColumn):
      def __next__(self):
  
          cdef int n
-        
+
          while 1:
              n = self.cnext()
              if n < 0:
@@ -2823,7 +2821,7 @@ cdef class IndexedReads:
  
          cdef uint64_t pos
          cdef bam_hdr_t * hdr = self.header.ptr
-        
+
          while ret > 0:
              with nogil:
                  pos = bgzf_tell(hts_get_bgzfp(self.htsfile))
diff --git a/pysam/libcbcf.pyx b/pysam/libcbcf.pyx

index e40a8013035a39c33bda78933079981c126de0dc..c0cee5c8076b040bd8af889b330c825ad6e2966e 100644 (file)
--- a/pysam/libcbcf.pyx
+++ b/pysam/libcbcf.pyx
@@ -418,7 +418,7 @@ cdef bcf_copy_expand_array(void *src_data, int src_type, size_t src_values,
      """copy data from src to dest where the size of the elements (src_type/dst_type) differ
      as well as the number of elements (src_values/dst_values).
      """
-    
+
      cdef char    *src_datac
      cdef char    *dst_datac
      cdef int8_t  *src_datai8
@@ -817,7 +817,7 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
      if key == 'phased':
          sample.phased = bool(value)
          return
-    
+
      cdef bcf_hdr_t *hdr = sample.record.header.ptr
      cdef bcf1_t *r = sample.record.ptr
      cdef int fmt_id
@@ -863,7 +863,7 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value):
                                &value_count, &scalar, &realloc)
      vlen = value_count < 0
      value_count = len(values)
-    
+
      # If we can, write updated values to existing allocated storage.
      if fmt and not realloc:
          r.d.indiv_dirty = 1
@@ -2124,6 +2124,7 @@ cdef class VariantHeader(object):
          if self.ptr.dirty:
              bcf_hdr_sync(self.ptr)
  
+
      def add_meta(self, key, value=None, items=None):
          """Add metadata to this header"""
          if not ((value is not None) ^ (items is not None)):
@@ -2140,11 +2141,16 @@ cdef class VariantHeader(object):
                  hrec.value = strdup(force_bytes(value))
              else:
                  for key, value in items:
+                
+                    quoted = True
+                    if key in set(("ID", "Number", "Type")):
+                      quoted = False
+
                      key = force_bytes(key)
                      bcf_hrec_add_key(hrec, key, <int>len(key))
  
                      value = force_bytes(str(value))
-                    quoted = strpbrk(value, ' ;,"\t<>') != NULL
+
                      bcf_hrec_set_val(hrec, hrec.nkeys-1, value, <int>len(value), quoted)
          except:
              bcf_hrec_destroy(hrec)
@@ -3134,6 +3140,8 @@ cdef class VariantRecord(object):
              raise ValueError('Error unpacking VariantRecord')
          # causes a memory leak https://github.com/pysam-developers/pysam/issues/773
          # return bcf_str_cache_get_charptr(r.d.id) if r.d.id != b'.' else None
+        if (r.d.m_id == 0):
+            raise ValueError('Error extracing ID')
          return charptr_to_str(r.d.id) if r.d.id != b'.' else None
  
      @id.setter
@@ -4080,6 +4088,7 @@ cdef class VariantFile(HTSFile):
  
      def __next__(self):
          cdef int ret
+        cdef int errcode
          cdef bcf1_t *record = bcf_init1()
  
          if not record:
@@ -4093,7 +4102,10 @@ cdef class VariantFile(HTSFile):
              ret = bcf_read1(self.htsfile, self.header.ptr, record)
  
          if ret < 0:
+            errcode = record.errcode
              bcf_destroy1(record)
+            if errcode:
+                raise IOError('unable to parse next record')
              if ret == -1:
                  raise StopIteration
              elif ret == -2:
@@ -4445,3 +4457,4 @@ cdef class VariantFile(HTSFile):
          # potentially unnecessary optimization that also sets max_unpack
          if not include_samples:
              self.drop_samples = True
+
diff --git a/pysam/libcfaidx.pyx b/pysam/libcfaidx.pyx

index 40d8430fbdab48bb75d59833029b103f01482924..f9b8f6e619886e5405ced27b45c847aec7373e6f 100644 (file)
--- a/pysam/libcfaidx.pyx
+++ b/pysam/libcfaidx.pyx
@@ -646,8 +646,14 @@ cdef class FastxFile:
              if self.persist:
                  return FastxRecord(proxy=makeFastqProxy(self.entry))
              return makeFastqProxy(self.entry)
-        else:
+        elif (l == -1):
              raise StopIteration
+        elif (l == -2):
+            raise ValueError('truncated quality string in {0}'
+                             .format(self._filename))
+        else:
+            raise ValueError('unknown problem parsing {0}'
+                             .format(self._filename))
  
  # Compatibility Layer for pysam 0.8.1
  cdef class FastqFile(FastxFile):
diff --git a/pysam/libchtslib.pyx b/pysam/libchtslib.pyx

index c03c7cfab6d63302cdb8651071083b7a7c0504be..b4dcaa8b7e4f5b1213de4e00f076bbaeba91eac9 100644 (file)
--- a/pysam/libchtslib.pyx
+++ b/pysam/libchtslib.pyx
@@ -617,9 +617,10 @@ cdef class HTSFile(object):
          Returns
          -------
  
-        tuple : a tuple of `flag`, :term:`tid`, `start` and
-        `stop`. The flag indicates whether no coordinates were
-        supplied and the genomic region is the complete genomic space.
+        tuple :
+            a tuple of `flag`, :term:`tid`, `start` and
+            `stop`. The flag indicates whether no coordinates were
+            supplied and the genomic region is the complete genomic space.
  
          Raises
          ------
diff --git a/pysam/libcutils.pyx b/pysam/libcutils.pyx

index ab8e9a63f6a17bec2fb8c36fe052faad23cdbe16..fe61bb8f9db2a534a16369b7a2e48fe6b8a4d0b0 100644 (file)
--- a/pysam/libcutils.pyx
+++ b/pysam/libcutils.pyx
@@ -284,7 +284,7 @@ def _pysam_dispatch(collection,
              if skip_next:
                  skip_next = False
                  continue
-            if arg in SIMPLE_FLAGS or (len(arg) > 2 and arg.startswith('-@')):
+            if arg in SIMPLE_FLAGS or (len(arg) > 2 and force_bytes(arg).startswith(b'-@')):
                  continue
              if arg in ARGUMENTS:
                  skip_next = True
diff --git a/pysam/samtools.py b/pysam/samtools.py

index f81fe8f4452567fb2a6abb90403dcd6bb21fcac4..da3044dd68a21a72ee0fc445487c309f35adb40e 100644 (file)
--- a/pysam/samtools.py
+++ b/pysam/samtools.py
@@ -1,4 +1,4 @@
-from utils import PysamDispatcher
+from pysam.utils import PysamDispatcher
  
  # samtools command line options to export in python
  #
diff --git a/pysam/version.py b/pysam/version.py

index 49afa1770f0a0e20dfc159093304a4fa50f028df..da5883424e7433bc547278a7c6740e46b282b0ad 100644 (file)
--- a/pysam/version.py
+++ b/pysam/version.py
@@ -1,5 +1,5 @@
  # pysam versioning information
-__version__ = "0.15.3"
+__version__ = "0.15.4"
  
  # TODO: upgrade number
  __samtools_version__ = "1.9"
diff --git a/requirements.txt b/requirements.txt

index 6e8fc4445a0c7d5fcfd66e128283ec0d41f39011..f937d1ca07aee8843f5feb73dbdcba815abb17d2 100644 (file)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-cython>=0.24.1
+cython>=0.29.12
diff --git a/setup.py b/setup.py

index 4c97e8746431fa587a03817f7aa99feb52aa9928..970e4fcdee2ca7fe6ec69c0760c7dc825372c4e7 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@ import subprocess
  import sys
  import sysconfig
  from contextlib import contextmanager
-from setuptools import Extension, setup
+from setuptools import setup
  from cy_build import CyExtension as Extension, cy_build_ext as build_ext
  try:
      import cython
@@ -443,7 +443,7 @@ metadata = {
      'classifiers': [_f for _f in classifiers.split("\n") if _f],
      'url': "https://github.com/pysam-developers/pysam",
      'packages': package_list,
-    'requires': ['cython (>=0.21)'],
+    'requires': ['cython (>=0.29.12)'],
      'ext_modules': [Extension(**opts) for opts in modules],
      'cmdclass': cmdclass,
      'package_dir': package_dirs,
diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py

index 48589e6d1e56a86cb58a32aecad00fb8f1fd7b12..3c5dda59567559d0ed7fa34a8733980dd806b2b8 100644 (file)
--- a/tests/AlignedSegment_test.py
+++ b/tests/AlignedSegment_test.py
@@ -78,6 +78,12 @@ class TestAlignedSegment(ReadTest):
      def testCompare(self):
          '''check comparison functions.'''
          a = self.build_read()
+        b = None
+
+        self.assertFalse(a is b)
+        self.assertFalse(a == b)
+        self.assertEqual(-1, a.compare(b))
+
          b = self.build_read()
  
          self.assertEqual(0, a.compare(b))
author	Michael R. Crusoe <michael.crusoe@gmail.com>
	Thu, 23 Jan 2020 12:46:18 +0000 (13:46 +0100)
committer	Michael R. Crusoe <michael.crusoe@gmail.com>
	Thu, 23 Jan 2020 12:46:18 +0000 (13:46 +0100)
.travis.yml		patch | blob | history
doc/release.rst		patch | blob | history
pysam/bcftools.py		patch | blob | history
pysam/libcalignedsegment.pyx		patch | blob | history
pysam/libcalignmentfile.pyx		patch | blob | history
pysam/libcbcf.pyx		patch | blob | history
pysam/libcfaidx.pyx		patch | blob | history
pysam/libchtslib.pyx		patch | blob | history
pysam/libcutils.pyx		patch | blob | history
pysam/samtools.py		patch | blob | history
pysam/version.py		patch | blob | history
requirements.txt		patch | blob | history
setup.py		patch | blob | history
tests/AlignedSegment_test.py		patch | blob | history