From: Michael R. Crusoe Date: Thu, 23 Jan 2020 12:46:18 +0000 (+0100) Subject: New upstream version 0.15.4+ds X-Git-Tag: archive/raspbian/0.22.0+ds-1+rpi1~1^2^2~12^2~6 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=f4cfe1fb151bb1d64dda10ff78b974e968826c5e;p=python-pysam.git New upstream version 0.15.4+ds --- diff --git a/.travis.yml b/.travis.yml index b30d9b8..30c7c97 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,11 @@ os: language: c +stages: + - test + - name: deploy + if: tag IS present + env: matrix: - CONDA_PY=2.7 @@ -12,12 +17,26 @@ env: global: - PYSAM_LINKING_TEST=1 - TWINE_USERNAME=grepall - - secure: 'OcwwP8/o21+SGW0UVAnnCQwllhGSCq2HJzpI9EhX3kh6J9RTkyx/+drkg45bx1Z5u8zymuAFappEYzlpzqZE886XezkjOYGVa/u+Coqr1oT/BEJHFCkCA4o26yESp7Zy8aNj/juhB7Rfa77pIDXBayqTzbALz/AURMtZapasB18=' + - secure: bTbky3Un19NAl62lix8bMLmBv9IGNhFkRXlZH+B253nYub7jwQwPQKum3ct9ea+XHJT5//uM0B8WAF6eyugpNkPQ7+S7SEH5BJuCt30nv6qvGhSO2AffZKeHEDnfW2kqGrivn87TqeomlSBlO742CD/V0wOIUwkTT9tutd+E7FU= -_deploy_common: &deploy_common - if: tag IS present +_cibw_common: &cibw_common + addons: {} install: - - python3 -m pip install cibuildwheel twine + - python3 -m pip install cibuildwheel>=1.1.0 twine + script: + - set -e + - cibuildwheel --output-dir dist + - twine check dist/* + - twine upload --skip-existing dist/* + +_cibw_linux: &cibw_linux + stage: deploy + os: linux + language: python + python: '3.5' + services: + - docker + <<: *cibw_common matrix: include: @@ -25,11 +44,6 @@ matrix: os: linux language: python python: '3.5' - services: - - docker - env: - - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && pip install -r requirements.txt" - - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"' addons: apt: packages: @@ -37,28 +51,36 @@ matrix: - g++ - libcurl4-openssl-dev # for libcurl support in sdist - libssl-dev # for s3 support in sdist - <<: *deploy_common + install: + - python3 -m pip install Cython twine script: - set -e - - cibuildwheel --output-dir dist - - python3 -m pip install Cython - python3 setup.py build_ext --inplace - python3 setup.py sdist - twine check dist/* - twine upload --skip-existing dist/* + - <<: *cibw_linux + env: + - CIBW_BUILD="*_x86_64" + - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && python -m pip install -r requirements.txt" + - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"' + - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}' + - CIBW_TEST_COMMAND='python -c "import pysam"' + - <<: *cibw_linux + env: + - CIBW_BUILD="*_i686" + - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && python -m pip install -r requirements.txt" + - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"' + - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}' + - CIBW_TEST_COMMAND='python -c "import pysam"' - stage: deploy os: osx language: generic env: - - CIBW_BEFORE_BUILD="pip install -r requirements.txt" + - CIBW_BEFORE_BUILD="python -m pip install -r requirements.txt" - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"' - addons: {} - <<: *deploy_common - script: - - set -e - - cibuildwheel --output-dir dist - - twine check dist/* - - twine upload --skip-existing dist/* + - CIBW_TEST_COMMAND='python -c "import pysam"' + <<: *cibw_common addons: apt: diff --git a/doc/release.rst b/doc/release.rst index ee1875b..07d099d 100644 --- a/doc/release.rst +++ b/doc/release.rst @@ -2,6 +2,26 @@ Release notes ============= +Release 0.15.4 +============== + +Bugfix release. Principal reason for release is to update cython +version in order to fix pip install pysam with python 3.8. + +* [#879] Fix add_meta function in libcbcf.pyx, so meta-information + lines in header added with this function have double-quoting rules + in accordance to rules specified in VCF4.2 and VCF4.3 specifications +* [#863] Force arg to bytes to support non-ASCII encoding +* [#875] Bump minimum Cython version +* [#868] Prevent segfault on Python 2.7 AlignedSegment.compare(other=None) +* [#867] Fix wheel building on TravisCI +* [#863] Force arg to bytes to support non-ASCII encoding +* [#799] disambiguate interpretation of bcf_read return code +* [#841] Fix silent truncation of FASTQ with bad q strings +* [#846] Prevent segmentation fault on ID, when handling malformed records +* [#829] Run configure with the correct CC/CFLAGS/LDFLAGS env vars + + Release 0.15.3 ============== diff --git a/pysam/bcftools.py b/pysam/bcftools.py index ab891d4..9f4ce0e 100644 --- a/pysam/bcftools.py +++ b/pysam/bcftools.py @@ -1,4 +1,4 @@ -from utils import PysamDispatcher +from pysam.utils import PysamDispatcher BCFTOOLS_DISPATCH = [ "index", diff --git a/pysam/libcalignedsegment.pyx b/pysam/libcalignedsegment.pyx index 02ef5b6..9e298f1 100644 --- a/pysam/libcalignedsegment.pyx +++ b/pysam/libcalignedsegment.pyx @@ -133,7 +133,7 @@ cdef inline uint8_t strand_mark_char(uint8_t ch, bam1_t *b): else: return toupper(ch) - + cdef inline bint pileup_base_qual_skip(bam_pileup1_t * p, uint32_t threshold): cdef uint32_t c if p.qpos < p.b.core.l_qseq: @@ -143,7 +143,7 @@ cdef inline bint pileup_base_qual_skip(bam_pileup1_t * p, uint32_t threshold): if c < threshold: return True return False - + cdef inline char map_typecode_htslib_to_python(uint8_t s): """map an htslib typecode to the corresponding python typecode @@ -223,7 +223,7 @@ cdef inline uint8_t get_tag_typecode(value, value_type=None): """ # 0 is unknown typecode cdef char typecode = 0 - + if value_type is None: if isinstance(value, int): if value < 0: @@ -341,7 +341,7 @@ cdef inline pack_tags(tags): """ fmts, args = ["<"], [] - # htslib typecode + # htslib typecode cdef uint8_t typecode for tag in tags: @@ -394,7 +394,7 @@ cdef inline pack_tags(tags): if typecode not in DATATYPE2FORMAT: raise ValueError("invalid value type '{}' ({})".format(chr(typecode), array.typecode)) - + # use array.tostring() to retrieve byte representation and # save as bytes datafmt = "2sBBI%is" % (len(value) * DATATYPE2FORMAT[typecode][1]) @@ -416,7 +416,7 @@ cdef inline pack_tags(tags): typecode = get_tag_typecode(value) if typecode == 0: raise ValueError("could not deduce typecode for value {}".format(value)) - + if typecode == 'a' or typecode == 'A' or typecode == 'Z' or typecode == 'H': value = force_bytes(value) @@ -427,7 +427,7 @@ cdef inline pack_tags(tags): datafmt = "2sB%is" % (len(value)+1) else: datafmt = "2sB%s" % DATATYPE2FORMAT[typecode][0] - + args.extend([pytag[:2], typecode, value]) @@ -924,9 +924,10 @@ cdef class AlignedSegment: Parameters ---------- - header -- :class:`~pysam.AlignmentHeader` object to map numerical - identifiers to chromosome names. If not given, an empty - header is created. + header: + :class:`~pysam.AlignmentHeader` object to map numerical + identifiers to chromosome names. If not given, an empty + header is created. ''' # Now only called when instances are created from Python @@ -957,9 +958,9 @@ cdef class AlignedSegment: self.cache_query_alignment_qualities = None self.cache_query_sequence = None self.cache_query_alignment_sequence = None - + self.header = header - + def __dealloc__(self): bam_destroy1(self._delegate) @@ -999,6 +1000,10 @@ cdef class AlignedSegment: <,=,> to *other* ''' + # avoid segfault when other equals None + if other is None: + return -1 + cdef int retval, x cdef bam1_t *t cdef bam1_t *o @@ -1021,7 +1026,7 @@ cdef class AlignedSegment: cdef uint8_t *a = &t.core cdef uint8_t *b = &o.core - + retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t)) if retval: return retval @@ -1072,7 +1077,7 @@ cdef class AlignedSegment: raise ValueError('sam_format failed') else: raise NotImplementedError("todo") - + ret = force_str(line.s[:line.l]) if line.m: @@ -1088,7 +1093,8 @@ cdef class AlignedSegment: Parameters ---------- - sam -- :term:`SAM` formatted string + sam: + :term:`SAM` formatted string """ cdef AlignedSegment dest = cls.__new__(cls) @@ -1101,7 +1107,7 @@ cdef class AlignedSegment: line.s = _sam sam_parse1(&line, dest.header.ptr, dest._delegate) - + return dest cpdef tostring(self, htsfile=None): @@ -1110,13 +1116,14 @@ cdef class AlignedSegment: Parameters ---------- - htsfile -- (deprecated) AlignmentFile object to map numerical - identifiers to chromosome names. This parameter is present - for backwards compatibility and ignored. + htsfile: + (deprecated) AlignmentFile object to map numerical + identifiers to chromosome names. This parameter is present + for backwards compatibility and ignored. """ return self.to_string() - + def to_dict(self): """returns a json representation of the aligned segment. @@ -1133,8 +1140,9 @@ cdef class AlignedSegment: Parameters ---------- - sam_dict -- dictionary of alignment values, keys corresponding to output from - :meth:`todict()`. + sam_dict: + dictionary of alignment values, keys corresponding to output from + :meth:`todict()`. """ # let htslib do the parsing @@ -1143,7 +1151,7 @@ cdef class AlignedSegment: "\t".join((sam_dict[x] for x in KEY_NAMES[:-1])) + "\t" + "\t".join(sam_dict.get(KEY_NAMES[-1], [])), header) - + ######################################################## ## Basic attributes in order of appearance in SAM format property query_name: @@ -1187,7 +1195,7 @@ cdef class AlignedSegment: src.core.l_extranul = l_extranul src.core.l_qname = l + l_extranul - + # re-acquire pointer to location in memory # as it might have moved p = pysam_bam_get_qname(src) @@ -1321,7 +1329,7 @@ cdef class AlignedSegment: return self.header.get_reference_name(self._delegate.core.mtid) else: raise ValueError("next_reference_name unknown if no header associated with record") - + def __set__(self, reference): cdef int mtid if reference is None or reference == "*": @@ -1434,7 +1442,7 @@ cdef class AlignedSegment: nbytes_old, nbytes_new, p) - + if retval == NULL: raise MemoryError("could not allocate memory") @@ -1564,7 +1572,7 @@ cdef class AlignedSegment: # setting the unmapped flag requires recalculation of # bin as alignment length is now implicitely 1 update_bin(self._delegate) - + property mate_is_unmapped: """true if the mate is unmapped""" def __get__(self): @@ -1834,7 +1842,7 @@ cdef class AlignedSegment: def get_forward_sequence(self): """return the original read sequence. - + Reads mapping to the reverse strand will be reverse complemented. @@ -1849,7 +1857,7 @@ cdef class AlignedSegment: def get_forward_qualities(self): """return base qualities of the read sequence. - + Reads mapping to the reverse strand will be reversed. """ if self.is_reverse: @@ -1857,7 +1865,7 @@ cdef class AlignedSegment: else: return self.query_qualities - + def get_aligned_pairs(self, matches_only=False, with_seq=False): """a list of aligned read (query) and reference positions. @@ -2075,15 +2083,11 @@ cdef class AlignedSegment: If no cigar string is present, empty arrays will be returned. - Parameters - ---------- - - Returns - ------- - - arrays : two arrays. The first contains the nucleotide counts within - each cigar operation, the second contains the number of blocks for - each cigar operation. + Returns: + arrays : + two arrays. The first contains the nucleotide counts within + each cigar operation, the second contains the number of blocks + for each cigar operation. """ @@ -2199,7 +2203,7 @@ cdef class AlignedSegment: values = [] cdef uint32_t ncigar = len(values) - + cdef bam1_t * retval = pysam_bam_update(src, pysam_get_n_cigar(src) * 4, ncigar * 4, @@ -2245,7 +2249,7 @@ cdef class AlignedSegment: This method accepts valid SAM specification value types, which are:: - + A: printable char i: signed int f: float @@ -2260,12 +2264,12 @@ cdef class AlignedSegment: When deducing the type code by the python type of *value*, the following mapping is applied:: - + i: python int f: python float Z: python str or bytes B: python array.array, list or tuple - + Note that a single character string will be output as 'Z' and not 'A' as the former is the more general type. """ @@ -2406,27 +2410,24 @@ cdef class AlignedSegment: specification) as well as additional value type 'd' as implemented in htslib. - Parameters - ---------- + Parameters: - tag : - data tag. + tag : + data tag. - with_value_type : Optional[bool] - if set to True, the return value is a tuple of (tag value, type code). - (default False) + with_value_type : Optional[bool] + if set to True, the return value is a tuple of (tag value, type + code). (default False) - Returns - ------- + Returns: - A python object with the value of the `tag`. The type of the - object depends on the data type in the data record. + A python object with the value of the `tag`. The type of the + object depends on the data type in the data record. - Raises - ------ + Raises: - KeyError - If `tag` is not present, a KeyError is raised. + KeyError + If `tag` is not present, a KeyError is raised. """ cdef uint8_t * v @@ -2799,14 +2800,14 @@ cdef class PileupColumn: """set the minimum base quality for this pileup column. """ self.min_base_quality = min_base_quality - + def __len__(self): """return number of reads aligned to this column. see :meth:`get_num_aligned` """ return self.get_num_aligned() - + property reference_id: '''the reference sequence number as defined in the header''' def __get__(self): @@ -2883,7 +2884,7 @@ cdef class PileupColumn: def get_num_aligned(self): """return number of aligned bases at pileup column position. - + This method applies a base quality filter and the number is equal to the size of :meth:`get_query_sequences`, :meth:`get_mapping_qualities`, etc. @@ -2895,7 +2896,7 @@ cdef class PileupColumn: cdef bam_pileup1_t * p = NULL if self.plp == NULL or self.plp[0] == NULL: raise ValueError("PileupColumn accessed after iterator finished") - + for x from 0 <= x < self.n_pu: p = &(self.plp[0][x]) if p == NULL: @@ -2912,7 +2913,7 @@ cdef class PileupColumn: Optionally, the bases/sequences can be annotated according to the samtools mpileup format. This is the format description from the samtools mpileup tool:: - + Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all encoded at the read base column. At this column, a dot stands for a match @@ -2934,7 +2935,7 @@ cdef class PileupColumn: To reproduce samtools mpileup format, set all of mark_matches, mark_ends and add_indels to True. - + Parameters ---------- @@ -2954,7 +2955,7 @@ cdef class PileupColumn: If True, add bases for bases inserted into the reference and 'N's for base skipped from the reference. If a reference sequence is given, add the actual bases. - + Returns ------- @@ -2968,7 +2969,7 @@ cdef class PileupColumn: cdef uint8_t rb = 0 cdef kstring_t * buf = &self.buf cdef bam_pileup1_t * p = NULL - + if self.plp == NULL or self.plp[0] == NULL: raise ValueError("PileupColumn accessed after iterator finished") @@ -2987,7 +2988,7 @@ cdef class PileupColumn: # see samtools pileup_seq if mark_ends and p.is_head: kputc('^', buf) - + if p.b.core.qual > 93: kputc(126, buf) else: @@ -3059,7 +3060,7 @@ cdef class PileupColumn: raise ValueError( "pileup buffer out of sync - most likely use of iterator " "outside loop") - + if p.qpos < p.b.core.l_qseq: c = bam_get_qual(p.b)[p.qpos] else: @@ -3079,7 +3080,7 @@ cdef class PileupColumn: """ if self.plp == NULL or self.plp[0] == NULL: raise ValueError("PileupColumn accessed after iterator finished") - + cdef uint32_t x = 0 cdef bam_pileup1_t * p = NULL result = [] @@ -3089,7 +3090,7 @@ cdef class PileupColumn: raise ValueError( "pileup buffer out of sync - most likely use of iterator " "outside loop") - + if pileup_base_qual_skip(p, self.min_base_quality): continue result.append(p.b.core.qual) @@ -3115,7 +3116,7 @@ cdef class PileupColumn: raise ValueError( "pileup buffer out of sync - most likely use of iterator " "outside loop") - + if pileup_base_qual_skip(p, self.min_base_quality): continue result.append(p.qpos) @@ -3131,7 +3132,7 @@ cdef class PileupColumn: """ if self.plp == NULL or self.plp[0] == NULL: raise ValueError("PileupColumn accessed after iterator finished") - + cdef uint32_t x = 0 cdef bam_pileup1_t * p = NULL result = [] @@ -3141,12 +3142,12 @@ cdef class PileupColumn: raise ValueError( "pileup buffer out of sync - most likely use of iterator " "outside loop") - + if pileup_base_qual_skip(p, self.min_base_quality): continue result.append(charptr_to_str(pysam_bam_get_qname(p.b))) return result - + cdef class PileupRead: '''Representation of a read aligned to a particular position in the @@ -3230,7 +3231,7 @@ cdef class PileupRead: def __get__(self): return self._is_refskip - + cpdef enum CIGAR_OPS: CMATCH = 0 @@ -3246,30 +3247,30 @@ cpdef enum CIGAR_OPS: cpdef enum SAM_FLAGS: - # the read is paired in sequencing, no matter whether it is mapped in a pair + # the read is paired in sequencing, no matter whether it is mapped in a pair FPAIRED = 1 - # the read is mapped in a proper pair + # the read is mapped in a proper pair FPROPER_PAIR = 2 - # the read itself is unmapped; conflictive with FPROPER_PAIR + # the read itself is unmapped; conflictive with FPROPER_PAIR FUNMAP = 4 - # the mate is unmapped + # the mate is unmapped FMUNMAP = 8 - # the read is mapped to the reverse strand + # the read is mapped to the reverse strand FREVERSE = 16 - # the mate is mapped to the reverse strand + # the mate is mapped to the reverse strand FMREVERSE = 32 - # this is read1 + # this is read1 FREAD1 = 64 - # this is read2 + # this is read2 FREAD2 = 128 - # not primary alignment + # not primary alignment FSECONDARY = 256 - # QC failure + # QC failure FQCFAIL = 512 - # optical or PCR duplicate + # optical or PCR duplicate FDUP = 1024 - # supplementary alignment - FSUPPLEMENTARY = 2048 + # supplementary alignment + FSUPPLEMENTARY = 2048 __all__ = [ diff --git a/pysam/libcalignmentfile.pyx b/pysam/libcalignmentfile.pyx index d35b0db..0c69a4f 100644 --- a/pysam/libcalignmentfile.pyx +++ b/pysam/libcalignmentfile.pyx @@ -265,7 +265,7 @@ cdef class AlignmentHeader(object): if self.ptr.target_name[x] == NULL: raise MemoryError("could not allocate {} bytes".format(len(name) + 1, sizeof(char))) strncpy(self.ptr.target_name[x], name, len(name)) - + return self @classmethod @@ -282,9 +282,9 @@ cdef class AlignmentHeader(object): raise KeyError("incomplete sequence information in '%s'" % str(fields)) except ValueError: raise ValueError("wrong sequence information in '%s'" % str(fields)) - + return cls._from_text_and_lengths(text, reference_names, reference_lengths) - + @classmethod def from_dict(cls, header_dict): @@ -380,12 +380,12 @@ cdef class AlignmentHeader(object): def _build_sequence_section(self): """return sequence section of header. - + The sequence section is built from the list of reference names and lengths stored in the BAM-file and not from any @SQ entries that are part of the header's text section. """ - + cdef int x text = [] for x in range(self.ptr.n_targets): @@ -393,7 +393,7 @@ cdef class AlignmentHeader(object): force_str(self.ptr.target_name[x]), self.ptr.target_len[x])) return "".join(text) - + def to_dict(self): """return two-level dictionary with header information from the file. @@ -504,7 +504,7 @@ cdef class AlignmentHeader(object): raise KeyError("unknown reference {}".format(reference)) else: return self.ptr.target_len[tid] - + def is_valid_tid(self, int tid): """ return True if the numerical :term:`tid` is valid; False otherwise. @@ -522,7 +522,7 @@ cdef class AlignmentHeader(object): """ reference = force_bytes(reference) return bam_name2id(self.ptr, reference) - + def __str__(self): '''string with the full contents of the :term:`sam file` header as a string. @@ -561,7 +561,7 @@ cdef class AlignmentHeader(object): def get(self, *args): return self.to_dict().get(*args) - + def __len__(self): return self.to_dict().__len__() @@ -887,7 +887,7 @@ cdef class AlignmentFile(HTSFile): raise ValueError( "either supply options `template`, `header`, `text` or both `reference_names` " "and `reference_lengths` for writing") - + if template: # header is copied, though at the moment not strictly # necessary as AlignmentHeader is immutable. @@ -978,7 +978,7 @@ cdef class AlignmentFile(HTSFile): "SAM? file does not have a valid header (mode='%s'), " "please provide reference_names and reference_lengths") self.header = makeAlignmentHeader(hdr) - + # set filename with reference sequences if self.is_cram and reference_filename: creference_filename = self.reference_filename @@ -1283,7 +1283,7 @@ cdef class AlignmentFile(HTSFile): ignore orphans (paired reads that are not in a proper pair). The default is to ignore orphans. - + min_base_quality: int Minimum base quality. Bases below the minimum quality will @@ -1323,7 +1323,7 @@ cdef class AlignmentFile(HTSFile): """ cdef int rtid, has_coord cdef int32_t rstart, rstop - + if not self.is_open: raise ValueError("I/O operation on closed file") @@ -1534,7 +1534,7 @@ cdef class AlignmentFile(HTSFile): raise ValueError("interval of size 0") if _stop < _start: raise ValueError("interval of size less than 0") - + cdef int length = _stop - _start cdef c_array.array int_array_template = array.array('L', []) cdef c_array.array count_a @@ -1582,7 +1582,7 @@ cdef class AlignmentFile(HTSFile): # count seq = read.seq quality = read.query_qualities - + for qpos, refpos in read.get_aligned_pairs(True): if qpos is not None and refpos is not None and \ _start <= refpos < _stop: @@ -1646,14 +1646,14 @@ cdef class AlignmentFile(HTSFile): base_position = r.pos for op, nt in r.cigartuples: - if op in match_or_deletion: + if op in match_or_deletion: base_position += nt - elif op == BAM_CREF_SKIP: + elif op == BAM_CREF_SKIP: junc_start = base_position base_position += nt res[(junc_start, base_position)] += 1 return res - + def close(self): '''closes the :class:`pysam.AlignmentFile`.''' @@ -1705,16 +1705,14 @@ cdef class AlignmentFile(HTSFile): ''' write a single :class:`pysam.AlignedSegment` to disk. - Raises - ------ - ValueError - if the writing failed - - Returns - ------- + Raises: + ValueError + if the writing failed - int : the number of bytes written. If the file is closed, - this will be 0. + Returns: + int : + the number of bytes written. If the file is closed, + this will be 0. ''' if not self.is_open: return 0 @@ -1724,7 +1722,7 @@ cdef class AlignmentFile(HTSFile): "AlignedSegment refers to reference number {} that " "is larger than the number of references ({}) in the header".format( read._delegate.core.tid, self.header.ptr.n_targets)) - + cdef int ret with nogil: ret = sam_write1(self.htsfile, @@ -1800,12 +1798,12 @@ cdef class AlignmentFile(HTSFile): """return statistics about mapped/unmapped reads per chromosome as they are stored in the index. - Returns - ------- - list : a list of records for each chromosome. Each record has the attributes 'contig', - 'mapped', 'unmapped' and 'total'. + Returns: + list : + a list of records for each chromosome. Each record has the + attributes 'contig', 'mapped', 'unmapped' and 'total'. """ - + self.check_index() cdef int tid cdef uint64_t mapped, unmapped @@ -1820,7 +1818,7 @@ cdef class AlignmentFile(HTSFile): mapped, unmapped, mapped + unmapped))) - + return results ############################################################### @@ -1899,7 +1897,7 @@ cdef class AlignmentFile(HTSFile): if self.header is None: raise ValueError("header not available in closed files") return self.header.get_reference_length(reference) - + property nreferences: """int with the number of :term:`reference` sequences in the file. This is a read-only attribute.""" @@ -1978,7 +1976,7 @@ cdef class IteratorRow: cdef char *cfilename cdef char *creference_filename cdef char *cindexname = NULL - + if not samfile.is_open: raise ValueError("I/O operation on closed file") @@ -1989,7 +1987,7 @@ cdef class IteratorRow: # reopen the file - note that this makes the iterator # slow and causes pileup to slow down significantly. if multiple_iterators: - + cfilename = samfile.filename with nogil: self.htsfile = hts_open(cfilename, 'r') @@ -2002,7 +2000,7 @@ cdef class IteratorRow: self.index = sam_index_load2(self.htsfile, cfilename, cindexname) else: self.index = NULL - + # need to advance in newly opened file to position after header # better: use seek/tell? with nogil: @@ -2012,7 +2010,7 @@ cdef class IteratorRow: self.header = makeAlignmentHeader(hdr) self.owns_samfile = True - + # options specific to CRAM files if samfile.is_cram and samfile.reference_filename: creference_filename = samfile.reference_filename @@ -2094,7 +2092,7 @@ cdef class IteratorRowRegion(IteratorRow): raise IOError('truncated file') else: raise IOError("error while reading file {}: {}".format(self.samfile.filename, self.retval)) - + def __dealloc__(self): hts_itr_destroy(self.iter) @@ -2326,7 +2324,7 @@ cdef int __advance_nofilter(void *data, bam1_t *b): cdef int __advance_all(void *data, bam1_t *b): - '''only use reads for pileup passing basic filters such as + '''only use reads for pileup passing basic filters such as BAM_FUNMAP, BAM_FSECONDARY, BAM_FQCFAIL, BAM_FDUP ''' @@ -2362,7 +2360,7 @@ cdef int __advance_samtools(void * data, bam1_t * b): continue if d.flag_require and not (b.core.flag & d.flag_require): continue - + # reload sequence if d.fastafile != NULL and b.core.tid != d.tid: if d.seq != NULL: @@ -2388,21 +2386,21 @@ cdef int __advance_samtools(void * data, bam1_t * b): sam_prob_realn(b, d.seq, d.seq_len, 7) else: sam_prob_realn(b, d.seq, d.seq_len, 3) - + if d.seq != NULL and d.adjust_capq_threshold > 10: q = sam_cap_mapq(b, d.seq, d.seq_len, d.adjust_capq_threshold) if q < 0: continue elif b.core.qual > q: b.core.qual = q - + if b.core.qual < d.min_mapping_quality: continue if d.ignore_orphans and b.core.flag & BAM_FPAIRED and not (b.core.flag & BAM_FPROPER_PAIR): continue - + break - + return ret @@ -2453,7 +2451,7 @@ cdef class IteratorColumn: self.iterdata.compute_baq = kwargs.get("compute_baq", True) self.iterdata.redo_baq = kwargs.get("redo_baq", False) self.iterdata.ignore_orphans = kwargs.get("ignore_orphans", True) - + self.tid = 0 self.pos = 0 self.n_plp = 0 @@ -2497,7 +2495,7 @@ cdef class IteratorColumn: ''' return true if iterator is associated with a reference''' return self.fastafile - + cdef _setup_iterator(self, int tid, int start, @@ -2523,7 +2521,7 @@ cdef class IteratorColumn: cdef void * data[1] data[0] = &self.iterdata - + if self.stepper is None or self.stepper == "all": with nogil: self.pileup_iter = bam_mplp_init(1, @@ -2550,7 +2548,7 @@ cdef class IteratorColumn: if self.ignore_overlaps: with nogil: bam_mplp_init_overlaps(self.pileup_iter) - + cdef reset(self, tid, start, stop): '''reset iterator position. @@ -2572,7 +2570,7 @@ cdef class IteratorColumn: # &self.iterdata) with nogil: bam_mplp_reset(self.pileup_iter) - + cdef _free_pileup_iter(self): '''free the memory alloc'd by bam_plp_init. @@ -2593,9 +2591,9 @@ cdef class IteratorColumn: if self.iterdata.seq != NULL: free(self.iterdata.seq) self.iterdata.seq = NULL - + # backwards compatibility - + def hasReference(self): return self.has_reference() cdef char * getSequence(self): @@ -2603,7 +2601,7 @@ cdef class IteratorColumn: def addReference(self, FastaFile fastafile): return self.add_reference(fastafile) - + cdef class IteratorColumnRegion(IteratorColumn): '''iterates over a region only. ''' @@ -2630,7 +2628,7 @@ cdef class IteratorColumnRegion(IteratorColumn): def __next__(self): cdef int n - + while 1: n = self.cnext() if n < 0: @@ -2823,7 +2821,7 @@ cdef class IndexedReads: cdef uint64_t pos cdef bam_hdr_t * hdr = self.header.ptr - + while ret > 0: with nogil: pos = bgzf_tell(hts_get_bgzfp(self.htsfile)) diff --git a/pysam/libcbcf.pyx b/pysam/libcbcf.pyx index e40a801..c0cee5c 100644 --- a/pysam/libcbcf.pyx +++ b/pysam/libcbcf.pyx @@ -418,7 +418,7 @@ cdef bcf_copy_expand_array(void *src_data, int src_type, size_t src_values, """copy data from src to dest where the size of the elements (src_type/dst_type) differ as well as the number of elements (src_values/dst_values). """ - + cdef char *src_datac cdef char *dst_datac cdef int8_t *src_datai8 @@ -817,7 +817,7 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value): if key == 'phased': sample.phased = bool(value) return - + cdef bcf_hdr_t *hdr = sample.record.header.ptr cdef bcf1_t *r = sample.record.ptr cdef int fmt_id @@ -863,7 +863,7 @@ cdef bcf_format_set_value(VariantRecordSample sample, key, value): &value_count, &scalar, &realloc) vlen = value_count < 0 value_count = len(values) - + # If we can, write updated values to existing allocated storage. if fmt and not realloc: r.d.indiv_dirty = 1 @@ -2124,6 +2124,7 @@ cdef class VariantHeader(object): if self.ptr.dirty: bcf_hdr_sync(self.ptr) + def add_meta(self, key, value=None, items=None): """Add metadata to this header""" if not ((value is not None) ^ (items is not None)): @@ -2140,11 +2141,16 @@ cdef class VariantHeader(object): hrec.value = strdup(force_bytes(value)) else: for key, value in items: + + quoted = True + if key in set(("ID", "Number", "Type")): + quoted = False + key = force_bytes(key) bcf_hrec_add_key(hrec, key, len(key)) value = force_bytes(str(value)) - quoted = strpbrk(value, ' ;,"\t<>') != NULL + bcf_hrec_set_val(hrec, hrec.nkeys-1, value, len(value), quoted) except: bcf_hrec_destroy(hrec) @@ -3134,6 +3140,8 @@ cdef class VariantRecord(object): raise ValueError('Error unpacking VariantRecord') # causes a memory leak https://github.com/pysam-developers/pysam/issues/773 # return bcf_str_cache_get_charptr(r.d.id) if r.d.id != b'.' else None + if (r.d.m_id == 0): + raise ValueError('Error extracing ID') return charptr_to_str(r.d.id) if r.d.id != b'.' else None @id.setter @@ -4080,6 +4088,7 @@ cdef class VariantFile(HTSFile): def __next__(self): cdef int ret + cdef int errcode cdef bcf1_t *record = bcf_init1() if not record: @@ -4093,7 +4102,10 @@ cdef class VariantFile(HTSFile): ret = bcf_read1(self.htsfile, self.header.ptr, record) if ret < 0: + errcode = record.errcode bcf_destroy1(record) + if errcode: + raise IOError('unable to parse next record') if ret == -1: raise StopIteration elif ret == -2: @@ -4445,3 +4457,4 @@ cdef class VariantFile(HTSFile): # potentially unnecessary optimization that also sets max_unpack if not include_samples: self.drop_samples = True + diff --git a/pysam/libcfaidx.pyx b/pysam/libcfaidx.pyx index 40d8430..f9b8f6e 100644 --- a/pysam/libcfaidx.pyx +++ b/pysam/libcfaidx.pyx @@ -646,8 +646,14 @@ cdef class FastxFile: if self.persist: return FastxRecord(proxy=makeFastqProxy(self.entry)) return makeFastqProxy(self.entry) - else: + elif (l == -1): raise StopIteration + elif (l == -2): + raise ValueError('truncated quality string in {0}' + .format(self._filename)) + else: + raise ValueError('unknown problem parsing {0}' + .format(self._filename)) # Compatibility Layer for pysam 0.8.1 cdef class FastqFile(FastxFile): diff --git a/pysam/libchtslib.pyx b/pysam/libchtslib.pyx index c03c7cf..b4dcaa8 100644 --- a/pysam/libchtslib.pyx +++ b/pysam/libchtslib.pyx @@ -617,9 +617,10 @@ cdef class HTSFile(object): Returns ------- - tuple : a tuple of `flag`, :term:`tid`, `start` and - `stop`. The flag indicates whether no coordinates were - supplied and the genomic region is the complete genomic space. + tuple : + a tuple of `flag`, :term:`tid`, `start` and + `stop`. The flag indicates whether no coordinates were + supplied and the genomic region is the complete genomic space. Raises ------ diff --git a/pysam/libcutils.pyx b/pysam/libcutils.pyx index ab8e9a6..fe61bb8 100644 --- a/pysam/libcutils.pyx +++ b/pysam/libcutils.pyx @@ -284,7 +284,7 @@ def _pysam_dispatch(collection, if skip_next: skip_next = False continue - if arg in SIMPLE_FLAGS or (len(arg) > 2 and arg.startswith('-@')): + if arg in SIMPLE_FLAGS or (len(arg) > 2 and force_bytes(arg).startswith(b'-@')): continue if arg in ARGUMENTS: skip_next = True diff --git a/pysam/samtools.py b/pysam/samtools.py index f81fe8f..da3044d 100644 --- a/pysam/samtools.py +++ b/pysam/samtools.py @@ -1,4 +1,4 @@ -from utils import PysamDispatcher +from pysam.utils import PysamDispatcher # samtools command line options to export in python # diff --git a/pysam/version.py b/pysam/version.py index 49afa17..da58834 100644 --- a/pysam/version.py +++ b/pysam/version.py @@ -1,5 +1,5 @@ # pysam versioning information -__version__ = "0.15.3" +__version__ = "0.15.4" # TODO: upgrade number __samtools_version__ = "1.9" diff --git a/requirements.txt b/requirements.txt index 6e8fc44..f937d1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -cython>=0.24.1 +cython>=0.29.12 diff --git a/setup.py b/setup.py index 4c97e87..970e4fc 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ import subprocess import sys import sysconfig from contextlib import contextmanager -from setuptools import Extension, setup +from setuptools import setup from cy_build import CyExtension as Extension, cy_build_ext as build_ext try: import cython @@ -443,7 +443,7 @@ metadata = { 'classifiers': [_f for _f in classifiers.split("\n") if _f], 'url': "https://github.com/pysam-developers/pysam", 'packages': package_list, - 'requires': ['cython (>=0.21)'], + 'requires': ['cython (>=0.29.12)'], 'ext_modules': [Extension(**opts) for opts in modules], 'cmdclass': cmdclass, 'package_dir': package_dirs, diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py index 48589e6..3c5dda5 100644 --- a/tests/AlignedSegment_test.py +++ b/tests/AlignedSegment_test.py @@ -78,6 +78,12 @@ class TestAlignedSegment(ReadTest): def testCompare(self): '''check comparison functions.''' a = self.build_read() + b = None + + self.assertFalse(a is b) + self.assertFalse(a == b) + self.assertEqual(-1, a.compare(b)) + b = self.build_read() self.assertEqual(0, a.compare(b))