From c78787bab14a9327e1fb5b3200738ff74db3a267 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=89tienne=20Mollier?= Date: Sun, 12 May 2024 14:24:32 +0200 Subject: [PATCH] New upstream version 0.22.1+ds --- .cirrus.yml | 10 ++- NEWS | 30 +++++++ devtools/artifactname.py | 25 ++++++ devtools/environment-dev.yaml | 6 ++ devtools/install-prerequisites.sh | 10 +-- doc/conf.py | 2 +- linker_tests/link_pre_489/cy_build.py | 6 +- pysam/__init__.py | 2 +- pysam/libcalignedsegment.pyi | 8 ++ pysam/libcalignmentfile.pyx | 39 ++++------ pysam/libcbcf.pyi | 2 +- pysam/libctabix.pyx | 6 +- pysam/libctabixproxies.pyx | 13 ++-- pysam/libcutils.pyx | 4 +- pysam/version.py | 2 +- setup.py | 21 ++--- tests/AlignedSegment_test.py | 12 +-- tests/AlignmentFileHeader_test.py | 4 +- tests/AlignmentFilePileup_test.py | 4 +- tests/AlignmentFile_test.py | 17 +--- tests/StreamFiledescriptors_test.py | 4 - tests/TestUtils.py | 35 +++------ tests/VariantFile_test.py | 30 +++---- tests/VariantRecord_test.py | 5 -- tests/samtools_test.py | 35 ++------- tests/tabix_test.py | 108 +++++++++++++------------- tests/tabixproxies_test.py | 21 ++--- 27 files changed, 218 insertions(+), 243 deletions(-) create mode 100755 devtools/artifactname.py create mode 100644 devtools/environment-dev.yaml diff --git a/.cirrus.yml b/.cirrus.yml index edf235c..2b20b31 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -17,7 +17,7 @@ build_wheels_task: - name: Build ARM macOS wheels macos_instance: - image: ghcr.io/cirruslabs/macos-ventura-base:latest + image: ghcr.io/cirruslabs/macos-sonoma-base:latest env: CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*" @@ -26,11 +26,17 @@ build_wheels_task: env: CIRRUS_CLONE_DEPTH: 1 + VENV: $HOME/relenv + PATH: $VENV/bin:$PATH + CIBW_SKIP: "*-musllinux_*" + CIBW_BUILD_VERBOSITY: 1 + CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 install_script: | - python3 -m pip install cibuildwheel==2.16.2 + python3 -m venv $VENV + pip3 install cibuildwheel==2.17.0 build_script: | cibuildwheel diff --git a/NEWS b/NEWS index ad7cfb1..83ee803 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,36 @@ .. An online version of the release history can be found here: .. http://pysam.readthedocs.io/en/latest/release.html +Release 0.22.1 +============== + +.. rubric:: 24 April 2024 + +Bugfix release, which still wraps htslib/samtools/bcftools 1.18. + +Bugs fixed: + +* Preserve all header field tags defined in the SAM specification (notably TP) + in :meth:`.AlignmentHeader.from_dict` and :meth:`.AlignmentHeader.to_dict` + (#1237, PR #1238, thanks to Tim Fennell and Nils Homer) + +* Adjust HTSlib's Makefile so that ``make distclean`` no longer tries to + rebuild the htscodecs configury (PR #1247, reported by Nicola Soranzo) + +* Reinstate S3 support in pre-built Linux wheels: support for this protocol + was inadvertently omitted from the pre-built 0.22.0 wheels on Linux + (#1249, #1277, etc varying circumstances; likely it is this that was + reported by Mathew Baines, Benjamin Sargsyan, et al) + +* Add missing :attr:`.AlignedSegment.is_mapped` etc properties to type stubs + (PR #1273, thanks to Matt Stone) + +* Fix off-by-one NamedTupleProxy, :class:`.asBed`, etc array bounds check + (#1279, reported by Dan Bolser) + +* Make pysam's klib headers compatible with C++ (reported by Martin Grigorov) + + Release 0.22.0 ============== diff --git a/devtools/artifactname.py b/devtools/artifactname.py new file mode 100755 index 0000000..8d3de4d --- /dev/null +++ b/devtools/artifactname.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 + +import os +import re +import sys + +pattern = re.compile(r'-cp([^-]*)-cp[^-]*-([^_]*)[0-9_]*_([^.]*)') +vers = set() +plats = set() +arches = set() + +for fname in sys.argv[1:]: + m = pattern.search(fname) + vers.add(int(m[1])) + plats.add(m[2]) + arches.add(m[3]) + +plat = '-'.join(sorted(plats)) +arch = '-'.join(sorted(arches)) +ver = '-'.join(map(str, sorted(vers))) + +tag = os.environ.get('NAMETAG', 'none') +tag = f'-{tag}' if tag != 'none' else '' + +print(f'artifactname=wheels-{plat}-{arch}-{ver}{tag}') diff --git a/devtools/environment-dev.yaml b/devtools/environment-dev.yaml new file mode 100644 index 0000000..c98cc47 --- /dev/null +++ b/devtools/environment-dev.yaml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - cython + - setuptools diff --git a/devtools/install-prerequisites.sh b/devtools/install-prerequisites.sh index eaedce1..83be6aa 100755 --- a/devtools/install-prerequisites.sh +++ b/devtools/install-prerequisites.sh @@ -3,26 +3,26 @@ if test -x /usr/bin/dnf; then echo Installing prerequisites via dnf... dnf -y install epel-release - dnf -y install zlib-devel bzip2-devel xz-devel curl-devel samtools bcftools htslib-tools + dnf -y install zlib-devel bzip2-devel xz-devel curl-devel openssl-devel samtools bcftools htslib-tools elif test -x /usr/bin/yum; then if yum -y install epel-release; then echo Installing prerequisites via yum... - yum -y install zlib-devel bzip2-devel xz-devel curl-devel samtools bcftools htslib-tools + yum -y install zlib-devel bzip2-devel xz-devel curl-devel openssl-devel samtools bcftools htslib-tools else echo Installing non-test prerequisites via yum... - yum -y install zlib-devel bzip2-devel xz-devel curl-devel + yum -y install zlib-devel bzip2-devel xz-devel curl-devel openssl-devel fi elif test -d /etc/dpkg; then echo Installing prerequisites via apt-get... apt-get update - apt-get install -y --no-install-recommends --no-install-suggests libcurl4-openssl-dev zlib1g-dev libbz2-dev liblzma-dev samtools bcftools tabix + apt-get install -y --no-install-recommends --no-install-suggests libcurl4-openssl-dev libssl-dev zlib1g-dev libbz2-dev liblzma-dev samtools bcftools tabix elif test -x /sbin/apk; then echo Installing non-test prerequisites via apk... apk update - apk add zlib-dev bzip2-dev xz-dev curl-dev + apk add zlib-dev bzip2-dev xz-dev curl-dev openssl-dev elif test -x ${HOMEBREW_PREFIX-/usr/local}/bin/brew; then echo Installing prerequisites via brew... diff --git a/doc/conf.py b/doc/conf.py index 1ada4bc..40081eb 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -51,7 +51,7 @@ master_doc = 'index' # General information about the project. project = u'pysam' -copyright = '2009–2023 Andreas Heger, John Marshall, Kevin Jacobs, et al' +copyright = '2009–2024 Andreas Heger, John Marshall, Kevin Jacobs, et al' # Included at the end of each rst file rst_epilog = ''' diff --git a/linker_tests/link_pre_489/cy_build.py b/linker_tests/link_pre_489/cy_build.py index d741d49..2f7a6b9 100644 --- a/linker_tests/link_pre_489/cy_build.py +++ b/linker_tests/link_pre_489/cy_build.py @@ -31,7 +31,7 @@ def is_pip_install(): class CyExtension(Extension): def __init__(self, *args, **kwargs): self._init_func = kwargs.pop("init_func", None) - Extension.__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) def extend_includes(self, includes): self.include_dirs.extend(includes) @@ -65,7 +65,7 @@ class cy_build_ext(build_ext): # @loader_path. This will allow Python packages to find the library # in the expected place, while still giving enough flexibility to # external applications to link against the library. - relative_module_path = ext.name.replace(".", os.sep) + (get_config_var('EXT_SUFFIX') or get_config_var('SO')) + relative_module_path = ext.name.replace(".", os.sep) + get_config_var('EXT_SUFFIX') library_path = os.path.join( "@rpath", os.path.basename(relative_module_path) ) @@ -83,4 +83,4 @@ class cy_build_ext(build_ext): ext.extra_link_args += ['-Wl,-rpath,$ORIGIN'] - build_ext.build_extension(self, ext) + super().build_extension(ext) diff --git a/pysam/__init__.py b/pysam/__init__.py index 9920f64..81fbe6b 100644 --- a/pysam/__init__.py +++ b/pysam/__init__.py @@ -96,5 +96,5 @@ def get_libraries(): if pysam.config.HTSLIB == "builtin": pysam_libs.append('libchtslib') - so = sysconfig.get_config_var('EXT_SUFFIX') or sysconfig.get_config_var('SO') + so = sysconfig.get_config_var('EXT_SUFFIX') return [os.path.join(dirname, x + so) for x in pysam_libs] diff --git a/pysam/libcalignedsegment.pyi b/pysam/libcalignedsegment.pyi index 4e4b1d6..bea806e 100644 --- a/pysam/libcalignedsegment.pyi +++ b/pysam/libcalignedsegment.pyi @@ -88,8 +88,12 @@ class AlignedSegment: is_proper_pair: bool is_unmapped: bool mate_is_unmapped: bool + is_mapped: bool + mate_is_mapped: bool is_reverse: bool mate_is_reverse: bool + is_forward: bool + mate_is_forward: bool is_read1: bool is_read2: bool is_secondary: bool @@ -121,6 +125,10 @@ class AlignedSegment: @property def query_alignment_end(self) -> int: ... @property + def modified_bases(self) -> Optional[Dict[Tuple[str, int, str], List[Tuple[int, int]]]]: ... + @property + def modified_bases_forward(self) -> Optional[Dict[Tuple[str, int, str], List[Tuple[int, int]]]]: ... + @property def query_alignment_length(self) -> int: ... def infer_query_length(self) -> Optional[int]: ... def infer_read_length(self) -> Optional[int]: ... diff --git a/pysam/libcalignmentfile.pyx b/pysam/libcalignmentfile.pyx index 97d4e6d..9a92d07 100644 --- a/pysam/libcalignmentfile.pyx +++ b/pysam/libcalignmentfile.pyx @@ -111,27 +111,30 @@ VALID_HEADER_TYPES = {"HD" : Mapping, VALID_HEADERS = ("HD", "SQ", "RG", "PG", "CO") # default type conversions within SAM header records -KNOWN_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str}, +KNOWN_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str, + "SS" : str,}, "SQ" : {"SN" : str, "LN" : int, "AS" : str, "M5" : str, "SP" : str, "UR" : str, - "AH" : str,}, + "AH" : str, "TP" : str, "DS" : str, + "AN" : str,}, "RG" : {"ID" : str, "CN" : str, "DS" : str, "DT" : str, "FO" : str, "KS" : str, "LB" : str, "PG" : str, "PI" : str, "PL" : str, "PM" : str, "PU" : str, - "SM" : str,}, + "SM" : str, "BC" : str,}, "PG" : {"ID" : str, "PN" : str, "CL" : str, "PP" : str, "DS" : str, "VN" : str,},} # output order of fields within records. Ensure that CL is at # the end as parsing a CL will ignore any subsequent records. -VALID_HEADER_ORDER = {"HD" : ("VN", "SO", "GO"), +VALID_HEADER_ORDER = {"HD" : ("VN", "SO", "SS", "GO"), "SQ" : ("SN", "LN", "AS", "M5", - "UR", "SP", "AH"), + "UR", "SP", "AH", "TP", + "DS", "AN"), "RG" : ("ID", "CN", "SM", "LB", "PU", "PI", "DT", "DS", "PL", "FO", "KS", "PG", - "PM"), + "PM", "BC"), "PG" : ("PN", "ID", "VN", "PP", "DS", "CL"),} @@ -2073,8 +2076,7 @@ cdef class IteratorRowRegion(IteratorRow): if not samfile.has_index(): raise ValueError("no index available for iteration") - IteratorRow.__init__(self, samfile, - multiple_iterators=multiple_iterators) + super().__init__(samfile, multiple_iterators=multiple_iterators) with nogil: self.iter = sam_itr_queryi( self.index, @@ -2130,9 +2132,7 @@ cdef class IteratorRowHead(IteratorRow): AlignmentFile samfile, int n, int multiple_iterators=False): - - IteratorRow.__init__(self, samfile, - multiple_iterators=multiple_iterators) + super().__init__(samfile, multiple_iterators=multiple_iterators) self.max_rows = n self.current_row = 0 @@ -2180,11 +2180,8 @@ cdef class IteratorRowAll(IteratorRow): """ - def __init__(self, AlignmentFile samfile, - int multiple_iterators=False): - - IteratorRow.__init__(self, samfile, - multiple_iterators=multiple_iterators) + def __init__(self, AlignmentFile samfile, int multiple_iterators=False): + super().__init__(samfile, multiple_iterators=multiple_iterators) def __iter__(self): return self @@ -2223,11 +2220,8 @@ cdef class IteratorRowAllRefs(IteratorRow): """ - def __init__(self, AlignmentFile samfile, - multiple_iterators=False): - - IteratorRow.__init__(self, samfile, - multiple_iterators=multiple_iterators) + def __init__(self, AlignmentFile samfile, multiple_iterators=False): + super().__init__(samfile, multiple_iterators=multiple_iterators) if not samfile.has_index(): raise ValueError("no index available for fetch") @@ -2288,8 +2282,7 @@ cdef class IteratorRowSelection(IteratorRow): """ def __init__(self, AlignmentFile samfile, positions, int multiple_iterators=True): - - IteratorRow.__init__(self, samfile, multiple_iterators=multiple_iterators) + super().__init__(samfile, multiple_iterators=multiple_iterators) self.positions = positions self.current_pos = 0 diff --git a/pysam/libcbcf.pyi b/pysam/libcbcf.pyi index bb875dd..d62f169 100644 --- a/pysam/libcbcf.pyi +++ b/pysam/libcbcf.pyi @@ -20,7 +20,7 @@ if sys.version_info < (3, 8): else: from typing import Literal -from .libchtslib import HTSFile, _HasFileNo +from pysam.libchtslib import HTSFile, _HasFileNo _D = TypeVar("_D") _K = TypeVar("_K", str, Union[int, str]) diff --git a/pysam/libctabix.pyx b/pysam/libctabix.pyx index 97e3403..b38cf55 100644 --- a/pysam/libctabix.pyx +++ b/pysam/libctabix.pyx @@ -697,10 +697,8 @@ cdef class TabixIteratorParsed(TabixIterator): Returns parsed data. """ - def __init__(self, - Parser parser): - - TabixIterator.__init__(self) + def __init__(self, Parser parser): + super().__init__() self.parser = parser def __next__(self): diff --git a/pysam/libctabixproxies.pyx b/pysam/libctabixproxies.pyx index 9aebf0b..152c3c3 100644 --- a/pysam/libctabixproxies.pyx +++ b/pysam/libctabixproxies.pyx @@ -365,14 +365,14 @@ cdef class NamedTupleProxy(TupleProxy): '''set attribute.''' cdef int idx idx, f = self.map_key2field[key] - if self.nfields < idx: + if idx >= self.nfields: raise KeyError("field %s not set" % key) TupleProxy.__setitem__(self, idx, str(value)) def __getattr__(self, key): cdef int idx idx, f = self.map_key2field[key] - if self.nfields < idx: + if idx >= self.nfields: raise KeyError("field %s not set" % key) if f == str: return force_str(self.fields[idx], @@ -494,7 +494,7 @@ cdef class GTFProxy(NamedTupleProxy): toDot(self.frame), self.attributes)) else: - return TupleProxy.__str__(self) + return super().__str__() def invert(self, int lcontig): '''invert coordinates to negative strand coordinates @@ -732,7 +732,7 @@ cdef class BedProxy(NamedTupleProxy): nbytes does not include the terminal '\0'. ''' - TupleProxy.update(self, buffer, nbytes) + NamedTupleProxy.update(self, buffer, nbytes) if self.nfields < 3: raise ValueError( @@ -754,11 +754,10 @@ cdef class BedProxy(NamedTupleProxy): # def __get__( self ): return self.end def __str__(self): - cdef int save_fields = self.nfields # ensure fields to use correct format self.nfields = self.bedfields - retval = TupleProxy.__str__(self) + retval = super().__str__() self.nfields = save_fields return retval @@ -801,7 +800,7 @@ cdef class VCFProxy(NamedTupleProxy): nbytes does not include the terminal '\0'. ''' - TupleProxy.update(self, buffer, nbytes) + NamedTupleProxy.update(self, buffer, nbytes) self.contig = self.fields[0] # vcf counts from 1 - correct here diff --git a/pysam/libcutils.pyx b/pysam/libcutils.pyx index cb08ef2..64cb97a 100644 --- a/pysam/libcutils.pyx +++ b/pysam/libcutils.pyx @@ -20,10 +20,10 @@ from libc.stdio cimport stdout as c_stdout from posix.fcntl cimport open as c_open, O_WRONLY from posix.unistd cimport SEEK_SET, SEEK_CUR, SEEK_END -from libcsamtools cimport samtools_dispatch, samtools_set_stdout, samtools_set_stderr, \ +from pysam.libcsamtools cimport samtools_dispatch, samtools_set_stdout, samtools_set_stderr, \ samtools_close_stdout, samtools_close_stderr, samtools_set_stdout_fn -from libcbcftools cimport bcftools_dispatch, bcftools_set_stdout, bcftools_set_stderr, \ +from pysam.libcbcftools cimport bcftools_dispatch, bcftools_set_stdout, bcftools_set_stderr, \ bcftools_close_stdout, bcftools_close_stderr, bcftools_set_stdout_fn ##################################################################### diff --git a/pysam/version.py b/pysam/version.py index 62a9f31..8625167 100644 --- a/pysam/version.py +++ b/pysam/version.py @@ -1,5 +1,5 @@ # pysam versioning information -__version__ = "0.22.0" +__version__ = "0.22.1" __samtools_version__ = "1.18" __bcftools_version__ = "1.18" diff --git a/setup.py b/setup.py index a4bf36d..dbef939 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,6 @@ try: except ImportError: HAVE_CYTHON = False -IS_PYTHON3 = sys.version_info.major >= 3 IS_DARWIN = platform.system() == 'Darwin' log = logging.getLogger('pysam') @@ -79,9 +78,7 @@ def run_make(targets): def run_make_print_config(): - stdout = subprocess.check_output(["make", "-s", "print-config"]) - if IS_PYTHON3: - stdout = stdout.decode("ascii") + stdout = subprocess.check_output(["make", "-s", "print-config"], encoding="ascii") make_print_config = {} for line in stdout.splitlines(): @@ -94,9 +91,7 @@ def run_make_print_config(): def run_nm_defined_symbols(objfile): - stdout = subprocess.check_output(["nm", "-g", "-P", objfile]) - if IS_PYTHON3: - stdout = stdout.decode("ascii") + stdout = subprocess.check_output(["nm", "-g", "-P", objfile], encoding="ascii") symbols = set() for line in stdout.splitlines(): @@ -233,7 +228,7 @@ class cythonize_sdist(sdist): def run(self): from Cython.Build import cythonize cythonize(self.distribution.ext_modules) - sdist.run(self) + super().run() # Override Cythonised build_ext command to customise macOS shared libraries. @@ -242,7 +237,7 @@ class CyExtension(Extension): def __init__(self, *args, **kwargs): self._init_func = kwargs.pop("init_func", None) self._prebuild_func = kwargs.pop("prebuild_func", None) - Extension.__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) def extend_includes(self, includes): self.include_dirs.extend(includes) @@ -279,7 +274,7 @@ class cy_build_ext(build_ext): ldshared = os.environ.get('LDSHARED', sysconfig.get_config_var('LDSHARED')) os.environ['LDSHARED'] = ldshared.replace('-bundle', '') - build_ext.run(self) + super().run() try: if HTSLIB_MODE != 'separate': self.check_ext_symbol_conflicts() @@ -302,7 +297,7 @@ class cy_build_ext(build_ext): # @loader_path. This will allow Python packages to find the library # in the expected place, while still giving enough flexibility to # external applications to link against the library. - relative_module_path = ext.name.replace(".", os.sep) + (sysconfig.get_config_var('EXT_SUFFIX') or sysconfig.get_config_var('SO')) + relative_module_path = ext.name.replace(".", os.sep) + sysconfig.get_config_var('EXT_SUFFIX') library_path = os.path.join( "@rpath", os.path.basename(relative_module_path) ) @@ -323,7 +318,7 @@ class cy_build_ext(build_ext): if isinstance(ext, CyExtension) and ext._prebuild_func: ext._prebuild_func(ext, self.force) - build_ext.build_extension(self, ext) + super().build_extension(ext) class clean_ext(Command): @@ -537,7 +532,7 @@ else: define_macros = [] -suffix = sysconfig.get_config_var('EXT_SUFFIX') or sysconfig.get_config_var('SO') +suffix = sysconfig.get_config_var('EXT_SUFFIX') internal_htslib_libraries = [ os.path.splitext("chtslib{}".format(suffix))[0]] diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py index 855ae47..2669df2 100644 --- a/tests/AlignedSegment_test.py +++ b/tests/AlignedSegment_test.py @@ -3,7 +3,6 @@ import pysam import unittest import json import collections -import string import struct import copy import array @@ -14,16 +13,9 @@ from TestUtils import ( BAM_DATADIR, get_temp_filename, get_temp_context, - IS_PYTHON3, ) -if IS_PYTHON3: - maketrans = str.maketrans -else: - maketrans = string.maketrans - - def setUpModule(): make_data_files(BAM_DATADIR) @@ -1161,7 +1153,7 @@ class TestBaseModifications(unittest.TestCase): self.assertDictEqual(r.modified_bases, expect[r.query_name][0]) self.assertDictEqual(r.modified_bases_forward, expect[r.query_name][1]) for (B, s, _), mods in r.modified_bases.items(): - C = B.translate(maketrans("ACGTacgtNnXx", "TGCAtgcaNnXx")) + C = B.translate(str.maketrans("ACGTacgtNnXx", "TGCAtgcaNnXx")) for pos, _ in mods: if r.is_reverse: if s == 1: @@ -1714,7 +1706,7 @@ class TestForwardStrandValues(ReadTest): a.is_reverse = False fwd_seq = a.query_sequence - rev_seq = fwd_seq.translate(maketrans("ACGTacgtNnXx", "TGCAtgcaNnXx"))[::-1] + rev_seq = fwd_seq.translate(str.maketrans("ACGTacgtNnXx", "TGCAtgcaNnXx"))[::-1] self.assertEqual(fwd_seq, a.get_forward_sequence()) a.is_reverse = True self.assertEqual(fwd_seq, a.query_sequence) diff --git a/tests/AlignmentFileHeader_test.py b/tests/AlignmentFileHeader_test.py index 3d9e2e4..91e044c 100644 --- a/tests/AlignmentFileHeader_test.py +++ b/tests/AlignmentFileHeader_test.py @@ -24,7 +24,7 @@ class TestHeaderConstruction(unittest.TestCase): header_dict = odict( [('SQ', [odict([('LN', 1575), ('SN', 'chr1'), ('AH', 'chr1:5000000-5010000')]), - odict([('LN', 1584), ('SN', 'chr2'), ('AH', '*')])]), + odict([('LN', 1584), ('SN', 'chr2'), ('AH', '*'), ('TP', 'linear')])]), ('RG', [odict([('LB', 'SC_1'), ('ID', 'L1'), ('SM', 'NA12891'), ('PU', 'SC_1_10'), ("CN", "name:with:colon")]), odict([('LB', 'SC_2'), ('ID', 'L2'), ('SM', 'NA12891'), @@ -37,7 +37,7 @@ class TestHeaderConstruction(unittest.TestCase): header_text = ("@HD\tVN:1.0\n" "@SQ\tSN:chr1\tLN:1575\tAH:chr1:5000000-5010000\n" - "@SQ\tSN:chr2\tLN:1584\tAH:*\n" + "@SQ\tSN:chr2\tLN:1584\tAH:*\tTP:linear\n" "@RG\tID:L1\tPU:SC_1_10\tLB:SC_1\tSM:NA12891\tCN:name:with:colon\n" "@RG\tID:L2\tPU:SC_2_12\tLB:SC_2\tSM:NA12891\tCN:name:with:colon\n" "@PG\tID:P1\tVN:1.0\n" diff --git a/tests/AlignmentFilePileup_test.py b/tests/AlignmentFilePileup_test.py index 083b6e5..7b56b1a 100644 --- a/tests/AlignmentFilePileup_test.py +++ b/tests/AlignmentFilePileup_test.py @@ -3,7 +3,7 @@ import os import pysam import sys import unittest -from TestUtils import make_data_files, BAM_DATADIR, IS_PYTHON3, force_str, flatten_nested_list +from TestUtils import make_data_files, BAM_DATADIR, force_str, flatten_nested_list import PileupTestUtils @@ -353,8 +353,6 @@ class TestIteratorColumn2(unittest.TestCase): self.assertEqual(len(s.split("\n")), 2) -@unittest.skipIf(not IS_PYTHON3, - "tests requires at least python3 for subprocess context manager") class PileUpColumnTests(unittest.TestCase): fn = os.path.join(BAM_DATADIR, "ex2.bam") diff --git a/tests/AlignmentFile_test.py b/tests/AlignmentFile_test.py index 4107c64..0d599df 100644 --- a/tests/AlignmentFile_test.py +++ b/tests/AlignmentFile_test.py @@ -9,15 +9,11 @@ import unittest import pytest import os import shutil -import sys import collections import subprocess import logging import array -if sys.version_info.major >= 3: - from io import StringIO -else: - from StringIO import StringIO +from io import StringIO from functools import partial @@ -361,12 +357,9 @@ class BasicTestSAMFromStringIO(BasicTestBAMFromFetch): def testRaises(self): statement = "samtools view -h {}".format( os.path.join(BAM_DATADIR, "ex3.bam")) - stdout = subprocess.check_output(statement.split(" ")) + stdout = subprocess.check_output(statement.split(" "), encoding="ascii") bam = StringIO() - if sys.version_info.major >= 3: - bam.write(stdout.decode('ascii')) - else: - bam.write(stdout) + bam.write(stdout) bam.seek(0) self.assertRaises(NotImplementedError, pysam.AlignmentFile, bam) @@ -2132,9 +2125,7 @@ class TestAlignmentFileUtilityFunctions(unittest.TestCase): '''test mate access.''' with open(os.path.join(BAM_DATADIR, "ex1.sam"), "rb") as inf: - readnames = [x.split(b"\t")[0] for x in inf.readlines()] - if sys.version_info[0] >= 3: - readnames = [name.decode('ascii') for name in readnames] + readnames = [x.decode("ascii").split("\t")[0] for x in inf.readlines()] counts = collections.defaultdict(int) for x in readnames: diff --git a/tests/StreamFiledescriptors_test.py b/tests/StreamFiledescriptors_test.py index 07adea8..eaa471c 100644 --- a/tests/StreamFiledescriptors_test.py +++ b/tests/StreamFiledescriptors_test.py @@ -1,5 +1,4 @@ import os -import sys import subprocess import threading import errno @@ -7,8 +6,6 @@ import unittest from pysam import AlignmentFile from TestUtils import make_data_files, BAM_DATADIR -IS_PYTHON2 = sys.version_info[0] == 2 - def setUpModule(): make_data_files(BAM_DATADIR) @@ -44,7 +41,6 @@ class StreamTest(unittest.TestCase): read += 1 return 0, read - @unittest.skipIf(IS_PYTHON2, "no context manager in py2") def test_text_processing(self): with subprocess.Popen('head -n200', diff --git a/tests/TestUtils.py b/tests/TestUtils.py index e5dee6c..cde197e 100644 --- a/tests/TestUtils.py +++ b/tests/TestUtils.py @@ -1,4 +1,3 @@ -import sys import os import glob import gzip @@ -27,34 +26,20 @@ LINKDIR = os.path.abspath(os.path.join( TESTS_TEMPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "tmp")) -IS_PYTHON3 = sys.version_info[0] >= 3 +from itertools import zip_longest +from urllib.request import urlopen -if IS_PYTHON3: - from itertools import zip_longest - from urllib.request import urlopen -else: - from itertools import izip as zip_longest - from urllib2 import urlopen - - -if IS_PYTHON3: - def force_str(s): - try: - return s.decode('ascii') - except AttributeError: - return s - - def force_bytes(s): - try: - return s.encode('ascii') - except AttributeError: - return s -else: - def force_str(s): +def force_str(s): + try: + return s.decode('ascii') + except AttributeError: return s - def force_bytes(s): +def force_bytes(s): + try: + return s.encode('ascii') + except AttributeError: return s diff --git a/tests/VariantFile_test.py b/tests/VariantFile_test.py index 6224f0c..56083cf 100644 --- a/tests/VariantFile_test.py +++ b/tests/VariantFile_test.py @@ -2,16 +2,11 @@ import os import glob -import sys import unittest import pysam import shutil import gzip - -try: - from pathlib import Path -except ImportError: - Path = None +from pathlib import Path from TestUtils import get_temp_filename, check_lines_equal, load_and_convert, make_data_files, CBCF_DATADIR, get_temp_context @@ -124,13 +119,11 @@ class TestOpening(unittest.TestCase): pass self.assertRaises(ValueError, pysam.VariantFile, fn) - if Path and sys.version_info >= (3, 6): - def testEmptyFileVCFFromPath(self): - with get_temp_context("tmp_testEmptyFile.vcf") as fn: - with open(fn, "w"): - pass - self.assertRaises(ValueError, pysam.VariantFile, - Path(fn)) + def testEmptyFileVCFFromPath(self): + with get_temp_context("tmp_testEmptyFile.vcf") as fn: + with open(fn, "w"): + pass + self.assertRaises(ValueError, pysam.VariantFile, Path(fn)) def testEmptyFileVCFGZWithIndex(self): with get_temp_context("tmp_testEmptyFile.vcf") as fn: @@ -312,12 +305,11 @@ class TestParsing(unittest.TestCase): chrom = [rec.chrom for rec in v] self.assertEqual(chrom, ['M', '17', '20', '20', '20']) - if Path and sys.version_info >= (3, 6): - def testChromFromPath(self): - fn = os.path.join(CBCF_DATADIR, self.filename) - v = pysam.VariantFile(Path(fn)) - chrom = [rec.chrom for rec in v] - self.assertEqual(chrom, ['M', '17', '20', '20', '20']) + def testChromFromPath(self): + fn = os.path.join(CBCF_DATADIR, self.filename) + v = pysam.VariantFile(Path(fn)) + chrom = [rec.chrom for rec in v] + self.assertEqual(chrom, ['M', '17', '20', '20', '20']) def testPos(self): fn = os.path.join(CBCF_DATADIR, self.filename) diff --git a/tests/VariantRecord_test.py b/tests/VariantRecord_test.py index c44d9a6..310b838 100644 --- a/tests/VariantRecord_test.py +++ b/tests/VariantRecord_test.py @@ -1,11 +1,6 @@ import pysam import pytest -try: - from pathlib import Path -except ImportError: - Path = None - from TestUtils import make_data_files, CBCF_DATADIR diff --git a/tests/samtools_test.py b/tests/samtools_test.py index 7c40237..6d49a94 100644 --- a/tests/samtools_test.py +++ b/tests/samtools_test.py @@ -21,9 +21,6 @@ from TestUtils import checkBinaryEqual, check_lines_equal, \ make_data_files, BAM_DATADIR -IS_PYTHON3 = sys.version_info[0] >= 3 - - def setUpModule(): make_data_files(BAM_DATADIR) @@ -44,10 +41,8 @@ def get_version(executable): with subprocess.Popen(executable, shell=True, stderr=subprocess.PIPE).stderr as pipe: - lines = b"".join(pipe.readlines()) + lines = b"".join(pipe.readlines()).decode("ascii") - if IS_PYTHON3: - lines = lines.decode('ascii') try: x = re.search(r"Version:\s+(\S+)", lines).groups()[0] except AttributeError: @@ -90,8 +85,6 @@ class SamtoolsTest(unittest.TestCase): "idxstats ex1.bam > %(out)s_ex1.idxstats", "fixmate ex1.bam %(out)s_ex1.fixmate.bam", "flagstat ex1.bam > %(out)s_ex1.flagstat", - # Fails python 3.3 on linux, passes on OsX and when - # run locally "calmd ex1.bam ex1.fa > %(out)s_ex1.calmd.bam", # use -s option, otherwise the following error in samtools 1.2: # Samtools-htslib-API: bam_get_library() not yet implemented @@ -252,16 +245,9 @@ class SamtoolsTest(unittest.TestCase): if command in ("bedcov", "stats", "dict", "bam2fq", "flagstat"): continue - if (command == "calmd" and - list(sys.version_info[:2]) == [3, 3]): - # skip calmd test, fails only on python 3.3.5 - # in linux (empty output). Works in OsX and passes - # for 3.4 and 3.5, see issue #293 - continue self.check_statement(statement) - @unittest.skipIf(sys.platform == "darwin", "not supported, pattern does not match") - @unittest.skipIf(not sys.stdin.isatty(), "skipping usage tests, stdin is not a tty") + @unittest.skipUnless(sys.stdin.isatty(), "skipping usage tests, stdin is not a tty") def testUsage(self): if self.executable == "bcftools": # bcftools usage messages end with exit(1) @@ -314,24 +300,15 @@ if sys.platform != "darwin": def testReturnValueString(self): retval = pysam.idxstats(os.path.join(BAM_DATADIR, "ex1.bam")) - if IS_PYTHON3: - self.assertFalse(isinstance(retval, bytes)) - self.assertTrue(isinstance(retval, str)) - else: - self.assertTrue(isinstance(retval, bytes)) - self.assertTrue(isinstance(retval, basestring)) + self.assertFalse(isinstance(retval, bytes)) + self.assertTrue(isinstance(retval, str)) def testReturnValueData(self): args = "-O BAM {}".format(os.path.join(BAM_DATADIR, "ex1.bam")).split(" ") retval = pysam.view(*args) - - if IS_PYTHON3: - self.assertTrue(isinstance(retval, bytes)) - self.assertFalse(isinstance(retval, str)) - else: - self.assertTrue(isinstance(retval, bytes)) - self.assertTrue(isinstance(retval, basestring)) + self.assertTrue(isinstance(retval, bytes)) + self.assertFalse(isinstance(retval, str)) class StdoutTest(unittest.TestCase): '''test if stdout can be redirected.''' diff --git a/tests/tabix_test.py b/tests/tabix_test.py index b1ddb78..d82379d 100644 --- a/tests/tabix_test.py +++ b/tests/tabix_test.py @@ -5,7 +5,6 @@ Execute in the :file:`tests` directory as it requires the Makefile and data files located there. ''' -import sys import os import shutil import gzip @@ -16,8 +15,6 @@ import re from TestUtils import checkBinaryEqual, checkGZBinaryEqual, check_url, \ load_and_convert, make_data_files, TABIX_DATADIR, get_temp_filename -IS_PYTHON3 = sys.version_info[0] >= 3 - def setUpModule(): make_data_files(TABIX_DATADIR) @@ -30,11 +27,8 @@ def myzip_open(infile, mode="r"): for l in f: yield l.decode("ascii") - if IS_PYTHON3: - if mode == "r": - return _convert(gzip.open(infile, "r")) - else: - return gzip.open(mode) + if mode == "r": + return _convert(gzip.open(infile, "r")) def splitToBytes(s): @@ -236,8 +230,7 @@ class TestGZFile(IterationTest): with_comments = True def setUp(self): - - IterationTest.setUp(self) + super().setUp() self.gzfile = pysam.GZIterator(self.filename) def testAll(self): @@ -255,7 +248,7 @@ class TestIterationWithoutComments(IterationTest): "example.gtf.gz") def setUp(self): - IterationTest.setUp(self) + super().setUp() self.tabix = pysam.TabixFile(self.filename) def tearDown(self): @@ -380,9 +373,6 @@ class TestIterationWithComments(TestIterationWithoutComments): filename = os.path.join(TABIX_DATADIR, "example_comments.gtf.gz") - def setUp(self): - TestIterationWithoutComments.setUp(self) - class TestIterators(unittest.TestCase): filename = os.path.join(TABIX_DATADIR, "example.gtf.gz") @@ -511,6 +501,9 @@ class TestBed(unittest.TestCase): self.assertEqual(c[0], r.contig) self.assertEqual(int(c[1]), r.start) self.assertEqual(int(c[2]), r.end) + # Needs lambda so that the property getter isn't called too soon + self.assertRaises(KeyError, lambda: r.name) + self.assertRaises(KeyError, lambda: r.score) self.assertEqual(list(c), list(r)) self.assertEqual("\t".join(map(str, c)), str(r)) @@ -534,6 +527,12 @@ class TestBed(unittest.TestCase): self.assertEqual(int(c[2]) + 1, r.end) self.assertEqual(str(int(c[2]) + 1), r[2]) + with self.assertRaises(IndexError): + r.name = "test" + + with self.assertRaises(IndexError): + r.score = 1 + class TestVCF(unittest.TestCase): @@ -550,46 +549,45 @@ class TestVCF(unittest.TestCase): os.unlink(self.tmpfilename + ".gz.tbi") -if IS_PYTHON3: - class TestUnicode(unittest.TestCase): +class TestUnicode(unittest.TestCase): - '''test reading from a file with non-ascii characters.''' + '''test reading from a file with non-ascii characters.''' - filename = os.path.join(TABIX_DATADIR, "example_unicode.vcf") + filename = os.path.join(TABIX_DATADIR, "example_unicode.vcf") - def setUp(self): - self.tmpfilename = get_temp_filename(suffix="vcf") - shutil.copyfile(self.filename, self.tmpfilename) - pysam.tabix_index(self.tmpfilename, preset="vcf") + def setUp(self): + self.tmpfilename = get_temp_filename(suffix="vcf") + shutil.copyfile(self.filename, self.tmpfilename) + pysam.tabix_index(self.tmpfilename, preset="vcf") - def tearDown(self): - os.unlink(self.tmpfilename + ".gz") - if os.path.exists(self.tmpfilename + ".gz.tbi"): - os.unlink(self.tmpfilename + ".gz.tbi") - - def testFromTabix(self): - - # use ascii encoding - should raise error - with pysam.TabixFile( - self.tmpfilename + ".gz", encoding="ascii") as t: - results = list(t.fetch(parser=pysam.asVCF())) - self.assertRaises(UnicodeDecodeError, - getattr, results[1], "id") - - with pysam.TabixFile( - self.tmpfilename + ".gz", encoding="utf-8") as t: - results = list(t.fetch(parser=pysam.asVCF())) - self.assertEqual(getattr(results[1], "id"), u"Rene\xe9") - - def testFromVCF(self): - self.vcf = pysam.VCF() - self.assertRaises( - UnicodeDecodeError, - self.vcf.connect, - self.tmpfilename + ".gz", - "ascii") - self.vcf.connect(self.tmpfilename + ".gz", encoding="utf-8") - v = self.vcf.getsamples()[0] + def tearDown(self): + os.unlink(self.tmpfilename + ".gz") + if os.path.exists(self.tmpfilename + ".gz.tbi"): + os.unlink(self.tmpfilename + ".gz.tbi") + + def testFromTabix(self): + + # use ascii encoding - should raise error + with pysam.TabixFile( + self.tmpfilename + ".gz", encoding="ascii") as t: + results = list(t.fetch(parser=pysam.asVCF())) + self.assertRaises(UnicodeDecodeError, + getattr, results[1], "id") + + with pysam.TabixFile( + self.tmpfilename + ".gz", encoding="utf-8") as t: + results = list(t.fetch(parser=pysam.asVCF())) + self.assertEqual(getattr(results[1], "id"), u"Rene\xe9") + + def testFromVCF(self): + self.vcf = pysam.VCF() + self.assertRaises( + UnicodeDecodeError, + self.vcf.connect, + self.tmpfilename + ".gz", + "ascii") + self.vcf.connect(self.tmpfilename + ".gz", encoding="utf-8") + v = self.vcf.getsamples()[0] class TestVCFFromTabix(TestVCF): @@ -599,15 +597,14 @@ class TestVCFFromTabix(TestVCF): "filter", "info", "format") def setUp(self): - - TestVCF.setUp(self) + super().setUp() self.tabix = pysam.TabixFile(self.tmpfilename + ".gz") self.compare = load_and_convert(self.filename) def tearDown(self): self.tabix.close() - TestVCF.tearDown(self) + super().tearDown() def testRead(self): @@ -708,14 +705,13 @@ class TestVCFFromVCF(TestVCF): missing_quality = -1 def setUp(self): - - TestVCF.setUp(self) + super().setUp() self.vcf = pysam.VCF() self.compare = load_and_convert(self.filename, encode=False) def tearDown(self): - TestVCF.tearDown(self) + super().tearDown() self.vcf.close() def open_vcf(self, fn): diff --git a/tests/tabixproxies_test.py b/tests/tabixproxies_test.py index f393dce..2ade398 100644 --- a/tests/tabixproxies_test.py +++ b/tests/tabixproxies_test.py @@ -4,14 +4,13 @@ import os import re import copy import gzip -from TestUtils import load_and_convert, make_data_files, TABIX_DATADIR, IS_PYTHON3 +from TestUtils import load_and_convert, make_data_files, TABIX_DATADIR def setUpModule(): make_data_files(TABIX_DATADIR) -@unittest.skipUnless(IS_PYTHON3, "Requires Python 3 Extended Iterable Unpacking") class TestBED(unittest.TestCase): filename = os.path.join(TABIX_DATADIR, "fivecolumns.bed.gz") @@ -23,18 +22,14 @@ class TestBED(unittest.TestCase): self.tabix.close() def testAssignmentToTargetList(self): - # TODO When we drop Python 2, remove exec() & my and simplify these - my = {} for row in self.tabix.fetch(parser=pysam.asTuple()): - my['row'] = row - # Test that *others gets the right columns... - exec('contig, start, end, *others = row', globals(), my) - self.assertEqual(3 + len(my['others']), len(row)) + contig, start, end, *others = row + self.assertEqual(3 + len(others), len(row)) # ...and that a TupleProxy can be assigned from more than once - exec('contig, *others = row', globals(), my) - self.assertEqual(1 + len(my['others']), len(row)) + contig, *others = row + self.assertEqual(1 + len(others), len(row)) class TestParser(unittest.TestCase): @@ -68,12 +63,10 @@ class TestParser(unittest.TestCase): self.assertEqual("\t".join(map(str, c)), str(r)) - @unittest.skipUnless(IS_PYTHON3, "Requires Python 3 Extended Iterable Unpacking") def testAssignmentToTargetList(self): for x, r in enumerate(self.tabix.fetch(parser=pysam.asTuple())): - my = { 'r': r } - exec('col1, col2, *others, colN = r', globals(), my) - self.assertEqual(2 + len(my['others']) + 1, len(r)) + col1, col2, *others, colN = r + self.assertEqual(2 + len(others) + 1, len(r)) def testWrite(self): -- 2.30.2