From 6528922f1d45d0364eefc4c80ad2fce35f885570 Mon Sep 17 00:00:00 2001 From: Afif Elghraoui Date: Sat, 23 Jul 2016 15:39:14 -0700 Subject: [PATCH] Imported Upstream version 0.9.1.4+ds --- INSTALL | 12 +-- MANIFEST.in | 4 + README.rst | 12 ++- cy_build.py | 7 +- pysam/calignmentfile.pyx | 13 +-- pysam/version.py | 2 +- run_tests_travis.sh | 46 +++++++++- setup.py | 169 +++++++++++++++++++++--------------- tests/AlignmentFile_test.py | 71 ++++++++++++++- 9 files changed, 241 insertions(+), 95 deletions(-) diff --git a/INSTALL b/INSTALL index 5ddff7f..a1edd45 100644 --- a/INSTALL +++ b/INSTALL @@ -18,6 +18,8 @@ curl Pysam requires Python (2.7 or greater) and Cython (0.22 or greater). It has not been tested on many other platforms. +Windows support does not work yet. + Compilation =========== @@ -53,13 +55,3 @@ or python3 setup.py build_sphinx The documentation will be put into build/sphinx. - -Architecture specific options -============================= - -Pysam has been compiled on various Linux systems and works -with python 2.6 and python 2.5. - -Python 2.7 and Python 3 have not been tested. - -Windows support does not work yet. diff --git a/MANIFEST.in b/MANIFEST.in index 7b1cbda..be43691 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -19,9 +19,13 @@ include samtools/configure include samtools/config.mk.in include samtools/config.h.in include samtools/*.h +exclude samtools/config.h include samtools/*/*.h include htslib/*.c include htslib/*.h +exclude htslib/config.h +include htslib/Makefile +include htslib/htslib_vars.mk include htslib/configure include htslib/config.mk.in include htslib/config.h.in diff --git a/README.rst b/README.rst index ab9e612..5e37be5 100644 --- a/README.rst +++ b/README.rst @@ -14,9 +14,19 @@ includes an interface for tabix_. The latest version is available through `pypi `_. To install, simply type:: - + pip install pysam +If you are using the conda packaging manager (e.g. miniconda or anaconda), +you can install pysam from the `bioconda channel `_: + + conda config --add channels r + + conda config --add channels bioconda + + conda install pysam + + Pysam documentation is available through https://readthedocs.org/ from `here `_ diff --git a/cy_build.py b/cy_build.py index e052ddb..880b5cc 100644 --- a/cy_build.py +++ b/cy_build.py @@ -2,7 +2,11 @@ import os import re import sys -from Cython.Distutils import build_ext +try: + from Cython.Distutils import build_ext +except ImportError: + from setuptools.command.build_ext import build_ext + from distutils.extension import Extension from distutils.sysconfig import get_config_vars, get_python_lib, get_python_version from pkg_resources import Distribution @@ -49,6 +53,7 @@ class cy_build_ext(build_ext): self.distribution.has_ext_modules() and self.plat_name).egg_name() def build_extension(self, ext): + if isinstance(ext, CyExtension) and ext._init_func: ext._init_func(ext) diff --git a/pysam/calignmentfile.pyx b/pysam/calignmentfile.pyx index 6473220..ed5e584 100644 --- a/pysam/calignmentfile.pyx +++ b/pysam/calignmentfile.pyx @@ -106,16 +106,17 @@ KNOWN_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str}, "PG" : {"ID" : str, "PN" : str, "CL" : str, "PP" : str, "DS" : str, "VN" : str,},} -# output order of fields within records +# output order of fields within records. Ensure that CL is at +# the end as parsing a CL will ignore any subsequent records. VALID_HEADER_ORDER = {"HD" : ("VN", "SO", "GO"), "SQ" : ("SN", "LN", "AS", "M5", "UR", "SP"), - "RG" : ("ID", "SM", "LB", "DS", - "PU", "PI", "CN", "DT", + "RG" : ("ID", "CN", "SM", "LB", + "PU", "PI", "DT", "DS", "PL", "FO", "KS", "PG", "PM"), - "PG" : ("PN", "ID", "VN", "CL", - "PP"),} + "PG" : ("PN", "ID", "VN", "PP", + "DS", "CL"),} def build_header_line(fields, record): @@ -589,7 +590,7 @@ cdef class AlignmentFile: raise ValueError( ("file has no sequences defined (mode='%s') - " "is it SAM/BAM format? Consider opening with " - "check_seq=True") % mode) + "check_sq=False") % mode) if self.htsfile == NULL: raise IOError("could not open file `%s`" % filename ) diff --git a/pysam/version.py b/pysam/version.py index 15cefc4..0a985de 100644 --- a/pysam/version.py +++ b/pysam/version.py @@ -1,6 +1,6 @@ # pysam versioning information -__version__ = "0.9.1" +__version__ = "0.9.1.4" __samtools_version__ = "1.3.1" diff --git a/run_tests_travis.sh b/run_tests_travis.sh index f1fcdce..414043e 100755 --- a/run_tests_travis.sh +++ b/run_tests_travis.sh @@ -105,9 +105,47 @@ if [ $? != 0 ]; then exit 1 fi -# build source tar-ball and test installation from tar-ball +# build source tar-ball cd .. python setup.py sdist -tar -xvzf dist/pysam-*.tar.gz -cd pysam-* -python setup.py install + +if [ $? != 0 ]; then + exit 1 +fi + +# check for presence of config.h files +echo "checking for presence of config.h files in tar-ball" +tar -tvzf dist/pysam-*.tar.gz | grep "config.h$" + +if [ $? != 1 ]; then + exit 1 +fi + +# test pip installation from tar-ball with cython +echo "pip installing with cython" +pip install --verbose --no-deps --no-use-wheel dist/pysam-*.tar.gz + +if [ $? != 0 ]; then + exit 1 +fi + +# attempt pip installation without cython +echo "pip installing without cython" +~/miniconda3/bin/conda remove cython +~/miniconda3/bin/conda list +echo "pthyon is" `which python` +pip install --verbose --no-deps --no-use-wheel --force-reinstall --upgrade dist/pysam-*.tar.gz + +if [ $? != 0 ]; then + exit 1 +fi + +# attempt pip installation without cython and without +# command line options +echo "pip installing without cython and no configure options" +export HTSLIB_CONFIGURE_OPTIONS="" +pip install --verbose --no-deps --no-use-wheel --force-reinstall --upgrade dist/pysam-*.tar.gz + +if [ $? != 0 ]; then + exit 1 +fi diff --git a/setup.py b/setup.py index 080bc24..e301f11 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ import platform import re import subprocess import sys +import sysconfig from contextlib import contextmanager from setuptools import Extension, setup @@ -45,6 +46,29 @@ def changedir(path): os.chdir(save_dir) +def run_configure(option): + try: + retcode = subprocess.call( + " ".join(("./configure", option)), + shell=True) + if retcode != 0: + return False + else: + return True + except OSError as e: + return False + + +def run_make_print_config(): + stdout = subprocess.check_output(["make", "print-config"]) + if IS_PYTHON3: + stdout = stdout.decode("ascii") + + result = dict([[x.strip() for x in line.split("=")] + for line in stdout.splitlines()]) + return result + + def configure_library(library_dir, env_options=None, options=[]): configure_script = os.path.join(library_dir, "configure") @@ -53,18 +77,6 @@ def configure_library(library_dir, env_options=None, options=[]): raise ValueError( "configure script {} does not exist".format(configure_script)) - def run_configure(option): - try: - retcode = subprocess.call( - " ".join(("./configure", option)), - shell=True) - if retcode != 0: - return False - else: - return True - except OSError as e: - return False - with changedir(library_dir): if env_options is not None: if run_configure(env_options): @@ -73,9 +85,20 @@ def configure_library(library_dir, env_options=None, options=[]): for option in options: if run_configure(option): return option + return None +def distutils_dir_name(dname): + """Returns the name of a distutils build directory + see: http://stackoverflow.com/questions/14320220/ + testing-python-c-libraries-get-build-path + """ + f = "{dirname}.{platform}-{version[0]}.{version[1]}" + return f.format(dirname=dname, + platform=sysconfig.get_platform(), + version=sys.version_info) + # How to link against HTSLIB # separate: use included htslib and include in each extension # module. No dependencies between modules and works @@ -87,10 +110,25 @@ def configure_library(library_dir, env_options=None, options=[]): # pysam. # external: use shared libhts.so compiled outside of # pysam -HTSLIB_MODE = "shared" +HTSLIB_MODE = os.environ.get("HTSLIB_MODE", "shared") HTSLIB_LIBRARY_DIR = os.environ.get("HTSLIB_LIBRARY_DIR", None) HTSLIB_INCLUDE_DIR = os.environ.get("HTSLIB_INCLUDE_DIR", None) HTSLIB_CONFIGURE_OPTIONS = os.environ.get("HTSLIB_CONFIGURE_OPTIONS", None) +HTSLIB_SOURCE = None + +package_list = ['pysam', + 'pysam.include', + 'pysam.include.samtools', + 'pysam.include.bcftools', + 'pysam.include.samtools.win32'] +package_dirs = {'pysam': 'pysam', + 'pysam.include.samtools': 'samtools', + 'pysam.include.bcftools': 'bcftools'} +config_headers = ["samtools/config.h"] + +from cy_build import CyExtension as Extension, cy_build_ext as build_ext + +cmdclass = {'build_ext': build_ext} # Check if cython is available # @@ -98,16 +136,19 @@ HTSLIB_CONFIGURE_OPTIONS = os.environ.get("HTSLIB_CONFIGURE_OPTIONS", None) # the .pyx files. If no cython is available, the C-files included in the # distribution will be used. try: - from cy_build import CyExtension as Extension, cy_build_ext as build_ext + import cython + HAVE_CYTHON = True + print ("# pysam: cython is available - using cythonize if necessary") source_pattern = "pysam/c%s.pyx" - cmdclass = {'build_ext': build_ext} - HTSLIB_MODE = "shared" + if HTSLIB_MODE != "external": + HTSLIB_MODE = "shared" except ImportError: + HAVE_CYTHON = False + print ("# pysam: no cython available - using pre-compiled C") # no Cython available - use existing C code - cmdclass = {} source_pattern = "pysam/c%s.c" - # Set mode to separate, as "shared" not fully tested yet. - HTSLIB_MODE = "separate" + if HTSLIB_MODE != "external": + HTSLIB_MODE = "shared" # collect pysam version sys.path.insert(0, "pysam") @@ -136,19 +177,26 @@ EXCLUDE = { } print ("# pysam: htslib mode is {}".format(HTSLIB_MODE)) - +print ("# pysam: HTSLIB_CONFIGURE_OPTIONS={}".format( + HTSLIB_CONFIGURE_OPTIONS)) htslib_configure_options = None if HTSLIB_MODE in ['shared', 'separate']: + package_list += ['pysam.include.htslib', + 'pysam.include.htslib.htslib'] + package_dirs.update({'pysam.include.htslib':'htslib'}) + htslib_configure_options = configure_library( "htslib", HTSLIB_CONFIGURE_OPTIONS, - ["--enable-libcurl"]) + ["--enable-libcurl", + "--disable-libcurl"]) HTSLIB_SOURCE = "builtin" print ("# pysam: htslib configure options: {}".format( str(htslib_configure_options))) + config_headers += ["htslib/config.h"] if htslib_configure_options is None: # create empty config.h file with open("htslib/config.h", "w") as outf: @@ -157,6 +205,23 @@ if HTSLIB_MODE in ['shared', 'separate']: outf.write( "/* conservative compilation options */\n") + with changedir("htslib"): + htslib_make_options = run_make_print_config() + + for key, value in htslib_make_options.items(): + print ("# pysam: htslib_config {}={}".format(key, value)) + + external_htslib_libraries = ['z'] + if "LIBS" in htslib_make_options: + external_htslib_libraries.extend( + [re.sub("^-l", "", x) for x in htslib_make_options["LIBS"].split(" ") if x.strip()]) + + shared_htslib_sources = [re.sub("\.o", ".c", os.path.join("htslib", x)) + for x in + htslib_make_options["LIBHTS_OBJS"].split(" ")] + + htslib_sources = [] + if HTSLIB_LIBRARY_DIR: # linking against a shared, externally installed htslib version, no # sources required for htslib @@ -171,34 +236,26 @@ if HTSLIB_LIBRARY_DIR: elif HTSLIB_MODE == 'separate': # add to each pysam component a separately compiled # htslib - htslib_sources = [ - x for x in - glob.glob(os.path.join("htslib", "*.c")) + - glob.glob(os.path.join("htslib", "cram", "*.c")) - if x not in EXCLUDE["htslib"]] + htslib_sources = shared_htslib_sources shared_htslib_sources = htslib_sources htslib_library_dirs = [] htslib_include_dirs = ['htslib'] internal_htslib_libraries = [] - external_htslib_libraries = ['z'] elif HTSLIB_MODE == 'shared': - # link each pysam component against the same # htslib built from sources included in the pysam # package. - htslib_sources = [] - shared_htslib_sources = [ - x for x in - glob.glob(os.path.join("htslib", "*.c")) + - glob.glob(os.path.join("htslib", "cram", "*.c")) - if x not in EXCLUDE["htslib"]] - htslib_library_dirs = ['pysam', "."] + htslib_library_dirs = [ + 'pysam', + ".", + os.path.join("build", + distutils_dir_name("lib"), + "pysam")] + htslib_include_dirs = ['htslib'] - external_htslib_libraries = ['z'] if IS_PYTHON3: - import sysconfig if sys.version_info.minor >= 5: internal_htslib_libraries = ["chtslib.{}".format( sysconfig.get_config_var('SOABI'))] @@ -216,7 +273,6 @@ elif HTSLIB_MODE == 'shared': else: raise ValueError("unknown HTSLIB value '%s'" % HTSLIB_MODE) - # build config.py with open(os.path.join("pysam", "config.py"), "w") as outf: outf.write('HTSLIB = "{}"\n'.format(HTSLIB_SOURCE)) @@ -237,31 +293,11 @@ with open(os.path.join("pysam", "config.py"), "w") as outf: "HAVE_LIBCURL", "HAVE_MMAP"]: outf.write("{} = {}\n".format(key, config_values[key])) - - -if HTSLIB_SOURCE == "builtin": - EXCLUDE_HTSLIB = ["htslib/hfile_libcurl.c"] - if htslib_configure_options is None: - print ("# pysam: could not configure htslib, choosing " - "conservative defaults") - htslib_sources = [x for x in htslib_sources - if x not in EXCLUDE_HTSLIB] - shared_htslib_sources = [x for x in shared_htslib_sources - if x not in EXCLUDE_HTSLIB] - elif "--disable-libcurl" in htslib_configure_options: - print ("# pysam: libcurl has been disabled") - htslib_sources = [x for x in htslib_sources - if x not in EXCLUDE_HTSLIB] - shared_htslib_sources = [x for x in shared_htslib_sources - if x not in EXCLUDE_HTSLIB] - elif "--enable-libcurl" in htslib_configure_options: - print ("# pysam: libcurl of builtin htslib has been enabled, " - "adding shared libcurl and libcrypto") - external_htslib_libraries.extend(["curl", "crypto"]) + print ("# pysam: config_option: {}={}".format(key, config_values[key])) # create empty config.h files if they have not been created automatically # or created by the user: -for fn in "samtools/config.h", "htslib/config.h": +for fn in config_headers: if not os.path.exists(fn): with open(fn, "w") as outf: outf.write( @@ -493,13 +529,7 @@ metadata = { 'license': "MIT", 'platforms': "ALL", 'url': "https://github.com/pysam-developers/pysam", - 'packages': ['pysam', - 'pysam.include', - 'pysam.include.htslib', - 'pysam.include.htslib.htslib', - 'pysam.include.samtools', - 'pysam.include.bcftools', - 'pysam.include.samtools.win32'], + 'packages': package_list, 'requires': ['cython (>=0.21)'], 'ext_modules': [chtslib, csamfile, @@ -512,10 +542,7 @@ metadata = { cfaidx, cutils], 'cmdclass': cmdclass, - 'package_dir': {'pysam': 'pysam', - 'pysam.include.htslib': 'htslib', - 'pysam.include.samtools': 'samtools', - 'pysam.include.bcftools': 'bcftools'}, + 'package_dir': package_dirs, 'package_data': {'': ['*.pxd', '*.h'], }, # do not pack in order to permit linking to csamtools.so 'zip_safe': False, diff --git a/tests/AlignmentFile_test.py b/tests/AlignmentFile_test.py index 9a33722..c042f4f 100644 --- a/tests/AlignmentFile_test.py +++ b/tests/AlignmentFile_test.py @@ -9,6 +9,8 @@ import unittest import os import shutil import sys +import re +import copy import collections import subprocess import logging @@ -23,7 +25,8 @@ from functools import partial import pysam import pysam.samtools from TestUtils import checkBinaryEqual, checkURL, \ - check_samtools_view_equal, checkFieldEqual, force_str + check_samtools_view_equal, checkFieldEqual, force_str, \ + get_temp_filename DATADIR = "pysam_data" @@ -1395,6 +1398,72 @@ class TestHeader1000Genomes(unittest.TestCase): self.assertTrue(data) +class TestHeaderWriteRead(unittest.TestCase): + header = {'SQ': [{'LN': 1575, 'SN': 'chr1'}, + {'LN': 1584, 'SN': 'chr2'}], + 'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', + 'PU': 'SC_1_10', "CN": "name:with:colon"}, + {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', + 'PU': 'SC_2_12', "CN": "name:with:colon"}], + 'PG': [{'ID': 'P1', 'VN': '1.0', 'CL': 'tool'}, + {'ID': 'P2', 'VN': '1.1', 'CL': 'tool with in option -R a\tb', + 'PP': 'P1'}], + 'HD': {'VN': '1.0'}, + 'CO': ['this is a comment', 'this is another comment'], + } + + def compare_headers(self, a, b): + '''compare two headers a and b. + + Ignore M5 and UR field as they are set application specific. + ''' + for ak, av in a.items(): + self.assertTrue(ak in b, "key '%s' not in '%s' " % (ak, b)) + self.assertEqual( + len(av), len(b[ak]), + "unequal number of entries for key {}: {} vs {}" + .format(ak, av, b[ak])) + + for row_a, row_b in zip(av, b[ak]): + if isinstance(row_b, dict): + for x in ["M5", "UR"]: + try: + del row_b[x] + except KeyError: + pass + self.assertEqual(row_a, row_b) + + def check_read_write(self, flag_write, header): + + fn = get_temp_filename() + with pysam.AlignmentFile( + fn, + flag_write, + header=header, + reference_filename="pysam_data/ex1.fa") as outf: + a = pysam.AlignedSegment() + a.query_name = "abc" + outf.write(a) + + with pysam.AlignmentFile(fn) as inf: + read_header = inf.header + + os.unlink(fn) + self.compare_headers(header, read_header) + + def test_SAM(self): + self.check_read_write("wh", self.header) + + def test_BAM(self): + self.check_read_write("wb", self.header) + + def test_CRAM(self): + header = copy.copy(self.header) + # for CRAM, \t needs to be quoted: + header['PG'][1]['CL'] = re.sub(r"\t", r"\\\\t", header['PG'][1]['CL']) + self.check_read_write("wc", header) + + class TestUnmappedReads(unittest.TestCase): # TODO -- 2.30.2