import re
import subprocess
import sys
+import sysconfig
from contextlib import contextmanager
from setuptools import Extension, setup
os.chdir(save_dir)
def run_configure(option):
    """Run ``./configure`` in the current directory with ``option``.

    ``option`` is appended verbatim to the command line, so it may hold
    several space-separated flags; the command is handed to the shell,
    which performs the word splitting.

    Returns True when the script exits with status 0, and False when it
    exits with any other status or cannot be started (OSError).
    """
    try:
        retcode = subprocess.call(
            " ".join(("./configure", option)),
            shell=True)
    except OSError:
        # The shell itself could not be launched; treat as a failed attempt
        # so that callers can fall back to another option.
        return False
    return retcode == 0
+
+
def run_make_print_config():
    """Return the settings printed by ``make print-config`` as a dict.

    The command is run in the current working directory.  Every output
    line of the form ``KEY = VALUE`` becomes one entry; keys and values
    are stripped of surrounding whitespace.  If a key is printed more than
    once the last value wins.

    Raises subprocess.CalledProcessError if ``make`` exits with a non-zero
    status (behaviour of subprocess.check_output).
    """
    stdout = subprocess.check_output(["make", "print-config"])
    if IS_PYTHON3:
        stdout = stdout.decode("ascii")

    result = {}
    for line in stdout.splitlines():
        # Split on the first "=" only: values such as "-DFOO=1" contain
        # further "=" characters and must be kept intact.
        key, separator, value = line.partition("=")
        if not separator:
            # Blank lines and other output without "=" carry no setting.
            continue
        result[key.strip()] = value.strip()
    return result
+
+
# Configure the library in ``library_dir`` by running its ./configure script.
#
# ``env_options`` is tried first when it is not None; each entry of
# ``options`` is then tried in order and the first one for which
# run_configure() succeeds is returned.  The final statement returns None.
# A ValueError is raised for a missing configure script.
# NOTE(review): this is a diff excerpt with stripped indentation and omitted
# context (the condition guarding the ``raise`` and whatever follows a
# successful ``env_options`` run are not visible) - confirm the exact control
# flow against the full file.
# NOTE(review): ``options=[]`` is a mutable default argument; the visible code
# only iterates it, but a tuple or None default would be safer.
def configure_library(library_dir, env_options=None, options=[]):
configure_script = os.path.join(library_dir, "configure")
raise ValueError(
"configure script {} does not exist".format(configure_script))
- def run_configure(option):
- try:
- retcode = subprocess.call(
- " ".join(("./configure", option)),
- shell=True)
- if retcode != 0:
- return False
- else:
- return True
- except OSError as e:
- return False
-
# The nested helper above is removed by this patch; the calls below use the
# module-level run_configure() instead.
# changedir is defined elsewhere in this file; presumably it restores the
# previous working directory on exit - verify.
with changedir(library_dir):
if env_options is not None:
if run_configure(env_options):
for option in options:
if run_configure(option):
return option
+
return None
def distutils_dir_name(dname):
    """Return the name of a distutils build directory.

    The name has the form ``<dname>.<platform>-<major>.<minor>``, built
    from sysconfig.get_platform() and the running interpreter's version.

    see: http://stackoverflow.com/questions/14320220/testing-python-c-libraries-get-build-path
    """
    template = "{dirname}.{platform}-{version[0]}.{version[1]}"
    fields = {
        "dirname": dname,
        "platform": sysconfig.get_platform(),
        "version": sys.version_info,
    }
    return template.format(**fields)
+
# How to link against HTSLIB
# separate: use included htslib and include in each extension
# module. No dependencies between modules and works
# pysam.
# external: use shared libhts.so compiled outside of
# pysam
# NOTE(review): the comment above is shown with omitted diff context; the
# description of the "shared" mode used below is not visible in this excerpt.
# The link mode is now read from the HTSLIB_MODE environment variable and
# defaults to "shared" when the variable is not set.
-HTSLIB_MODE = "shared"
+HTSLIB_MODE = os.environ.get("HTSLIB_MODE", "shared")
# Optional overrides taken from the environment; each is None when unset.
HTSLIB_LIBRARY_DIR = os.environ.get("HTSLIB_LIBRARY_DIR", None)
HTSLIB_INCLUDE_DIR = os.environ.get("HTSLIB_INCLUDE_DIR", None)
HTSLIB_CONFIGURE_OPTIONS = os.environ.get("HTSLIB_CONFIGURE_OPTIONS", None)
# Starts as None; set to "builtin" further down once the bundled htslib has
# been configured, and written to pysam/config.py.
+HTSLIB_SOURCE = None
+
# Package names, package directories and config headers are kept in
# module-level containers so that the htslib entries can be added later only
# when the bundled htslib is used; they are passed to the setup metadata.
+package_list = ['pysam',
+ 'pysam.include',
+ 'pysam.include.samtools',
+ 'pysam.include.bcftools',
+ 'pysam.include.samtools.win32']
+package_dirs = {'pysam': 'pysam',
+ 'pysam.include.samtools': 'samtools',
+ 'pysam.include.bcftools': 'bcftools'}
+config_headers = ["samtools/config.h"]
+
# cy_build is now imported unconditionally (it used to be imported inside the
# cython availability check below), so build_ext is always registered.
+from cy_build import CyExtension as Extension, cy_build_ext as build_ext
+
+cmdclass = {'build_ext': build_ext}
# Check if cython is available
#
# the .pyx files. If no cython is available, the C-files included in the
# distribution will be used.
# The import of cython is only an availability probe: HAVE_CYTHON records the
# outcome and source_pattern selects the .pyx or the pre-generated .c sources.
# NOTE(review): both branches reset HTSLIB_MODE to "shared" unless it is
# "external", so a value of "separate" taken from the environment is
# overridden here - confirm this is intended.
try:
- from cy_build import CyExtension as Extension, cy_build_ext as build_ext
+ import cython
+ HAVE_CYTHON = True
+ print ("# pysam: cython is available - using cythonize if necessary")
source_pattern = "pysam/c%s.pyx"
- cmdclass = {'build_ext': build_ext}
- HTSLIB_MODE = "shared"
+ if HTSLIB_MODE != "external":
+ HTSLIB_MODE = "shared"
except ImportError:
+ HAVE_CYTHON = False
+ print ("# pysam: no cython available - using pre-compiled C")
# no Cython available - use existing C code
- cmdclass = {}
source_pattern = "pysam/c%s.c"
- # Set mode to separate, as "shared" not fully tested yet.
- HTSLIB_MODE = "separate"
+ if HTSLIB_MODE != "external":
+ HTSLIB_MODE = "shared"
# collect pysam version
sys.path.insert(0, "pysam")
}
# Report the effective build settings, then configure the bundled htslib when
# it is going to be compiled as part of pysam.
# NOTE(review): indentation is stripped in this excerpt, so which of the
# statements below belong to the ``if`` blocks cannot be read from here.
print ("# pysam: htslib mode is {}".format(HTSLIB_MODE))
-
+print ("# pysam: HTSLIB_CONFIGURE_OPTIONS={}".format(
+ HTSLIB_CONFIGURE_OPTIONS))
htslib_configure_options = None
if HTSLIB_MODE in ['shared', 'separate']:
# The htslib header packages are only shipped when the bundled htslib is used.
+ package_list += ['pysam.include.htslib',
+ 'pysam.include.htslib.htslib']
+ package_dirs.update({'pysam.include.htslib':'htslib'})
+
# User-supplied options are passed as ``env_options``; the list gives the
# fallbacks, now including an attempt with libcurl disabled.
htslib_configure_options = configure_library(
"htslib",
HTSLIB_CONFIGURE_OPTIONS,
- ["--enable-libcurl"])
+ ["--enable-libcurl",
+ "--disable-libcurl"])
HTSLIB_SOURCE = "builtin"
print ("# pysam: htslib configure options: {}".format(
str(htslib_configure_options)))
+ config_headers += ["htslib/config.h"]
if htslib_configure_options is None:
# create empty config.h file
with open("htslib/config.h", "w") as outf:
outf.write(
"/* conservative compilation options */\n")
# Ask the htslib Makefile for its settings (``make print-config`` run inside
# the htslib directory) and echo every key/value pair.
+ with changedir("htslib"):
+ htslib_make_options = run_make_print_config()
+
+ for key, value in htslib_make_options.items():
+ print ("# pysam: htslib_config {}={}".format(key, value))
+
# Libraries to link: always 'z', plus every entry of LIBS with a leading
# "-l" removed; empty fields produced by the split are skipped.
+ external_htslib_libraries = ['z']
+ if "LIBS" in htslib_make_options:
+ external_htslib_libraries.extend(
+ [re.sub("^-l", "", x) for x in htslib_make_options["LIBS"].split(" ") if x.strip()])
+
# Source files are derived from LIBHTS_OBJS by prefixing "htslib" and
# rewriting ".o" to ".c".
# NOTE(review): "\.o" is a non-raw string and the pattern is not anchored, so
# any ".o" inside a name would be rewritten; r"\.o$" would be stricter.
# Unlike the LIBS handling above, empty fields from split(" ") are not
# filtered here - verify that LIBHTS_OBJS never contains repeated spaces.
+ shared_htslib_sources = [re.sub("\.o", ".c", os.path.join("htslib", x))
+ for x in
+ htslib_make_options["LIBHTS_OBJS"].split(" ")]
+
+ htslib_sources = []
+
+
# Select sources, library directories and libraries for the chosen link mode.
# NOTE(review): the body of the first branch lies in omitted diff context.
if HTSLIB_LIBRARY_DIR:
# linking against a shared, externally installed htslib version, no
# sources required for htslib
elif HTSLIB_MODE == 'separate':
# add to each pysam component a separately compiled
# htslib
# The glob over htslib/*.c is replaced by the source list computed earlier
# from the Makefile's LIBHTS_OBJS.
- htslib_sources = [
- x for x in
- glob.glob(os.path.join("htslib", "*.c")) +
- glob.glob(os.path.join("htslib", "cram", "*.c"))
- if x not in EXCLUDE["htslib"]]
+ htslib_sources = shared_htslib_sources
# NOTE(review): after the assignment above this line rebinds
# shared_htslib_sources to the same list and is now redundant.
shared_htslib_sources = htslib_sources
htslib_library_dirs = []
htslib_include_dirs = ['htslib']
internal_htslib_libraries = []
- external_htslib_libraries = ['z']
elif HTSLIB_MODE == 'shared':
-
# link each pysam component against the same
# htslib built from sources included in the pysam
# package.
- htslib_sources = []
- shared_htslib_sources = [
- x for x in
- glob.glob(os.path.join("htslib", "*.c")) +
- glob.glob(os.path.join("htslib", "cram", "*.c"))
- if x not in EXCLUDE["htslib"]]
- htslib_library_dirs = ['pysam', "."]
# The library search path additionally contains build/<lib dir>/pysam, where
# <lib dir> is the name returned by distutils_dir_name("lib").
+ htslib_library_dirs = [
+ 'pysam',
+ ".",
+ os.path.join("build",
+ distutils_dir_name("lib"),
+ "pysam")]
+
htslib_include_dirs = ['htslib']
- external_htslib_libraries = ['z']
# On Python 3 with minor version >= 5 the library name carries the SOABI tag
# reported by sysconfig; sysconfig is now imported at the top of the file, so
# the local import is dropped.
if IS_PYTHON3:
- import sysconfig
if sys.version_info.minor >= 5:
internal_htslib_libraries = ["chtslib.{}".format(
sysconfig.get_config_var('SOABI'))]
else:
raise ValueError("unknown HTSLIB value '%s'" % HTSLIB_MODE)
-
# build config.py
# Writes the htslib source kind followed by one "KEY = VALUE" line per
# configuration value.
# NOTE(review): the loop header over the configuration keys is only partly
# visible in this excerpt (omitted diff context).
with open(os.path.join("pysam", "config.py"), "w") as outf:
outf.write('HTSLIB = "{}"\n'.format(HTSLIB_SOURCE))
"HAVE_LIBCURL",
"HAVE_MMAP"]:
outf.write("{} = {}\n".format(key, config_values[key]))
-
-
-if HTSLIB_SOURCE == "builtin":
- EXCLUDE_HTSLIB = ["htslib/hfile_libcurl.c"]
- if htslib_configure_options is None:
- print ("# pysam: could not configure htslib, choosing "
- "conservative defaults")
- htslib_sources = [x for x in htslib_sources
- if x not in EXCLUDE_HTSLIB]
- shared_htslib_sources = [x for x in shared_htslib_sources
- if x not in EXCLUDE_HTSLIB]
- elif "--disable-libcurl" in htslib_configure_options:
- print ("# pysam: libcurl has been disabled")
- htslib_sources = [x for x in htslib_sources
- if x not in EXCLUDE_HTSLIB]
- shared_htslib_sources = [x for x in shared_htslib_sources
- if x not in EXCLUDE_HTSLIB]
- elif "--enable-libcurl" in htslib_configure_options:
- print ("# pysam: libcurl of builtin htslib has been enabled, "
- "adding shared libcurl and libcrypto")
- external_htslib_libraries.extend(["curl", "crypto"])
# The manual libcurl source exclusion above is removed by this patch;
# presumably it is superseded by the source and library lists taken from
# ``make print-config`` - verify.  Each written value is now also echoed.
+ print ("# pysam: config_option: {}={}".format(key, config_values[key]))
# create empty config.h files if they have not been created automatically
# or created by the user:
# The list of headers is now config_headers, which contains htslib/config.h
# only when it was added during configuration of the bundled htslib.
-for fn in "samtools/config.h", "htslib/config.h":
+for fn in config_headers:
if not os.path.exists(fn):
with open(fn, "w") as outf:
outf.write(
'license': "MIT",
'platforms': "ALL",
'url': "https://github.com/pysam-developers/pysam",
- 'packages': ['pysam',
- 'pysam.include',
- 'pysam.include.htslib',
- 'pysam.include.htslib.htslib',
- 'pysam.include.samtools',
- 'pysam.include.bcftools',
- 'pysam.include.samtools.win32'],
+ 'packages': package_list,
'requires': ['cython (>=0.21)'],
'ext_modules': [chtslib,
csamfile,
cfaidx,
cutils],
'cmdclass': cmdclass,
- 'package_dir': {'pysam': 'pysam',
- 'pysam.include.htslib': 'htslib',
- 'pysam.include.samtools': 'samtools',
- 'pysam.include.bcftools': 'bcftools'},
+ 'package_dir': package_dirs,
'package_data': {'': ['*.pxd', '*.h'], },
# do not pack in order to permit linking to csamtools.so
'zip_safe': False,
import os
import shutil
import sys
+import re
+import copy
import collections
import subprocess
import logging
import pysam
import pysam.samtools
from TestUtils import checkBinaryEqual, checkURL, \
- check_samtools_view_equal, checkFieldEqual, force_str
+ check_samtools_view_equal, checkFieldEqual, force_str, \
+ get_temp_filename
DATADIR = "pysam_data"
self.assertTrue(data)
class TestHeaderWriteRead(unittest.TestCase):
    """Check that a header survives a write/read round trip.

    The header below is written together with a single read to SAM, BAM
    and CRAM output, read back, and compared with what was written.
    """

    # Shared by all test methods - must never be modified in place.
    header = {'SQ': [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}],
              'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891',
                      'PU': 'SC_1_10', "CN": "name:with:colon"},
                     {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891',
                      'PU': 'SC_2_12', "CN": "name:with:colon"}],
              'PG': [{'ID': 'P1', 'VN': '1.0', 'CL': 'tool'},
                     {'ID': 'P2', 'VN': '1.1',
                      'CL': 'tool with in option -R a\tb',
                      'PP': 'P1'}],
              'HD': {'VN': '1.0'},
              'CO': ['this is a comment', 'this is another comment'],
              }

    def compare_headers(self, a, b):
        '''compare two headers a and b.

        Every key of ``a`` must be present in ``b`` with the same number
        of entries, and the entries must compare equal pairwise.

        Ignore M5 and UR field as they are set application specific;
        they are removed from the dict rows of ``b`` before comparing.
        '''
        for ak, av in a.items():
            self.assertTrue(ak in b, "key '%s' not in '%s' " % (ak, b))
            self.assertEqual(
                len(av), len(b[ak]),
                "unequal number of entries for key {}: {} vs {}"
                .format(ak, av, b[ak]))

            # Note: for a dict-valued section (such as 'HD') zip() walks
            # the keys of both dicts, so only the keys are compared there.
            for row_a, row_b in zip(av, b[ak]):
                if isinstance(row_b, dict):
                    for x in ["M5", "UR"]:
                        row_b.pop(x, None)
                self.assertEqual(row_a, row_b)

    def check_read_write(self, flag_write, header):
        '''write ``header`` and one read using mode ``flag_write``, read
        the file back and compare the header found with ``header``.'''

        fn = get_temp_filename()
        try:
            with pysam.AlignmentFile(
                    fn,
                    flag_write,
                    header=header,
                    reference_filename="pysam_data/ex1.fa") as outf:
                a = pysam.AlignedSegment()
                a.query_name = "abc"
                outf.write(a)

            with pysam.AlignmentFile(fn) as inf:
                read_header = inf.header
        finally:
            # Remove the temporary file even when writing or reading fails.
            if os.path.exists(fn):
                os.unlink(fn)

        self.compare_headers(header, read_header)

    def test_SAM(self):
        self.check_read_write("wh", self.header)

    def test_BAM(self):
        self.check_read_write("wb", self.header)

    def test_CRAM(self):
        # A deep copy is required: the nested 'PG' entry is modified below
        # and a shallow copy would change the class-level header that
        # test_SAM and test_BAM rely on.
        header = copy.deepcopy(self.header)
        # for CRAM, \t needs to be quoted:
        header['PG'][1]['CL'] = re.sub(r"\t", r"\\\\t", header['PG'][1]['CL'])
        self.check_read_write("wc", header)
+
class TestUnmappedReads(unittest.TestCase):
# TODO