language: c
+stages:
+ - test
+ - name: deploy
+ if: tag IS present
+
env:
matrix:
- CONDA_PY=2.7
global:
- PYSAM_LINKING_TEST=1
- TWINE_USERNAME=grepall
- - secure: 'OcwwP8/o21+SGW0UVAnnCQwllhGSCq2HJzpI9EhX3kh6J9RTkyx/+drkg45bx1Z5u8zymuAFappEYzlpzqZE886XezkjOYGVa/u+Coqr1oT/BEJHFCkCA4o26yESp7Zy8aNj/juhB7Rfa77pIDXBayqTzbALz/AURMtZapasB18='
+ - secure: bTbky3Un19NAl62lix8bMLmBv9IGNhFkRXlZH+B253nYub7jwQwPQKum3ct9ea+XHJT5//uM0B8WAF6eyugpNkPQ7+S7SEH5BJuCt30nv6qvGhSO2AffZKeHEDnfW2kqGrivn87TqeomlSBlO742CD/V0wOIUwkTT9tutd+E7FU=
-_deploy_common: &deploy_common
- if: tag IS present
+_cibw_common: &cibw_common
+ addons: {}
install:
- - python3 -m pip install cibuildwheel twine
+ # Quote the requirement: an unquoted '>' is a shell redirect, which would drop the version pin.
+ - python3 -m pip install "cibuildwheel>=1.1.0" twine
+ script:
+ - set -e
+ - cibuildwheel --output-dir dist
+ - twine check dist/*
+ - twine upload --skip-existing dist/*
+
+_cibw_linux: &cibw_linux
+ stage: deploy
+ os: linux
+ language: python
+ python: '3.5'
+ services:
+ - docker
+ <<: *cibw_common
matrix:
include:
os: linux
language: python
python: '3.5'
- services:
- - docker
- env:
- - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && pip install -r requirements.txt"
- - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
addons:
apt:
packages:
- g++
- libcurl4-openssl-dev # for libcurl support in sdist
- libssl-dev # for s3 support in sdist
- <<: *deploy_common
+ install:
+ - python3 -m pip install Cython twine
script:
- set -e
- - cibuildwheel --output-dir dist
- - python3 -m pip install Cython
- python3 setup.py build_ext --inplace
- python3 setup.py sdist
- twine check dist/*
- twine upload --skip-existing dist/*
+ - <<: *cibw_linux
+ env:
+ - CIBW_BUILD="*_x86_64"
+ - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && python -m pip install -r requirements.txt"
+ - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
+ - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}'
+ - CIBW_TEST_COMMAND='python -c "import pysam"'
+ - <<: *cibw_linux
+ env:
+ - CIBW_BUILD="*_i686"
+ - CIBW_BEFORE_BUILD="yum install -y zlib-devel bzip2-devel xz-devel && python -m pip install -r requirements.txt"
+ - CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
+ - CIBW_REPAIR_WHEEL_COMMAND_LINUX='auditwheel repair -L . -w {dest_dir} {wheel}'
+ - CIBW_TEST_COMMAND='python -c "import pysam"'
- stage: deploy
os: osx
language: generic
env:
- - CIBW_BEFORE_BUILD="pip install -r requirements.txt"
+ - CIBW_BEFORE_BUILD="python -m pip install -r requirements.txt"
- CIBW_ENVIRONMENT='HTSLIB_CONFIGURE_OPTIONS="--disable-libcurl"'
- addons: {}
- <<: *deploy_common
- script:
- - set -e
- - cibuildwheel --output-dir dist
- - twine check dist/*
- - twine upload --skip-existing dist/*
+ - CIBW_TEST_COMMAND='python -c "import pysam"'
+ <<: *cibw_common
addons:
apt:
Release notes
=============
+Release 0.15.4
+==============
+
+Bugfix release. The principal reason for this release is to bump the
+minimum Cython version so that pip install pysam works with Python 3.8.
+
+* [#879] Fix add_meta function in libcbcf.pyx, so that meta-information
+ lines added to the header with this function follow the double-quoting
+ rules specified in the VCF4.2 and VCF4.3 specifications
+* [#863] Force arg to bytes to support non-ASCII encoding
+* [#875] Bump minimum Cython version
+* [#868] Prevent segfault on Python 2.7 AlignedSegment.compare(other=None)
+* [#867] Fix wheel building on TravisCI
+* [#799] disambiguate interpretation of bcf_read return code
+* [#841] Fix silent truncation of FASTQ with bad q strings
+* [#846] Prevent segmentation fault on ID, when handling malformed records
+* [#829] Run configure with the correct CC/CFLAGS/LDFLAGS env vars
+
+
Release 0.15.3
==============
-from utils import PysamDispatcher
+from pysam.utils import PysamDispatcher
BCFTOOLS_DISPATCH = [
"index",
else:
return toupper(ch)
-
+
cdef inline bint pileup_base_qual_skip(bam_pileup1_t * p, uint32_t threshold):
cdef uint32_t c
if p.qpos < p.b.core.l_qseq:
if c < threshold:
return True
return False
-
+
cdef inline char map_typecode_htslib_to_python(uint8_t s):
"""map an htslib typecode to the corresponding python typecode
"""
# 0 is unknown typecode
cdef char typecode = 0
-
+
if value_type is None:
if isinstance(value, int):
if value < 0:
"""
fmts, args = ["<"], []
- # htslib typecode
+ # htslib typecode
cdef uint8_t typecode
for tag in tags:
if typecode not in DATATYPE2FORMAT:
raise ValueError("invalid value type '{}' ({})".format(chr(typecode), array.typecode))
-
+
# use array.tostring() to retrieve byte representation and
# save as bytes
datafmt = "2sBBI%is" % (len(value) * DATATYPE2FORMAT[typecode][1])
typecode = get_tag_typecode(value)
if typecode == 0:
raise ValueError("could not deduce typecode for value {}".format(value))
-
+
if typecode == 'a' or typecode == 'A' or typecode == 'Z' or typecode == 'H':
value = force_bytes(value)
datafmt = "2sB%is" % (len(value)+1)
else:
datafmt = "2sB%s" % DATATYPE2FORMAT[typecode][0]
-
+
args.extend([pytag[:2],
typecode,
value])
Parameters
----------
- header -- :class:`~pysam.AlignmentHeader` object to map numerical
- identifiers to chromosome names. If not given, an empty
- header is created.
+ header:
+ :class:`~pysam.AlignmentHeader` object to map numerical
+ identifiers to chromosome names. If not given, an empty
+ header is created.
'''
# Now only called when instances are created from Python
self.cache_query_alignment_qualities = None
self.cache_query_sequence = None
self.cache_query_alignment_sequence = None
-
+
self.header = header
-
+
def __dealloc__(self):
bam_destroy1(self._delegate)
<,=,> to *other*
'''
+ # avoid segfault when other equals None
+ if other is None:
+ return -1
+
cdef int retval, x
cdef bam1_t *t
cdef bam1_t *o
cdef uint8_t *a = <uint8_t*>&t.core
cdef uint8_t *b = <uint8_t*>&o.core
-
+
retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t))
if retval:
return retval
raise ValueError('sam_format failed')
else:
raise NotImplementedError("todo")
-
+
ret = force_str(line.s[:line.l])
if line.m:
Parameters
----------
- sam -- :term:`SAM` formatted string
+ sam:
+ :term:`SAM` formatted string
"""
cdef AlignedSegment dest = cls.__new__(cls)
line.s = _sam
sam_parse1(&line, dest.header.ptr, dest._delegate)
-
+
return dest
cpdef tostring(self, htsfile=None):
Parameters
----------
- htsfile -- (deprecated) AlignmentFile object to map numerical
- identifiers to chromosome names. This parameter is present
- for backwards compatibility and ignored.
+ htsfile:
+ (deprecated) AlignmentFile object to map numerical
+ identifiers to chromosome names. This parameter is present
+ for backwards compatibility and ignored.
"""
return self.to_string()
-
+
def to_dict(self):
"""returns a json representation of the aligned segment.
Parameters
----------
- sam_dict -- dictionary of alignment values, keys corresponding to output from
- :meth:`todict()`.
+ sam_dict:
+ dictionary of alignment values, keys corresponding to output from
+ :meth:`todict()`.
"""
# let htslib do the parsing
"\t".join((sam_dict[x] for x in KEY_NAMES[:-1])) +
"\t" +
"\t".join(sam_dict.get(KEY_NAMES[-1], [])), header)
-
+
########################################################
## Basic attributes in order of appearance in SAM format
property query_name:
src.core.l_extranul = l_extranul
src.core.l_qname = l + l_extranul
-
+
# re-acquire pointer to location in memory
# as it might have moved
p = pysam_bam_get_qname(src)
return self.header.get_reference_name(self._delegate.core.mtid)
else:
raise ValueError("next_reference_name unknown if no header associated with record")
-
+
def __set__(self, reference):
cdef int mtid
if reference is None or reference == "*":
nbytes_old,
nbytes_new,
p)
-
+
if retval == NULL:
raise MemoryError("could not allocate memory")
# setting the unmapped flag requires recalculation of
# bin as alignment length is now implicitly 1
update_bin(self._delegate)
-
+
property mate_is_unmapped:
"""true if the mate is unmapped"""
def __get__(self):
def get_forward_sequence(self):
"""return the original read sequence.
-
+
Reads mapping to the reverse strand will be reverse
complemented.
def get_forward_qualities(self):
"""return base qualities of the read sequence.
-
+
Reads mapping to the reverse strand will be reversed.
"""
if self.is_reverse:
else:
return self.query_qualities
-
+
def get_aligned_pairs(self, matches_only=False, with_seq=False):
"""a list of aligned read (query) and reference positions.
If no cigar string is present, empty arrays will be returned.
- Parameters
- ----------
-
- Returns
- -------
-
- arrays : two arrays. The first contains the nucleotide counts within
- each cigar operation, the second contains the number of blocks for
- each cigar operation.
+ Returns:
+ arrays :
+ two arrays. The first contains the nucleotide counts within
+ each cigar operation, the second contains the number of blocks
+ for each cigar operation.
"""
values = []
cdef uint32_t ncigar = len(values)
-
+
cdef bam1_t * retval = pysam_bam_update(src,
pysam_get_n_cigar(src) * 4,
ncigar * 4,
This method accepts valid SAM specification value types, which
are::
-
+
A: printable char
i: signed int
f: float
When deducing the type code by the python type of *value*, the
following mapping is applied::
-
+
i: python int
f: python float
Z: python str or bytes
B: python array.array, list or tuple
-
+
Note that a single character string will be output as 'Z' and
not 'A' as the former is the more general type.
"""
specification) as well as additional value type 'd' as
implemented in htslib.
- Parameters
- ----------
+ Parameters:
- tag :
- data tag.
+ tag :
+ data tag.
- with_value_type : Optional[bool]
- if set to True, the return value is a tuple of (tag value, type code).
- (default False)
+ with_value_type : Optional[bool]
+ if set to True, the return value is a tuple of (tag value, type
+ code). (default False)
- Returns
- -------
+ Returns:
- A python object with the value of the `tag`. The type of the
- object depends on the data type in the data record.
+ A python object with the value of the `tag`. The type of the
+ object depends on the data type in the data record.
- Raises
- ------
+ Raises:
- KeyError
- If `tag` is not present, a KeyError is raised.
+ KeyError
+ If `tag` is not present, a KeyError is raised.
"""
cdef uint8_t * v
"""set the minimum base quality for this pileup column.
"""
self.min_base_quality = min_base_quality
-
+
def __len__(self):
"""return number of reads aligned to this column.
see :meth:`get_num_aligned`
"""
return self.get_num_aligned()
-
+
property reference_id:
'''the reference sequence number as defined in the header'''
def __get__(self):
def get_num_aligned(self):
"""return number of aligned bases at pileup column position.
-
+
This method applies a base quality filter and the number is
equal to the size of :meth:`get_query_sequences`,
:meth:`get_mapping_qualities`, etc.
cdef bam_pileup1_t * p = NULL
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
-
+
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
if p == NULL:
Optionally, the bases/sequences can be annotated according to the samtools
mpileup format. This is the format description from the samtools mpileup tool::
-
+
Information on match, mismatch, indel, strand, mapping
quality and start and end of a read are all encoded at the
read base column. At this column, a dot stands for a match
To reproduce samtools mpileup format, set all of mark_matches,
mark_ends and add_indels to True.
-
+
Parameters
----------
If True, add bases for bases inserted into the reference and
'N's for base skipped from the reference. If a reference sequence
is given, add the actual bases.
-
+
Returns
-------
cdef uint8_t rb = 0
cdef kstring_t * buf = &self.buf
cdef bam_pileup1_t * p = NULL
-
+
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
# see samtools pileup_seq
if mark_ends and p.is_head:
kputc('^', buf)
-
+
if p.b.core.qual > 93:
kputc(126, buf)
else:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
-
+
if p.qpos < p.b.core.l_qseq:
c = bam_get_qual(p.b)[p.qpos]
else:
"""
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
-
+
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
-
+
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(p.b.core.qual)
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
-
+
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(p.qpos)
"""
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
-
+
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
-
+
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(charptr_to_str(pysam_bam_get_qname(p.b)))
return result
-
+
cdef class PileupRead:
'''Representation of a read aligned to a particular position in the
def __get__(self):
return self._is_refskip
-
+
cpdef enum CIGAR_OPS:
CMATCH = 0
cpdef enum SAM_FLAGS:
- # the read is paired in sequencing, no matter whether it is mapped in a pair
+ # the read is paired in sequencing, no matter whether it is mapped in a pair
FPAIRED = 1
- # the read is mapped in a proper pair
+ # the read is mapped in a proper pair
FPROPER_PAIR = 2
- # the read itself is unmapped; conflictive with FPROPER_PAIR
+ # the read itself is unmapped; conflictive with FPROPER_PAIR
FUNMAP = 4
- # the mate is unmapped
+ # the mate is unmapped
FMUNMAP = 8
- # the read is mapped to the reverse strand
+ # the read is mapped to the reverse strand
FREVERSE = 16
- # the mate is mapped to the reverse strand
+ # the mate is mapped to the reverse strand
FMREVERSE = 32
- # this is read1
+ # this is read1
FREAD1 = 64
- # this is read2
+ # this is read2
FREAD2 = 128
- # not primary alignment
+ # not primary alignment
FSECONDARY = 256
- # QC failure
+ # QC failure
FQCFAIL = 512
- # optical or PCR duplicate
+ # optical or PCR duplicate
FDUP = 1024
- # supplementary alignment
- FSUPPLEMENTARY = 2048
+ # supplementary alignment
+ FSUPPLEMENTARY = 2048
__all__ = [
if self.ptr.target_name[x] == NULL:
raise MemoryError("could not allocate {} bytes".format(len(name) + 1, sizeof(char)))
strncpy(self.ptr.target_name[x], name, len(name))
-
+
return self
@classmethod
raise KeyError("incomplete sequence information in '%s'" % str(fields))
except ValueError:
raise ValueError("wrong sequence information in '%s'" % str(fields))
-
+
return cls._from_text_and_lengths(text, reference_names, reference_lengths)
-
+
@classmethod
def from_dict(cls, header_dict):
def _build_sequence_section(self):
"""return sequence section of header.
-
+
The sequence section is built from the list of reference names and
lengths stored in the BAM-file and not from any @SQ entries that
are part of the header's text section.
"""
-
+
cdef int x
text = []
for x in range(self.ptr.n_targets):
force_str(self.ptr.target_name[x]),
self.ptr.target_len[x]))
return "".join(text)
-
+
def to_dict(self):
"""return two-level dictionary with header information from the file.
raise KeyError("unknown reference {}".format(reference))
else:
return self.ptr.target_len[tid]
-
+
def is_valid_tid(self, int tid):
"""
return True if the numerical :term:`tid` is valid; False otherwise.
"""
reference = force_bytes(reference)
return bam_name2id(self.ptr, reference)
-
+
def __str__(self):
'''string with the full contents of the :term:`sam file` header as a
string.
def get(self, *args):
return self.to_dict().get(*args)
-
+
def __len__(self):
return self.to_dict().__len__()
raise ValueError(
"either supply options `template`, `header`, `text` or both `reference_names` "
"and `reference_lengths` for writing")
-
+
if template:
# header is copied, though at the moment not strictly
# necessary as AlignmentHeader is immutable.
"SAM? file does not have a valid header (mode='%s'), "
"please provide reference_names and reference_lengths")
self.header = makeAlignmentHeader(hdr)
-
+
# set filename with reference sequences
if self.is_cram and reference_filename:
creference_filename = self.reference_filename
ignore orphans (paired reads that are not in a proper pair).
The default is to ignore orphans.
-
+
min_base_quality: int
Minimum base quality. Bases below the minimum quality will
"""
cdef int rtid, has_coord
cdef int32_t rstart, rstop
-
+
if not self.is_open:
raise ValueError("I/O operation on closed file")
raise ValueError("interval of size 0")
if _stop < _start:
raise ValueError("interval of size less than 0")
-
+
cdef int length = _stop - _start
cdef c_array.array int_array_template = array.array('L', [])
cdef c_array.array count_a
# count
seq = read.seq
quality = read.query_qualities
-
+
for qpos, refpos in read.get_aligned_pairs(True):
if qpos is not None and refpos is not None and \
_start <= refpos < _stop:
base_position = r.pos
for op, nt in r.cigartuples:
- if op in match_or_deletion:
+ if op in match_or_deletion:
base_position += nt
- elif op == BAM_CREF_SKIP:
+ elif op == BAM_CREF_SKIP:
junc_start = base_position
base_position += nt
res[(junc_start, base_position)] += 1
return res
-
+
def close(self):
'''closes the :class:`pysam.AlignmentFile`.'''
'''
write a single :class:`pysam.AlignedSegment` to disk.
- Raises
- ------
- ValueError
- if the writing failed
-
- Returns
- -------
+ Raises:
+ ValueError
+ if the writing failed
- int : the number of bytes written. If the file is closed,
- this will be 0.
+ Returns:
+ int :
+ the number of bytes written. If the file is closed,
+ this will be 0.
'''
if not self.is_open:
return 0
"AlignedSegment refers to reference number {} that "
"is larger than the number of references ({}) in the header".format(
read._delegate.core.tid, self.header.ptr.n_targets))
-
+
cdef int ret
with nogil:
ret = sam_write1(self.htsfile,
"""return statistics about mapped/unmapped reads per chromosome as
they are stored in the index.
- Returns
- -------
- list : a list of records for each chromosome. Each record has the attributes 'contig',
- 'mapped', 'unmapped' and 'total'.
+ Returns:
+ list :
+ a list of records for each chromosome. Each record has the
+ attributes 'contig', 'mapped', 'unmapped' and 'total'.
"""
-
+
self.check_index()
cdef int tid
cdef uint64_t mapped, unmapped
mapped,
unmapped,
mapped + unmapped)))
-
+
return results
###############################################################
if self.header is None:
raise ValueError("header not available in closed files")
return self.header.get_reference_length(reference)
-
+
property nreferences:
"""int with the number of :term:`reference` sequences in the file.
This is a read-only attribute."""
cdef char *cfilename
cdef char *creference_filename
cdef char *cindexname = NULL
-
+
if not samfile.is_open:
raise ValueError("I/O operation on closed file")
# reopen the file - note that this makes the iterator
# slow and causes pileup to slow down significantly.
if multiple_iterators:
-
+
cfilename = samfile.filename
with nogil:
self.htsfile = hts_open(cfilename, 'r')
self.index = sam_index_load2(self.htsfile, cfilename, cindexname)
else:
self.index = NULL
-
+
# need to advance in newly opened file to position after header
# better: use seek/tell?
with nogil:
self.header = makeAlignmentHeader(hdr)
self.owns_samfile = True
-
+
# options specific to CRAM files
if samfile.is_cram and samfile.reference_filename:
creference_filename = samfile.reference_filename
raise IOError('truncated file')
else:
raise IOError("error while reading file {}: {}".format(self.samfile.filename, self.retval))
-
+
def __dealloc__(self):
hts_itr_destroy(self.iter)
cdef int __advance_all(void *data, bam1_t *b):
- '''only use reads for pileup passing basic filters such as
+ '''only use reads for pileup passing basic filters such as
BAM_FUNMAP, BAM_FSECONDARY, BAM_FQCFAIL, BAM_FDUP
'''
continue
if d.flag_require and not (b.core.flag & d.flag_require):
continue
-
+
# reload sequence
if d.fastafile != NULL and b.core.tid != d.tid:
if d.seq != NULL:
sam_prob_realn(b, d.seq, d.seq_len, 7)
else:
sam_prob_realn(b, d.seq, d.seq_len, 3)
-
+
if d.seq != NULL and d.adjust_capq_threshold > 10:
q = sam_cap_mapq(b, d.seq, d.seq_len, d.adjust_capq_threshold)
if q < 0:
continue
elif b.core.qual > q:
b.core.qual = q
-
+
if b.core.qual < d.min_mapping_quality:
continue
if d.ignore_orphans and b.core.flag & BAM_FPAIRED and not (b.core.flag & BAM_FPROPER_PAIR):
continue
-
+
break
-
+
return ret
self.iterdata.compute_baq = kwargs.get("compute_baq", True)
self.iterdata.redo_baq = kwargs.get("redo_baq", False)
self.iterdata.ignore_orphans = kwargs.get("ignore_orphans", True)
-
+
self.tid = 0
self.pos = 0
self.n_plp = 0
'''
return true if iterator is associated with a reference'''
return self.fastafile
-
+
cdef _setup_iterator(self,
int tid,
int start,
cdef void * data[1]
data[0] = <void*>&self.iterdata
-
+
if self.stepper is None or self.stepper == "all":
with nogil:
self.pileup_iter = bam_mplp_init(1,
if self.ignore_overlaps:
with nogil:
bam_mplp_init_overlaps(self.pileup_iter)
-
+
cdef reset(self, tid, start, stop):
'''reset iterator position.
# &self.iterdata)
with nogil:
bam_mplp_reset(self.pileup_iter)
-
+
cdef _free_pileup_iter(self):
'''free the memory alloc'd by bam_plp_init.
if self.iterdata.seq != NULL:
free(self.iterdata.seq)
self.iterdata.seq = NULL
-
+
# backwards compatibility
-
+
def hasReference(self):
return self.has_reference()
cdef char * getSequence(self):
def addReference(self, FastaFile fastafile):
return self.add_reference(fastafile)
-
+
cdef class IteratorColumnRegion(IteratorColumn):
'''iterates over a region only.
'''
def __next__(self):
cdef int n
-
+
while 1:
n = self.cnext()
if n < 0:
cdef uint64_t pos
cdef bam_hdr_t * hdr = self.header.ptr
-
+
while ret > 0:
with nogil:
pos = bgzf_tell(hts_get_bgzfp(self.htsfile))
"""copy data from src to dest where the size of the elements (src_type/dst_type) differ
as well as the number of elements (src_values/dst_values).
"""
-
+
cdef char *src_datac
cdef char *dst_datac
cdef int8_t *src_datai8
if key == 'phased':
sample.phased = bool(value)
return
-
+
cdef bcf_hdr_t *hdr = sample.record.header.ptr
cdef bcf1_t *r = sample.record.ptr
cdef int fmt_id
&value_count, &scalar, &realloc)
vlen = value_count < 0
value_count = len(values)
-
+
# If we can, write updated values to existing allocated storage.
if fmt and not realloc:
r.d.indiv_dirty = 1
if self.ptr.dirty:
bcf_hdr_sync(self.ptr)
+
def add_meta(self, key, value=None, items=None):
"""Add metadata to this header"""
if not ((value is not None) ^ (items is not None)):
hrec.value = strdup(force_bytes(value))
else:
for key, value in items:
+
+ quoted = True
+ if key in set(("ID", "Number", "Type")):
+ quoted = False
+
key = force_bytes(key)
bcf_hrec_add_key(hrec, key, <int>len(key))
value = force_bytes(str(value))
- quoted = strpbrk(value, ' ;,"\t<>') != NULL
+
bcf_hrec_set_val(hrec, hrec.nkeys-1, value, <int>len(value), quoted)
except:
bcf_hrec_destroy(hrec)
raise ValueError('Error unpacking VariantRecord')
# causes a memory leak https://github.com/pysam-developers/pysam/issues/773
# return bcf_str_cache_get_charptr(r.d.id) if r.d.id != b'.' else None
+ if (r.d.m_id == 0):
+ raise ValueError('Error extracing ID')
return charptr_to_str(r.d.id) if r.d.id != b'.' else None
@id.setter
def __next__(self):
cdef int ret
+ cdef int errcode
cdef bcf1_t *record = bcf_init1()
if not record:
ret = bcf_read1(self.htsfile, self.header.ptr, record)
if ret < 0:
+ errcode = record.errcode
bcf_destroy1(record)
+ if errcode:
+ raise IOError('unable to parse next record')
if ret == -1:
raise StopIteration
elif ret == -2:
# potentially unnecessary optimization that also sets max_unpack
if not include_samples:
self.drop_samples = True
+
if self.persist:
return FastxRecord(proxy=makeFastqProxy(self.entry))
return makeFastqProxy(self.entry)
- else:
+ elif (l == -1):
raise StopIteration
+ elif (l == -2):
+ raise ValueError('truncated quality string in {0}'
+ .format(self._filename))
+ else:
+ raise ValueError('unknown problem parsing {0}'
+ .format(self._filename))
# Compatibility Layer for pysam 0.8.1
cdef class FastqFile(FastxFile):
Returns
-------
- tuple : a tuple of `flag`, :term:`tid`, `start` and
- `stop`. The flag indicates whether no coordinates were
- supplied and the genomic region is the complete genomic space.
+ tuple :
+ a tuple of `flag`, :term:`tid`, `start` and
+ `stop`. The flag indicates whether no coordinates were
+ supplied and the genomic region is the complete genomic space.
Raises
------
if skip_next:
skip_next = False
continue
- if arg in SIMPLE_FLAGS or (len(arg) > 2 and arg.startswith('-@')):
+ if arg in SIMPLE_FLAGS or (len(arg) > 2 and force_bytes(arg).startswith(b'-@')):
continue
if arg in ARGUMENTS:
skip_next = True
-from utils import PysamDispatcher
+from pysam.utils import PysamDispatcher
# samtools command line options to export in python
#
# pysam versioning information
-__version__ = "0.15.3"
+__version__ = "0.15.4"
# TODO: upgrade number
__samtools_version__ = "1.9"
-cython>=0.24.1
+cython>=0.29.12
import sys
import sysconfig
from contextlib import contextmanager
-from setuptools import Extension, setup
+from setuptools import setup
from cy_build import CyExtension as Extension, cy_build_ext as build_ext
try:
import cython
'classifiers': [_f for _f in classifiers.split("\n") if _f],
'url': "https://github.com/pysam-developers/pysam",
'packages': package_list,
- 'requires': ['cython (>=0.21)'],
+ 'requires': ['cython (>=0.29.12)'],
'ext_modules': [Extension(**opts) for opts in modules],
'cmdclass': cmdclass,
'package_dir': package_dirs,
def testCompare(self):
'''check comparison functions.'''
a = self.build_read()
+ b = None
+
+ self.assertFalse(a is b)
+ self.assertFalse(a == b)
+ self.assertEqual(-1, a.compare(b))
+
b = self.build_read()
self.assertEqual(0, a.compare(b))