From 18e49038bca0bc7775fc8f6e3703b207815a4b91 Mon Sep 17 00:00:00 2001 From: Afif Elghraoui Date: Wed, 11 Nov 2015 18:45:10 -0800 Subject: [PATCH] Imported Upstream version 0.8.4+ds --- README.rst | 5 +- benchmark/AlignedSegment_bench.py | 43 + doc/api.rst | 15 +- doc/conf.py | 20 +- doc/faq.rst | 26 +- doc/glossary.rst | 34 +- doc/release.rst | 86 + doc/usage.rst | 22 +- pysam/TabProxies.pxd | 94 - pysam/__init__.py | 64 +- pysam/calignedsegment.pxd | 94 + pysam/calignedsegment.pyx | 2277 +++++++ pysam/calignmentfile.pxd | 71 +- pysam/calignmentfile.pyx | 3595 +++------- pysam/cbcf.pxd | 2 +- pysam/cbcf.pyx | 694 +- pysam/cfaidx.pxd | 54 +- pysam/cfaidx.pyx | 470 +- pysam/chtslib.pxd | 43 +- pysam/chtslib.pyx | 97 +- pysam/csamfile.pxd | 4 +- pysam/csamfile.pyx | 2 +- pysam/csamtools.pxd | 13 +- pysam/csamtools.pyx | 63 +- pysam/ctabix.pxd | 39 +- pysam/ctabix.pyx | 312 +- pysam/ctabixproxies.pxd | 59 + pysam/{TabProxies.pyx => ctabixproxies.pyx} | 176 +- pysam/cutils.pxd | 27 + pysam/cutils.pyx | 214 + pysam/cvcf.pxd | 40 - pysam/cvcf.pyx | 152 +- pysam/htslib_util.h | 3 - pysam/pysam_util.c | 2 +- pysam/tabix_util.c | 5 +- pysam/version.py | 2 +- requires.txt => requirements.txt | 0 save/pysam_test2.6.py | 2 +- setup.py | 228 +- tests/AlignedSegment_test.py | 236 +- tests/AlignmentFile_test.py | 314 +- tests/SamFile_test.py | 74 +- tests/TestUtils.py | 6 +- tests/faidx_test.py | 99 +- tests/pysam_data/Makefile | 2 +- tests/pysam_data/faidx_ex1.fa | 6540 +++++++++++++++++++ tests/pysam_data/{ex1.fq => faidx_ex1.fq} | 0 tests/samtools_test.py | 93 +- tests/tabix_test.py | 46 +- 49 files changed, 12538 insertions(+), 4021 deletions(-) create mode 100644 benchmark/AlignedSegment_bench.py delete mode 100644 pysam/TabProxies.pxd create mode 100644 pysam/calignedsegment.pxd create mode 100644 pysam/calignedsegment.pyx create mode 100644 pysam/ctabixproxies.pxd rename pysam/{TabProxies.pyx => ctabixproxies.pyx} (87%) create mode 100644 pysam/cutils.pxd create mode 
100644 pysam/cutils.pyx rename requires.txt => requirements.txt (100%) create mode 100644 tests/pysam_data/faidx_ex1.fa rename tests/pysam_data/{ex1.fq => faidx_ex1.fq} (100%) diff --git a/README.rst b/README.rst index cbe09d2..cf3c260 100644 --- a/README.rst +++ b/README.rst @@ -2,6 +2,9 @@ Pysam ===== +.. image:: https://travis-ci.org/pysam-developers/pysam.svg + :alt: pysam build status + Pysam is a python module for reading and manipulating files in the SAM/BAM format. The SAM/BAM format is a way to store efficiently large numbers of alignments (`Li 2009`_), such as those routinely created by @@ -15,7 +18,7 @@ The latest version is available through type:: pip install pysam - . + Pysam documentation is available through https://readthedocs.org/ from `here `_ diff --git a/benchmark/AlignedSegment_bench.py b/benchmark/AlignedSegment_bench.py new file mode 100644 index 0000000..98286d0 --- /dev/null +++ b/benchmark/AlignedSegment_bench.py @@ -0,0 +1,43 @@ +"""Benchmarking module for AlignedSegment functionality""" + +import timeit + +iterations = 10000 +repeats = 5 + +setup_binary_tag = """ +import pysam +import array +read = pysam.AlignedSegment() +read.set_tag('FZ', array.array('H', range(1000))) +""" + +setup_binary_tag_from_file = """ +import pysam +with pysam.AlignmentFile("../tests/pysam_data/example_btag.bam", "rb") as inf: + read = inf.fetch().next() +""" + +def test_read_binary_get_tag(read): + tags = read.get_tag('FZ') + +def test_read_and_process_binary_get_tag(read): + tags = sum(read.get_tag('FZ')) + +tests = ( + ("test_read_binary_get_tag", "setup_binary_tag"), + ("test_read_binary_get_tag", "setup_binary_tag_from_file"), + ("test_read_and_process_binary_get_tag", "setup_binary_tag"), + ) + +for repeat in range(repeats): + print ("# repeat=", repeat) + for testf, setup_name in tests: + setup = locals()[setup_name] + setup += """\nfrom __main__ import %s""" % testf + #try: + t = timeit.timeit("%s(read)" % testf, number=iterations, setup=setup) + 
#except AttributeError, msg: + # print msg + # continue + print ("%5.2f\t%s\t%s" % (t,testf, setup_name)) diff --git a/doc/api.rst b/doc/api.rst index d700ac5..c756959 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -58,13 +58,14 @@ reads are represented as :class:`~pysam.PileupRead` objects in the import pysam samfile = pysam.AlignmentFile("ex1.bam", "rb" ) for pileupcolumn in samfile.pileup("chr1", 100, 120): - print ("\ncoverage at base %s = %s" % + print ("\ncoverage at base %s = %s" % (pileupcolumn.pos, pileupcolumn.n)) for pileupread in pileupcolumn.pileups: - if not pileupread.is_del and not pileupread.is_refskip: # query position is None if is_del or is_refskip is set. + if not pileupread.is_del and not pileupread.is_refskip: + # query position is None if is_del or is_refskip is set. print ('\tbase in read %s = %s' % - (pileupread.alignment.query_name, - pileupread.alignment.query_sequence[pileupread.query_position])) + (pileupread.alignment.query_name, + pileupread.alignment.query_sequence[pileupread.query_position])) samfile.close() @@ -199,6 +200,12 @@ Fastq files .. autoclass:: pysam.FastqFile :members: +VCF files +--------- +.. autoclass:: pysam.VariantFile + :members: +.. autoclass:: pysam.VariantHeader + :members: diff --git a/doc/conf.py b/doc/conf.py index 19a4563..ede1809 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -16,21 +16,25 @@ import sys, os, glob # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-_libdir = "../build/lib.%s-%s-%s.%s" % (os.uname()[0].lower(), os.uname()[4],sys.version_info[0], sys.version_info[1] ) -if os.path.exists( _libdir ): - sys.path.insert(0, os.path.abspath( _libdir ) ) +_libdir = "../build/lib.%s-%s-%s.%s" % (os.uname()[0].lower(), os.uname()[4], + sys.version_info[0], sys.version_info[1]) +if os.path.exists(_libdir): + sys.path.insert(0, os.path.abspath(_libdir)) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', - 'sphinx.ext.ifconfig', - 'sphinx.ext.intersphinx'] +extensions = ['sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.todo', + 'sphinx.ext.ifconfig', + 'sphinx.ext.intersphinx', +# 'numpydoc'] + 'sphinx.ext.napoleon'] intersphinx_mapping = {'python': ('http://docs.python.org/3.2', None)} - # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -61,6 +65,8 @@ rst_epilog = ''' ''' +autosummary_generate = True + # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. diff --git a/doc/faq.rst b/doc/faq.rst index 412a647..b414305 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -38,10 +38,10 @@ The following code will cause unexpected behaviour:: samfile = pysam.AlignmentFile("pysam_ex1.bam", "rb") iter1 = samfile.fetch("chr1") - print iter1.next().reference_id + print (iter1.next().reference_id) iter2 = samfile.fetch("chr2") - print iter2.next().reference_id - print iter1.next().reference_id + print (iter2.next().reference_id) + print (iter1.next().reference_id) This will give the following output:: @@ -61,10 +61,10 @@ chr2. 
The correct way to work with multiple iterators is:: samfile = pysam.AlignmentFile("pysam_ex1.bam", "rb") iter1 = samfile.fetch("chr1", all) - print iter1.next().reference_id + print (iter1.next().reference_id) iter2 = samfile.fetch("chr2") - print iter2.next().reference_id - print iter1.next().reference_id + print (iter2.next().reference_id) + print (iter1.next().reference_id) Here, the output is:: @@ -100,7 +100,19 @@ in the iteration by adding the ``until_eof=True`` flag:: bf = pysam.AlignemFile(fname, "rb") for r in bf.fetch(until_eof=True): if r.is_unmapped: - print "read is unmapped" + print ("read is unmapped") + +I can't call AlignmentFile.fetch on a file without index +======================================================== + +:meth:`~pysam.AlignmentFile.fetch` requires an index when +iterating over a SAM/BAM file. To iterate over a file without +index, use the ``until_eof=True`` flag:: + + bf = pysam.AlignmentFile(fname, "rb") + for r in bf.fetch(until_eof=True): + print (r) + BAM files with a large number of reference sequences is slow ============================================================ diff --git a/doc/glossary.rst b/doc/glossary.rst index e5c69c2..f40bcfb 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -11,11 +11,18 @@ Glossary ``[ (0,3), (1,5), (0,2) ]`` refers to an alignment with 3 matches, 5 insertions and another 2 matches. - region - A genomic region, stated relative to a reference sequence. A region consists of - reference name ('chr1'), start (100000), and end (200000). - 0-based coordinates. Can be expressed as a string ('chr1:10000:20000') - + region + A genomic region, stated relative to a reference sequence. A + region consists of reference name ('chr1'), start (10000), and + end (20000). Start and end can be omitted for regions spanning + a whole chromosome. If end is missing, the region will span from + start to the end of the chromosome.
Within pysam, coordinates + are 0-based, half-open intervals, i.e., the position 10,000 is + part of the interval, but 20,000 is not. An exception is + :term:`samtools` compatible region strings such as + 'chr1:10000-20000', which are closed, i.e., both positions 10,000 + and 20,000 are part of the interval. + column Reads that are aligned to a base in the :term:`reference` sequence. @@ -85,3 +92,20 @@ Glossary sequence will not be part of the alignment record, in contrast to :term:`soft clipped` reads. + VCF + Variant call format + + BCF + Binary :term:`VCF` + + tabix + Utility in the htslib package to index :term:`bgzip` compressed + files. + + faidx + Utility in the samtools package to index :term:`fasta` formatted + files. + + bgzip + Utility in the htslib package to block compress genomic data + files. diff --git a/doc/release.rst b/doc/release.rst index 29d21ab..e02c818 100644 --- a/doc/release.rst +++ b/doc/release.rst @@ -2,6 +2,92 @@ Release notes ============= +Release 0.8.4 +============= + +This release contains numerous bugfixes and a first implementation of +a pythonic interface to VCF/BCF files. Note that this code is still +incomplete and preliminary, but does offer a nearly complete immutable +Pythonic interface to VCF/BCF metadata and data with reading and +writing capability. + +Potential issues when upgrading from v0.8.3: + +* binary tags are now returned as python arrays +* renamed several methods for pep8 compatibility, old names still retained for + backwards compatibility, but should be considered deprecated. + * gettid() is now get_tid() + * getrname() is now get_reference_name() + * parseRegion() is now parse_region() +* faidx now returns strings and not binary strings in py3. +* The cython components have been broken up into smaller files with + more specific content. This will affect users using the cython + interfaces.
+ +Edited list of commit log changes: + +* fixes AlignmentFile.check_index to return True +* add RG/PM header tag - closes #179 +* add with_seq option to get_aligned_pairs +* use char * inside reconsituteReferenceSequence +* add soft clipping for get_reference_sequence +* add get_reference_sequence +* queryEnd now computes length from cigar string if no sequence present, closes #176 +* tolerate missing space at end of gtf files, closes #162 +* do not raise Error when receiving output on stderr +* add docu about fetching without index, closes #170 +* FastaFile and FastxFile now return strings in python3, closes #173 +* py3 compat: relative -> absolute imports. +* add reference_name and next_reference_name attributes to AlignedSegment +* add function signatures to cvcf cython. Added note about other VCF code. +* add context manager functions to FastaFile +* add reference_name and next_reference_name attributes to AlignedSegment +* PileupColumn also gets a reference_name attribute. +* add context manager functions to FastaFile +* TabixFile.header for remote files raises AttributeError, fixes #157 +* add context manager interface to TabixFile, closes #165 +* change ctypedef enum to typedef enum for cython 0.23 +* add function signatures to cvcf cython, also added note about other VCF code +* remove exception for custom upper-case header record tags. +* rename VALID_HEADER_FIELDS to KNOWN_HEADER_FIELDS +* fix header record tag parsing for custom tags. +* use cython.str in count_coverage, fixes #141 +* avoid maketrans (issues with python3) +* refactoring: AlignedSegment now in separate module +* do not execute remote tests if URL not available +* fix the unmapped count, incl reads with no SQ group +* add raw output to tags +* added write access for binary tags +* bugfix in call to resize +* implemented writing of binary tags from arrays +* implemented convert_binary_tag to use arrays +* add special cases for reads that are unmapped or whose mates are unmapped. 
+* rename TabProxies to ctabixproxies +* remove underscores from utility functions +* move utility methods into cutils +* remove callback argument to fetch - closes #128 +* avoid calling close in dealloc +* add unit tests for File object opening +* change AlignmentFile.open to filepath_or_object +* implement copy.copy, close #65 +* add caching of array attributes in AlignedSegment, closes #121 +* add export of Fastafile +* remove superfluous pysam_dispatch +* use persist option in FastqFile +* get_tag: expose tag type if requested with `with_value_type` +* fix to allow reading vcf record info via tabix-based vcf reader +* add pFastqProxy and pFastqFile objects to make it possible to work with multiple fastq records per file handle, unlike FastqProxy/FastqFile. +* release GIL around htslib IO operations +* More work on read/write support, API improvements +* add `phased` property on `VariantRecordSample` +* add mutable properties to VariantRecord +* BCF fixes and start of read/write support +* VariantHeaderRecord objects now act like mappings for attributes. +* add VariantHeader.alts dict from alt ID->Record. +* Bug fix to string representation of structured header records. +* VariantHeader is now mutable + + Release 0.8.3 ============= diff --git a/doc/usage.rst b/doc/usage.rst index f4dd4d5..e005893 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -265,6 +265,26 @@ form: .. Calling indels works along the same lines, using the :class:`pysam.IteratorIndelCalls` .. and :class:`pysam.IteratorIndelCaller`. + +==================================== +Working with VCF/BCF formatted files +==================================== + +To iterate through a VCF/BCF formatted file use +:class:`~pysam.VariantFile`:: + + from pysam import VariantFile + + bcf_in = VariantFile("test.bcf") # auto-detect input format + bcf_out = VariantFile('-', 'w', header=bcf_in.header) + + for rec in bcf_in.fetch('chr1', 100000, 200000): + bcf_out.write(rec) + +..
note:: + + The VCF/BCF API is preliminary and incomplete. + =============== Extending pysam =============== @@ -292,7 +312,7 @@ flagstat command and consists of three files: 2. The cython implementation :file:`_pysam_flagstat.pyx`. This script imports the pysam API via:: - from pysam.calignmentfile cimport AlignementFile, AlignedSegment + from pysam.calignmentfile cimport AlignmentFile, AlignedSegment This statement imports, amongst others, :class:`AlignedSegment` into the namespace. Speed can be gained from declaring diff --git a/pysam/TabProxies.pxd b/pysam/TabProxies.pxd deleted file mode 100644 index 22211eb..0000000 --- a/pysam/TabProxies.pxd +++ /dev/null @@ -1,94 +0,0 @@ -cdef extern from "stdlib.h": - void free(void *) - void *malloc(size_t) - void *calloc(size_t,size_t) - void *realloc(void *,size_t) - int c_abs "abs" (int) - int c_abs "abs" (int) - int atoi( char *nptr) - long atol( char *nptr) - double atof( char *nptr) - -cdef extern from "Python.h": - ctypedef struct FILE - char *fgets(char *str, int size, FILE *ifile) - int feof(FILE *stream) - size_t strlen(char *s) - size_t getline(char **lineptr, size_t *n, FILE *stream) - char *strstr(char *, char *) - char *strchr(char *string, int c) - int fileno(FILE *stream) - -cdef extern from "string.h": - int strcmp(char *s1, char *s2) - int strncmp(char *s1,char *s2,size_t len) - char *strcpy(char *dest,char *src) - char *strncpy(char *dest,char *src, size_t len) - char *strdup(char *) - char *strcat(char *,char *) - size_t strlen(char *s) - int memcmp( void * s1, void *s2, size_t len ) - void *memcpy(void *dest, void *src, size_t n) - void *memchr(void *s, int c, size_t n) - -cdef extern from "stdint.h": - ctypedef int int64_t - ctypedef int int32_t - ctypedef int uint32_t - ctypedef int uint8_t - ctypedef int uint64_t - -cdef class TupleProxy: - - cdef: - char * data - char ** fields - int nfields - int index - int nbytes - int offset - bint is_modified - - cdef encoding - - cdef int getMaxFields(self) - 
cdef int getMinFields(self) -# cdef char * _getindex(self, int idx) - - cdef take(self, char * buffer, size_t nbytes) - cdef present(self, char * buffer, size_t nbytes) - cdef copy(self, char * buffer, size_t nbytes) - cdef update(self, char * buffer, size_t nbytes) - -cdef class GTFProxy(TupleProxy) : - - cdef: - char * _attributes - cdef bint hasOwnAttributes - - cdef int getMaxFields(self) - cdef int getMinFields(self) - cdef char * getAttributes( self ) - -cdef class NamedTupleProxy(TupleProxy) : - pass - -cdef class BedProxy(NamedTupleProxy) : - - cdef: - char * contig - uint32_t start - uint32_t end - int bedfields - - cdef int getMaxFields(self) - cdef int getMinFields(self) - cdef update(self, char * buffer, size_t nbytes) - -cdef class VCFProxy(NamedTupleProxy) : - - cdef: - char * contig - uint32_t pos - - cdef update( self, char * buffer, size_t nbytes ) diff --git a/pysam/__init__.py b/pysam/__init__.py index efe39fd..32f8cfd 100644 --- a/pysam/__init__.py +++ b/pysam/__init__.py @@ -1,13 +1,18 @@ from pysam.libchtslib import * +from pysam.cutils import * +import pysam.cutils as cutils + +import pysam.cfaidx as cfaidx +from pysam.cfaidx import * import pysam.ctabix as ctabix from pysam.ctabix import * import pysam.csamfile as csamfile from pysam.csamfile import * import pysam.calignmentfile as calignmentfile from pysam.calignmentfile import * -import pysam.cfaidx as cfaidx -from pysam.cfaidx import * +import pysam.calignedsegment as calignedsegment +from pysam.calignedsegment import * import pysam.cvcf as cvcf from pysam.cvcf import * import pysam.cbcf as cbcf @@ -30,14 +35,13 @@ class SamtoolsError(Exception): class SamtoolsDispatcher(object): - '''samtools dispatcher. - - Emulates the samtools command line as module calls. + '''The samtools dispatcher emulates the samtools command line as + module calls. Captures stdout and stderr. - Raises a :class:`pysam.SamtoolsError` exception in case - samtools exits with an error code other than 0. 
+ Raises a :class:`pysam.SamtoolsError` exception in case samtools + exits with an error code other than 0. Some command line options are associated with parsers. For example, the samtools command "pileup -c" creates a tab-separated @@ -46,8 +50,8 @@ class SamtoolsDispatcher(object): will be processed in order checking for the presence of each option. - If no parser is given or no appropriate parser is found, the - stdout output of samtools commands will be returned. + If no parser is given or no appropriate parser is found, the stdout + output of samtools commands will be returned. ''' @@ -60,7 +64,11 @@ class SamtoolsDispatcher(object): self.stderr = [] def __call__(self, *args, **kwargs): - '''execute a samtools command + '''execute a samtools command. + + Keyword arguments: + catch_stdout -- redirect stdout from the samtools command and return as variable (default True) + raw -- ignore any parsers associated with this samtools command. ''' retval, stderr, stdout = csamtools._samtools_dispatch( self.dispatch, args, catch_stdout=kwargs.get("catch_stdout", True)) @@ -69,19 +77,23 @@ class SamtoolsDispatcher(object): raise SamtoolsError( 'csamtools returned with error %i: %s' % (retval, "\n".join(stderr))) + self.stderr = stderr - # samtools commands do not propagate the return code correctly. - # I have thus added this patch to throw if there is output on stderr. - # Note that there is sometimes output on stderr that is not an error, - # for example: [sam_header_read2] 2 sequences loaded. - # Ignore messages like these - stderr = [x for x in stderr - if not (x.startswith("[sam_header_read2]") or - x.startswith("[bam_index_load]") or - x.startswith("[bam_sort_core]") or - x.startswith("[samopen] SAM header is present"))] - if stderr: - raise SamtoolsError("\n".join(stderr)) + + # Uncommented for samtools 1.2 + # # samtools commands do not propagate the return code correctly. + # # I have thus added this patch to throw if there is output on stderr. 
+ # # Note that there is sometimes output on stderr that is not an error, + # # for example: [sam_header_read2] 2 sequences loaded. + # # Ignore messages like these + # stderr = [x for x in stderr + # if not (x.startswith("[sam_header_read2]") or + # x.startswith("[bam_index_load]") or + # x.startswith("[bam_sort_core]") or + # x.startswith("[samopen] SAM header is present"))] + + # if stderr: + # raise SamtoolsError("\n".join(stderr)) # call parser for stdout: if not kwargs.get("raw") and stdout and self.parsers: @@ -144,12 +156,14 @@ for key, options in SAMTOOLS_DISPATCH.items(): # hack to export all the symbols from separate modules __all__ = \ - libchtslib.__all__ + \ - ctabix.__all__ + \ + libchtslib.__all__ +\ + cutils.__all__ +\ + ctabix.__all__ +\ cvcf.__all__ +\ cbcf.__all__ +\ cfaidx.__all__ +\ calignmentfile.__all__ +\ + calignedsegment.__all__ +\ csamfile.__all__ +\ ["SamtoolsError", "SamtoolsDispatcher"] +\ list(SAMTOOLS_DISPATCH) +\ @@ -179,7 +193,7 @@ def get_libraries(): dirname = os.path.abspath(os.path.join(os.path.dirname(__file__))) return [os.path.join(dirname, x) for x in ( 'libchtslib.so', - 'TabProxies.so', + 'ctabixproxies.so', 'cfaidx.so', 'csamfile.so', 'cvcf.so', diff --git a/pysam/calignedsegment.pxd b/pysam/calignedsegment.pxd new file mode 100644 index 0000000..ce82d88 --- /dev/null +++ b/pysam/calignedsegment.pxd @@ -0,0 +1,94 @@ +from pysam.chtslib cimport * + +cdef extern from "htslib_util.h": + + # add *nbytes* into the variable length data of *src* at *pos* + bam1_t * pysam_bam_update(bam1_t * b, + size_t nbytes_old, + size_t nbytes_new, + uint8_t * pos) + + # now: static + int aux_type2size(int) + + char * pysam_bam_get_qname(bam1_t * b) + uint32_t * pysam_bam_get_cigar(bam1_t * b) + uint8_t * pysam_bam_get_seq(bam1_t * b) + uint8_t * pysam_bam_get_qual(bam1_t * b) + uint8_t * pysam_bam_get_aux(bam1_t * b) + int pysam_bam_get_l_aux(bam1_t * b) + char pysam_bam_seqi(uint8_t * s, int i) + + uint16_t pysam_get_bin(bam1_t * b) + 
uint8_t pysam_get_qual(bam1_t * b) + uint8_t pysam_get_l_qname(bam1_t * b) + uint16_t pysam_get_flag(bam1_t * b) + uint16_t pysam_get_n_cigar(bam1_t * b) + void pysam_set_bin(bam1_t * b, uint16_t v) + void pysam_set_qual(bam1_t * b, uint8_t v) + void pysam_set_l_qname(bam1_t * b, uint8_t v) + void pysam_set_flag(bam1_t * b, uint16_t v) + void pysam_set_n_cigar(bam1_t * b, uint16_t v) + void pysam_update_flag(bam1_t * b, uint16_t v, uint16_t flag) + + +from pysam.calignmentfile cimport AlignmentFile +ctypedef AlignmentFile AlignmentFile_t + +cdef bytes TagToString(tuple tagtup) + +# Note: need to declare all C fields and methods here +cdef class AlignedSegment: + + # object that this AlignedSegment represents + cdef bam1_t * _delegate + + # the file from which this AlignedSegment originates (can be None) + cdef AlignmentFile _alignment_file + + # caching of array properties for quick access + cdef object cache_query_qualities + cdef object cache_query_alignment_qualities + cdef object cache_query_sequence + cdef object cache_query_alignment_sequence + + # add an alignment tag with value to the AlignedSegment + # an existing tag of the same name will be replaced. + cpdef set_tag(self, tag, value, value_type=?, replace=?) + + # add an alignment tag with value to the AlignedSegment + # an existing tag of the same name will be replaced. + cpdef get_tag(self, tag, with_value_type=?) + + # return true if tag exists + cpdef has_tag(self, tag) + + # returns a valid sam alignment string + cpdef bytes tostring(self, AlignmentFile_t handle) + + # returns the aux tag fields as a string. 
+ cdef bytes get_tag_string(self) + + +cdef class PileupColumn: + cdef bam_pileup1_t ** plp + cdef int tid + cdef int pos + cdef int n_pu + cdef AlignmentFile _alignment_file + + +cdef class PileupRead: + cdef AlignedSegment _alignment + cdef int32_t _qpos + cdef int _indel + cdef int _level + cdef uint32_t _is_del + cdef uint32_t _is_head + cdef uint32_t _is_tail + cdef uint32_t _is_refskip + +# factor methods +cdef makeAlignedSegment(bam1_t * src, AlignmentFile alignment_file) +cdef makePileupColumn(bam_pileup1_t ** plp, int tid, int pos, int n_pu, AlignmentFile alignment_file) +cdef inline makePileupRead(bam_pileup1_t * src, AlignmentFile alignment_file) diff --git a/pysam/calignedsegment.pyx b/pysam/calignedsegment.pyx new file mode 100644 index 0000000..f2b07a1 --- /dev/null +++ b/pysam/calignedsegment.pyx @@ -0,0 +1,2277 @@ +# cython: embedsignature=True +# cython: profile=True +############################################################################### +############################################################################### +# Cython wrapper for SAM/BAM/CRAM files based on htslib +############################################################################### +# The principal classes defined in this module are: +# +# class AlignedSegment an aligned segment (read) +# +# class PileupColumn a collection of segments (PileupRead) aligned to +# a particular genomic position. +# +# class PileupRead an AlignedSegment aligned to a particular genomic +# position. Contains additional attributes with respect +# to this. +# +# Additionally this module defines numerous additional classes that are part +# of the internal API. 
These are: +# +# Various iterator classes to iterate over alignments in sequential (IteratorRow) +# or in a stacked fashion (IteratorColumn): +# +# class IteratorRow +# class IteratorRowRegion +# class IteratorRowHead +# class IteratorRowAll +# class IteratorRowAllRefs +# class IteratorRowSelection +# +############################################################################### +# +# The MIT License +# +# Copyright (c) 2015 Andreas Heger +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+# +############################################################################### +import re +import array +import ctypes +import struct + +cimport cython +from cpython cimport array as c_array +from cpython.version cimport PY_MAJOR_VERSION +from cpython cimport PyErr_SetString, PyBytes_FromStringAndSize +from libc.string cimport strchr + +from pysam.cutils cimport force_bytes, force_str, charptr_to_str +from pysam.cutils cimport qualities_to_qualitystring, qualitystring_to_array, \ + array_to_qualitystring + +# Constants for binary tag conversion +cdef char * htslib_types = 'cCsSiIf' +cdef char * parray_types = 'bBhHiIf' + +# translation tables + +# cigar code to character and vice versa +cdef char* CODE2CIGAR= "MIDNSHP=X" + +if PY_MAJOR_VERSION >= 3: + CIGAR2CODE = dict([y, x] for x, y in enumerate(CODE2CIGAR)) +else: + CIGAR2CODE = dict([ord(y), x] for x, y in enumerate(CODE2CIGAR)) + +CIGAR_REGEX = re.compile("(\d+)([MIDNSHP=X])") + +##################################################################### +# typecode guessing +cdef inline char map_typecode_htslib_to_python(uint8_t s): + """map an htslib typecode to the corresponding python typecode + to be used in the struct or array modules.""" + + # map type from htslib to python array + cdef char * f = strchr(htslib_types, s) + if f == NULL: + raise ValueError("unknown htslib tag typecode '%s'" % chr(s)) + return parray_types[f - htslib_types] + +cdef inline uint8_t map_typecode_python_to_htslib(char s): + """determine value type from type code of array""" + cdef char * f = strchr(parray_types, s) + if f == NULL: + raise ValueError( + "unknown conversion for array typecode '%s'" % s) + return htslib_types[f - parray_types] + +# optional tag data manipulation +cdef convert_binary_tag(uint8_t * tag): + """return bytesize, number of values and array of values + in aux_data memory location pointed to by tag.""" + cdef uint8_t auxtype + cdef uint8_t byte_size + cdef int32_t nvalues + # get byte size + auxtype = 
cdef bytes TagToString(tuple tagtup):
    """format one optional field as SAM text, ``TAG:TYPE:VALUE``.

    Parameters
    ----------

    tagtup : tuple
        ``(tag, value, value_type)`` where *value_type* is a
        single-character type code.

    Returns
    -------

    The formatted field. All integer type codes are written as ``i``
    and all floating point type codes as ``f``. ``B`` arrays are
    written as ``TAG:B:SUBTYPE,v1,v2,...``. Fields with an
    unrecognized type code are formatted from the tuple as given.
    """
    # NOTE(review): as before, this mixes char*/bytes with str literals,
    # which presumably only works where str is bytes - confirm.
    cdef char value_type = tagtup[2]
    cdef char* tag = tagtup[0]
    cdef long value_int
    cdef bytes value_bytes
    cdef cython.str ret
    cdef cython.str subtype
    cdef size_t size
    if(value_type in ['c', 'C', 'i', 'I', 's', 'S']):
        value_int = tagtup[1]
        ret = tag + ":i:%s" % value_int
    elif(value_type in ['f', 'F', 'd', 'D']):
        value_float = tagtup[1]
        ret = tag + ":f:%s" % (value_float)
    elif(value_type == "Z"):
        value_bytes = tagtup[1]
        ret = tag + ":Z:" + value_bytes
    elif(value_type == "B"):
        values = tagtup[1]
        if(isinstance(values, array.array)):
            # arrays know their element type and width
            is_float = values.typecode in ('f', 'd')
            size = values.itemsize
        else:
            # lists and tuples carry no item size: the first element
            # decides between a float and an integer array and
            # integers are written with the widest (32 bit) subtype.
            is_float = len(values) > 0 and isinstance(values[0], float)
            size = 4

        if(is_float):
            subtype = "f"
        else:
            is_signed = len(values) > 0 and min(values) < 0
            if(size == 1):
                subtype = "c" if is_signed else "C"
            elif(size == 2):
                subtype = "s" if is_signed else "S"
            else:
                subtype = "i" if is_signed else "I"

        # the subtype is followed by a comma and the comma-separated
        # values; joining avoids a dangling comma for empty arrays.
        ret = tag + ":B:" + ",".join([subtype] + [str(v) for v in values])
    elif(value_type == "H"):
        ret = tag + ":H:" + "".join([hex(i)[2:] for i in tagtup[1]])
    elif(value_type == "A"):
        ret = tag + ":A:" + tagtup[1]
    else:
        # Unrecognized character - returning the string as it was provided.
        # An exception is not being raised because that prevents cython
        # from being able to compile this into a switch statement for
        # performance.
        ret = "%s:%s:%s" % (tag, tagtup[2], tagtup[1])
    return ret
cdef inline packTags(tags):
    """pack a list of tags. Each tag is a tuple of (tag, value) or
    (tag, value, typecode).

    Values are packed into the most space efficient data structure
    possible unless the tag contains a third field with the typecode.

    Tag names, typecodes and character data are converted to bytes
    before they are added to the argument list, as the ``s`` and ``c``
    formats of the struct module only accept bytes.

    Returns a format string and the associated list of arguments
    to be used in a call to struct.pack_into.

    Raises ValueError for a tag that does not have two or three fields
    and for an array tag with an unsupported typecode.
    """
    fmts, args = ["<"], []

    # keyed by bytes: getTypecode() returns bytes and explicitly given
    # typecodes are converted to bytes below.
    datatype2format = {
        b'c': ('b', 1),
        b'C': ('B', 1),
        b's': ('h', 2),
        b'S': ('H', 2),
        b'i': ('i', 4),
        b'I': ('I', 4),
        b'f': ('f', 4),
        b'A': ('c', 1)}

    for tag in tags:

        if len(tag) == 2:
            pytag, value = tag
            valuetype = None
        elif len(tag) == 3:
            pytag, value, valuetype = tag
        else:
            raise ValueError("malformatted tag: %s" % str(tag))

        if not type(pytag) is bytes:
            pytag = pytag.encode('ascii')

        if valuetype is not None and type(valuetype) is not bytes:
            valuetype = valuetype.encode('ascii')

        t = type(value)

        if t is tuple or t is list:
            # binary tags from tuples or lists
            if valuetype is None:
                # automatically determine value type - first value
                # determines type. If there is a mix of types, the
                # result is undefined.
                valuetype = getTypecode(min(value), max(value))

            if valuetype not in datatype2format:
                raise ValueError("invalid value type '%s'" % valuetype)

            datafmt = "2sccI%i%s" % (len(value), datatype2format[valuetype][0])
            args.extend([pytag[:2],
                         b"B",
                         valuetype,
                         len(value)] + list(value))

        elif isinstance(value, array.array):
            # binary tags from arrays
            if valuetype is None:
                valuetype = chr(map_typecode_python_to_htslib(
                    ord(value.typecode))).encode('ascii')

            if valuetype not in datatype2format:
                raise ValueError("invalid value type '%s'" % valuetype)

            # retrieve the byte representation of the array;
            # tostring() is only used where tobytes() does not exist.
            if hasattr(value, "tobytes"):
                data = value.tobytes()
            else:
                data = value.tostring()

            datafmt = "2sccI%is" % (len(value) * datatype2format[valuetype][1])
            args.extend([pytag[:2],
                         b"B",
                         valuetype,
                         len(value),
                         data])

        else:
            if valuetype is None:
                valuetype = getTypecode(value)

            # character data has to be handed to struct as bytes
            if (valuetype == b"Z" or valuetype == b"A") and \
               type(value) is not bytes:
                value = value.encode('ascii')

            if valuetype == b"Z":
                # one extra byte for the terminating \0
                datafmt = "2sc%is" % (len(value)+1)
            else:
                datafmt = "2sc%s" % datatype2format[valuetype][0]

            args.extend([pytag[:2],
                         valuetype,
                         value])

        fmts.append(datafmt)

    return "".join(fmts), args
cdef inline int32_t getQueryEnd(bam1_t *src) except -1:
    """return the end (exclusive) of the aligned part of the query.

    The query length is taken from the stored sequence or, if there
    is no sequence, computed from the CIGAR alignment. Trailing
    soft-clipped bases are subtracted from it.

    Sets a ValueError and returns -1 if a trailing hard clip is found
    after soft-clipped bases have already been removed.
    """
    cdef uint32_t * cigar_p
    cdef uint32_t k, op
    cdef uint32_t query_length = src.core.l_qseq
    cdef uint32_t end_offset

    # if there is no sequence, compute length from cigar string
    if query_length == 0:
        query_length = calculateQueryLength(src)
    end_offset = query_length

    # walk backwards in cigar string
    if pysam_get_n_cigar(src) > 1:
        cigar_p = pysam_bam_get_cigar(src)
        for k from pysam_get_n_cigar(src) > k >= 1:
            op = cigar_p[k] & BAM_CIGAR_MASK
            if op == BAM_CHARD_CLIP:
                # compare against the length in use, which is the
                # CIGAR-derived one for records without a sequence
                if end_offset != 0 and end_offset != query_length:
                    PyErr_SetString(ValueError,
                                    'Invalid clipping in CIGAR string')
                    return -1
            elif op == BAM_CSOFT_CLIP:
                end_offset -= cigar_p[k] >> BAM_CIGAR_SHIFT
            else:
                break

    return end_offset
+ """ + + cdef uint8_t * p + cdef uint32_t k + cdef char * s + + if not src.core.l_qseq: + return None + + seq = PyBytes_FromStringAndSize(NULL, end - start) + s = seq + p = pysam_bam_get_seq(src) + + for k from start <= k < end: + # equivalent to seq_nt16_str[bam1_seqi(s, i)] (see bam.c) + # note: do not use string literal as it will be a python string + s[k-start] = seq_nt16_str[p[k/2] >> 4 * (1 - k%2) & 0xf] + + return charptr_to_str(seq) + + +cdef inline object getQualitiesInRange(bam1_t *src, + uint32_t start, + uint32_t end): + """return python array of quality values from a bam1_t object""" + + cdef uint8_t * p + cdef uint32_t k + + p = pysam_bam_get_qual(src) + if p[0] == 0xff: + return None + + # 'B': unsigned char + cdef c_array.array result = array.array('B', [0]) + c_array.resize(result, end - start) + + # copy data + memcpy(result.data.as_voidptr, &p[start], end - start) + + return result + + +##################################################################### +## private factory methods +cdef class AlignedSegment +cdef makeAlignedSegment(bam1_t * src, AlignmentFile alignment_file): + '''return an AlignedSegment object constructed from `src`''' + # note that the following does not call __init__ + cdef AlignedSegment dest = AlignedSegment.__new__(AlignedSegment) + dest._delegate = bam_dup1(src) + dest._alignment_file = alignment_file + return dest + + +cdef class PileupColumn +cdef makePileupColumn(bam_pileup1_t ** plp, int tid, int pos, + int n_pu, AlignmentFile alignment_file): + '''return a PileupColumn object constructed from pileup in `plp` and + setting additional attributes. 
cdef inline makePileupRead(bam_pileup1_t * src, AlignmentFile alignment_file):
    '''return a PileupRead object built from the pileup entry `src`.

    The alignment record of `src` is wrapped with makeAlignedSegment,
    all other fields are copied over as they are.
    '''
    # created via __new__, so __init__ is not called
    cdef PileupRead read = PileupRead.__new__(PileupRead)

    # position of the read within the pileup column
    read._qpos = src.qpos
    read._indel = src.indel
    read._level = src.level

    # per-position flags
    read._is_del = src.is_del
    read._is_head = src.is_head
    read._is_tail = src.is_tail
    read._is_refskip = src.is_refskip

    read._alignment = makeAlignedSegment(src.b, alignment_file)
    return read
+ """ + + cdef uint8_t * md_tag_ptr = bam_aux_get(src, "MD") + + if md_tag_ptr == NULL: + return None + + cdef uint32_t start, end + start = getQueryStart(src) + end = getQueryEnd(src) + + # get read sequence, taking into account soft-clipping + r = getSequenceInRange(src, start, end) + cdef char * read_sequence = r + + cdef char * md_tag = bam_aux2Z(md_tag_ptr) + cdef int md_idx = 0 + cdef int r_idx = 0 + cdef int nmatches = 0 + cdef int x = 0 + cdef int s_idx = 0 + + # maximum length of sequence is read length + inserts in MD tag + \0 + cdef uint32_t max_len = end - start + strlen(md_tag) + 1 + cdef char * s = calloc(max_len, sizeof(char)) + if s == NULL: + raise ValueError( + "could not allocated sequence of length %i" % max_len) + while md_tag[md_idx] != 0: + # c is numerical + if md_tag[md_idx] >= 48 and md_tag[md_idx] <= 57: + nmatches *= 10 + nmatches += md_tag[md_idx] - 48 + md_idx += 1 + continue + else: + # save matches up to this point + for x from r_idx <= x < r_idx + nmatches: + s[s_idx] = read_sequence[x] + s_idx += 1 + r_idx += nmatches + nmatches = 0 + + if md_tag[md_idx] == '^': + md_idx += 1 + while md_tag[md_idx] >= 65 and md_tag[md_idx] <= 90: + s[s_idx] = md_tag[md_idx] + s_idx += 1 + md_idx += 1 + else: + # convert mismatch to lower case + s[s_idx] = md_tag[md_idx] + 32 + s_idx += 1 + r_idx += 1 + md_idx += 1 + + # save matches up to this point + for x from r_idx <= x < r_idx + nmatches: + s[s_idx] = read_sequence[x] + s_idx += 1 + + seq = PyBytes_FromStringAndSize(s, s_idx) + free(s) + return seq + + +cdef class AlignedSegment: + '''Class representing an aligned segment. + + This class stores a handle to the samtools C-structure representing + an aligned read. Member read access is forwarded to the C-structure + and converted into python objects. This implementation should be fast, + as only the data needed is converted. + + For write access, the C-structure is updated in-place. 
def __init__(self):
    """create an empty alignment record.

    Only called when instances are created from Python.

    Raises MemoryError if the record or its data buffer can not be
    allocated.
    """
    # see bam_init1
    self._delegate = <bam1_t*>calloc(1, sizeof(bam1_t))
    if self._delegate == NULL:
        raise MemoryError("could not allocate memory for alignment record")

    # allocate some memory. If size is 0, calloc does not return a
    # pointer that can be passed to free() so allocate 40 bytes
    # for a new read
    self._delegate.m_data = 40
    self._delegate.data = <uint8_t *>calloc(
        self._delegate.m_data, 1)
    if self._delegate.data == NULL:
        # keep the record consistent with its missing buffer
        self._delegate.m_data = 0
        raise MemoryError("could not allocate memory for alignment data")
    self._delegate.l_data = 0

    # caching for selected fields
    self.cache_query_qualities = None
    self.cache_query_alignment_qualities = None
    self.cache_query_sequence = None
    self.cache_query_alignment_sequence = None
def compare(self, AlignedSegment other):
    '''return -1,0,1, if contents in this are binary
    <,=,> to *other*

    The fixed-size core is compared first, then the length of the
    variable-length data and finally the data itself. Comparing
    against None returns -1.
    '''

    cdef int retval
    cdef bam1_t *t
    cdef bam1_t *o

    # a typed argument still accepts None, which has no _delegate
    if other is None:
        return -1

    t = self._delegate
    o = other._delegate

    # Fast-path test for object identity
    if t == o:
        return 0

    retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t))
    if retval:
        # memcmp only guarantees the sign of its result
        return (retval > 0) - (retval < 0)

    # cmp(t.l_data, o.l_data)
    retval = (t.l_data > o.l_data) - (t.l_data < o.l_data)
    if retval:
        return retval

    retval = memcmp(t.data, o.data, t.l_data)
    return (retval > 0) - (retval < 0)
cpdef bytes tostring(self, AlignmentFile_t htsfile):
    """returns a string representation of the aligned segment.

    The output is a single tab-separated, newline-terminated line
    with the fields in SAM order. Positions are written 1-based.
    A missing query name, cigar string, sequence or quality string
    is written as ``*``. The tags are only appended if there are
    any.

    Parameters
    ----------

    htsfile -- AlignmentFile object to map numerical
               identifiers to chromosome names.
    """

    cdef cython.str cigarstring, mate_ref, ref
    if self.reference_id < 0:
        ref = "*"
    else:
        ref = htsfile.getrname(self.reference_id)

    if self.rnext < 0:
        mate_ref = "*"
    elif self.rnext == self.reference_id:
        mate_ref = "="
    else:
        mate_ref = htsfile.getrname(self.rnext)

    # the property builds the string on every access, so read it once
    cigarstring = self.cigarstring
    if cigarstring is None:
        cigarstring = "*"

    query_name = self.query_name
    seq = self.seq
    qual = self.qual

    fields = [
        "*" if query_name is None else query_name,
        self.flag,
        ref, self.pos + 1, self.mapq,
        cigarstring,
        mate_ref, self.mpos + 1,
        self.template_length,
        "*" if seq is None else seq,
        "*" if qual is None else qual]

    # no trailing tab for records without tags
    tags = self.get_tag_string()
    if tags:
        fields.append(tags)

    ret = "\t".join(["%s" % field for field in fields]) + "\n"
    return ret
property reference_start:
    """0-based leftmost coordinate"""
    def __get__(self):
        return self._delegate.core.pos

    def __set__(self, pos):
        cdef bam1_t * src = self._delegate
        src.core.pos = pos

        # The "bin" attribute has to follow the position. It is
        # computed from the aligned interval if there is a cigar and
        # from a single position otherwise.
        pysam_set_bin(
            src,
            hts_reg2bin(
                src.core.pos,
                bam_endpos(src) if pysam_get_n_cigar(src)
                else src.core.pos + 1,
                14,
                5))
+ + To unset the cigarstring, assign None or the + empty string. + ''' + def __get__(self): + c = self.cigartuples + if c is None: + return None + # reverse order + else: + return "".join([ "%i%c" % (y,CODE2CIGAR[x]) for x,y in c]) + + def __set__(self, cigar): + if cigar is None or len(cigar) == 0: + self.cigartuples = [] + else: + parts = CIGAR_REGEX.findall(cigar) + # reverse order + self.cigartuples = [(CIGAR2CODE[ord(y)], int(x)) for x,y in parts] + + # TODO + # property cigar: + # """the cigar alignment""" + + property next_reference_id: + """the :term:`reference` id of the mate/next read.""" + def __get__(self): return self._delegate.core.mtid + def __set__(self, mtid): + self._delegate.core.mtid = mtid + + property next_reference_name: + """:term:`reference` name of the mate/next read (None if no + AlignmentFile is associated)""" + def __get__(self): + if self._alignment_file is not None: + return self._alignment_file.getrname(self._delegate.core.mtid) + return None + + property next_reference_start: + """the position of the mate/next read.""" + def __get__(self): + return self._delegate.core.mpos + def __set__(self, mpos): + self._delegate.core.mpos = mpos + + property query_length: + """the length of the query/read. + + This value corresponds to the length of the sequence supplied + in the BAM/SAM file. The length of a query is 0 if there is no + sequence in the BAM/SAM file. In those cases, the read length + can be inferred from the CIGAR alignment, see + :meth:`pysam.AlignmentFile.infer_query_length.`. + + The length includes soft-clipped bases and is equal to + ``len(query_sequence)``. + + This property is read-only but can be set by providing a + sequence. + + Returns 0 if not available. 
+ + """ + def __get__(self): + return self._delegate.core.l_qseq + + property template_length: + """the observed query template length""" + def __get__(self): + return self._delegate.core.isize + def __set__(self, isize): + self._delegate.core.isize = isize + + property query_sequence: + """read sequence bases, including :term:`soft clipped` bases + (None if not present). + + Note that assigning to seq will invalidate any quality scores. + Thus, to in-place edit the sequence and quality scores, copies of + the quality scores need to be taken. Consider trimming for example:: + + q = read.query_qualities + read.query_squence = read.query_sequence[5:10] + read.query_qualities = q[5:10] + + The sequence is returned as it is stored in the BAM file. Some mappers + might have stored a reverse complement of the original read + sequence. + """ + def __get__(self): + if self.cache_query_sequence: + return self.cache_query_sequence + + cdef bam1_t * src + cdef char * s + src = self._delegate + + if src.core.l_qseq == 0: + return None + + self.cache_query_sequence = getSequenceInRange( + src, 0, src.core.l_qseq) + return self.cache_query_sequence + + def __set__(self, seq): + # samtools manages sequence and quality length memory together + # if no quality information is present, the first byte says 0xff. 
property query_qualities:
    """read sequence base qualities, including :term:`soft
    clipped` bases (None if not present).

    Quality scores are returned as a python array of unsigned
    chars. Note that this is not the ASCII-encoded value typically
    seen in FASTQ or SAM formatted files. Thus, no offset of 33
    needs to be subtracted.

    Note that to set quality scores the sequence has to be set
    beforehand as this will determine the expected length of the
    quality score array.

    Assigning None or an empty sequence marks the qualities as
    absent.

    This method raises a ValueError if the length of the
    quality scores and the sequence are not the same.

    """
    def __get__(self):

        if self.cache_query_qualities:
            return self.cache_query_qualities

        cdef bam1_t * src

        src = self._delegate

        if src.core.l_qseq == 0:
            return None

        self.cache_query_qualities = getQualitiesInRange(src, 0, src.core.l_qseq)
        return self.cache_query_qualities

    def __set__(self, qual):

        # note that memory is already allocated via setting the sequence
        # hence length match of sequence and quality needs is checked.
        cdef bam1_t * src
        cdef uint8_t * p
        cdef int l

        src = self._delegate
        p = pysam_bam_get_qual(src)

        # values cached for the previous qualities are stale from here on
        self.cache_query_qualities = None
        self.cache_query_alignment_qualities = None

        if qual is None or len(qual) == 0:
            # if absent and there is a sequence: set to 0xff
            if src.core.l_qseq != 0:
                p[0] = 0xff
            return

        # check for length match
        l = len(qual)
        if src.core.l_qseq != l:
            raise ValueError(
                "quality and sequence mismatch: %i != %i" %
                (l, src.core.l_qseq))

        # create a python array object filling it
        # with the quality scores

        # NB: should avoid this copying if qual is
        # already of the correct type.
        cdef c_array.array result = c_array.array('B', qual)

        # copy data
        memcpy(p, result.data.as_voidptr, l)

        # cache the array, so that the getter returns the documented
        # type no matter what kind of sequence was assigned
        self.cache_query_qualities = result
property mate_is_reverse:
    """true if the mate is mapped to reverse strand"""
    def __get__(self):
        return (self.flag & BAM_FMREVERSE) != 0
    def __set__(self,val):
        pysam_update_flag(self._delegate, val, BAM_FMREVERSE)
property reference_length:
    '''aligned length of the read on the reference genome.

    This is the distance between the end of the alignment and the
    start position. Returns None if the read is flagged as unmapped
    or has no cigar alignment.'''
    def __get__(self):
        cdef bam1_t * src = self._delegate

        # no alignment, no length
        if self.flag & BAM_FUNMAP:
            return None
        if pysam_get_n_cigar(src) == 0:
            return None

        return bam_endpos(src) - src.core.pos
+ In addition, extra sequencing adapters, multiplex identifiers, + and low-quality bases that were not considered for alignment + may have been retained. + + """ + + def __get__(self): + if self.cache_query_alignment_sequence: + return self.cache_query_alignment_sequence + + cdef bam1_t * src + cdef uint32_t start, end + + src = self._delegate + + if src.core.l_qseq == 0: + return None + + start = getQueryStart(src) + end = getQueryEnd(src) + + self.cache_query_alignment_sequence = getSequenceInRange(src, start, end) + return self.cache_query_alignment_sequence + + property query_alignment_qualities: + """aligned query sequence quality values (None if not present). These + are the quality values that correspond to :attr:`query`, that + is, they exclude qualities of :term:`soft clipped` bases. This + is equal to ``qual[qstart:qend]``. + + Quality scores are returned as a python array of unsigned + chars. Note that this is not the ASCII-encoded value typically + seen in FASTQ or SAM formatted files. Thus, no offset of 33 + needs to be subtracted. + + This property is read-only. + + """ + def __get__(self): + + if self.cache_query_alignment_qualities: + return self.cache_query_alignment_qualities + + cdef bam1_t * src + cdef uint32_t start, end + + src = self._delegate + + if src.core.l_qseq == 0: + return None + + start = getQueryStart(src) + end = getQueryEnd(src) + self.cache_query_alignment_qualities = \ + getQualitiesInRange(src, start, end) + return self.cache_query_alignment_qualities + + property query_alignment_start: + """start index of the aligned query portion of the sequence (0-based, + inclusive). + + This the index of the first base in :attr:`seq` that is not + soft-clipped. 
def get_reference_positions(self, full_length=False):
    """a list of reference positions that this read aligns to.

    By default, this method only returns positions in the
    reference that are within the alignment. If *full_length* is
    set, None values will be included for any soft-clipped or
    unaligned positions within the read. The returned list will
    thus be of the same length as the read.

    Returns an empty list if there is no cigar alignment.

    """
    cdef uint32_t k, i, l, pos
    cdef int op
    cdef uint32_t * cigar_p
    cdef bam1_t * src
    cdef bint _full = full_length

    src = self._delegate
    if pysam_get_n_cigar(src) == 0:
        return []

    result = []
    pos = src.core.pos
    cigar_p = pysam_bam_get_cigar(src)

    for k from 0 <= k < pysam_get_n_cigar(src):
        op = cigar_p[k] & BAM_CIGAR_MASK
        l = cigar_p[k] >> BAM_CIGAR_SHIFT

        if op == BAM_CSOFT_CLIP or op == BAM_CINS:
            # read bases without a reference position
            if _full:
                for i from 0 <= i < l:
                    result.append(None)
        elif op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF:
            # '=' and 'X' are aligned bases just like 'M'
            for i from pos <= i < pos + l:
                result.append(i)
            pos += l
        elif op == BAM_CDEL or op == BAM_CREF_SKIP:
            # reference bases without a read base
            pos += l

    return result
+ """ + + cdef uint32_t * cigar_p + cdef bam1_t * src + + src = self._delegate + + if not always and src.core.l_qseq: + return src.core.l_qseq + + return calculateQueryLength(src) + + def get_reference_sequence(self): + """return the reference sequence. + + This method requires the MD tag to be set. + """ + return reconstituteSequenceFromMD(self._delegate) + + + def get_aligned_pairs(self, matches_only=False, with_seq=False): + """a list of aligned read (query) and reference positions. + + For inserts, deletions, skipping either query or reference + position may be None. + + Padding is currently not supported and leads to an exception. + + Parameters + ---------- + + matches_only : bool + + If True, only matched bases are returned - no None on either + side. + + with_seq : bool + + If True, return a third element in the tuple containing the + reference sequence. Substitutions are lower-case. This option + requires an MD tag to be present. + + Returns + ------- + + aligned_pairs : list of tuples + + """ + cdef uint32_t k, i, pos, qpos, r_idx + cdef int op + cdef uint32_t * cigar_p + cdef bam1_t * src = self._delegate + cdef bint _matches_only = bool(matches_only) + cdef bint _with_seq = bool(with_seq) + + # TODO: this method performs no checking and assumes that + # read sequence, cigar and MD tag are consistent. 
+ + if _with_seq: + ref_seq = reconstituteSequenceFromMD(src) + if ref_seq is None: + raise ValueError("MD tag not present") + + r_idx = 0 + + if pysam_get_n_cigar(src) == 0: + return [] + + result = [] + pos = src.core.pos + qpos = 0 + cigar_p = pysam_bam_get_cigar(src) + + for k from 0 <= k < pysam_get_n_cigar(src): + op = cigar_p[k] & BAM_CIGAR_MASK + l = cigar_p[k] >> BAM_CIGAR_SHIFT + + if op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF: + if _with_seq: + for i from pos <= i < pos + l: + result.append((qpos, i, ref_seq[r_idx])) + r_idx += 1 + qpos += 1 + else: + for i from pos <= i < pos + l: + result.append((qpos, i)) + qpos += 1 + pos += l + + elif op == BAM_CINS or op == BAM_CSOFT_CLIP: + if not _matches_only: + if _with_seq: + for i from pos <= i < pos + l: + result.append((qpos, None, None)) + qpos += 1 + else: + for i from pos <= i < pos + l: + result.append((qpos, None)) + qpos += 1 + else: + qpos += l + + elif op == BAM_CDEL or op == BAM_CREF_SKIP: + if _matches_only: + # nothing is reported for these reference bases, but the + # index into ref_seq must stay in step with the reported + # path below, which advances it once per reference base + r_idx += l + elif _with_seq: + for i from pos <= i < pos + l: + result.append((None, i, ref_seq[r_idx])) + r_idx += 1 + else: + for i from pos <= i < pos + l: + result.append((None, i)) + pos += l + + elif op == BAM_CHARD_CLIP: + pass # advances neither + + elif op == BAM_CPAD: + raise NotImplementedError( + "Padding (BAM_CPAD, 6) is currently not supported. " + "Please implement. Sorry about that.") + + return result + + def get_blocks(self): + """ a list of start and end positions of + aligned gapless blocks. + + The start and end positions are in genomic + coordinates. + + Blocks are not normalized, i.e. two blocks + might be directly adjacent. This happens if + the two blocks are separated by an insertion + in the read. 
+ """ + + cdef uint32_t k, pos, l + cdef int op + cdef uint32_t * cigar_p + cdef bam1_t * src + + src = self._delegate + if pysam_get_n_cigar(src) == 0: + return [] + + result = [] + pos = src.core.pos + cigar_p = pysam_bam_get_cigar(src) + l = 0 + + for k from 0 <= k < pysam_get_n_cigar(src): + op = cigar_p[k] & BAM_CIGAR_MASK + l = cigar_p[k] >> BAM_CIGAR_SHIFT + # '=' and 'X' are aligned, gapless columns just like 'M' + if op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF: + result.append((pos, pos + l)) + pos += l + elif op == BAM_CDEL or op == BAM_CREF_SKIP: + pos += l + + return result + + def get_overlap(self, uint32_t start, uint32_t end): + """return number of aligned bases of read overlapping the interval + *start* and *end* on the reference sequence. + + Aligned bases are those covered by the CIGAR operations + M (BAM_CMATCH), = (BAM_CEQUAL) and X (BAM_CDIFF). + + Return None if cigar alignment is not available. + """ + cdef uint32_t k, i, pos, overlap + cdef int op, o + cdef uint32_t * cigar_p + cdef bam1_t * src + + overlap = 0 + + src = self._delegate + if pysam_get_n_cigar(src) == 0: + return None + pos = src.core.pos + o = 0 + + cigar_p = pysam_bam_get_cigar(src) + for k from 0 <= k < pysam_get_n_cigar(src): + op = cigar_p[k] & BAM_CIGAR_MASK + l = cigar_p[k] >> BAM_CIGAR_SHIFT + + # only aligned columns (M, =, X) contribute to the overlap + if op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF: + o = min( pos + l, end) - max( pos, start ) + if o > 0: overlap += o + + # every reference-consuming operation moves the reference cursor + if (op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF or + op == BAM_CDEL or op == BAM_CREF_SKIP): + pos += l + + return overlap + + ##################################################### + ## Unsorted as yet + # TODO: capture in CIGAR object + property cigartuples: + """the :term:`cigar` alignment. The alignment + is returned as a list of tuples of (operation, length). + + If the alignment is not present, None is returned. 
+ + The operations are: + + +-----+--------------+-----+ + |M |BAM_CMATCH |0 | + +-----+--------------+-----+ + |I |BAM_CINS |1 | + +-----+--------------+-----+ + |D |BAM_CDEL |2 | + +-----+--------------+-----+ + |N |BAM_CREF_SKIP |3 | + +-----+--------------+-----+ + |S |BAM_CSOFT_CLIP|4 | + +-----+--------------+-----+ + |H |BAM_CHARD_CLIP|5 | + +-----+--------------+-----+ + |P |BAM_CPAD |6 | + +-----+--------------+-----+ + |= |BAM_CEQUAL |7 | + +-----+--------------+-----+ + |X |BAM_CDIFF |8 | + +-----+--------------+-----+ + + .. note:: + The output is a list of (operation, length) tuples, such as + ``[(0, 30)]``. + This is different from the SAM specification and + the :attr:`cigarstring` property, which uses a + (length, operation) order, for example: ``30M``. + + To unset the cigar property, assign an empty list + or None. + """ + def __get__(self): + cdef uint32_t * cigar_p + cdef bam1_t * src + cdef uint32_t op, l + cdef int k + + src = self._delegate + if pysam_get_n_cigar(src) == 0: + return None + + cigar = [] + + cigar_p = pysam_bam_get_cigar(src); + for k from 0 <= k < pysam_get_n_cigar(src): + op = cigar_p[k] & BAM_CIGAR_MASK + l = cigar_p[k] >> BAM_CIGAR_SHIFT + cigar.append((op, l)) + return cigar + + def __set__(self, values): + cdef uint32_t * p + cdef bam1_t * src + cdef op, l + cdef int k, ncigar + + k = 0 + + src = self._delegate + + # get location of cigar string + p = pysam_bam_get_cigar(src) + + # empty values for cigar string + if values is None: + values = [] + + ncigar = len(values) + # create space for cigar data within src.data + pysam_bam_update(src, + pysam_get_n_cigar(src) * 4, + ncigar * 4, + p) + + # length is number of cigar operations, not bytes + pysam_set_n_cigar(src, ncigar) + + # re-acquire pointer to location in memory + # as it might have moved + p = pysam_bam_get_cigar(src) + + # insert cigar operations + for op, l in values: + p[k] = l << BAM_CIGAR_SHIFT | op + k += 1 + + ## setting the cigar string requires updating 
the bin + pysam_set_bin(src, + hts_reg2bin( + src.core.pos, + bam_endpos(src), + 14, + 5)) + + + cpdef set_tag(self, + tag, + value, + value_type=None, + replace=True): + """sets a particular field *tag* to *value* in the optional alignment + section. + + *value_type* describes the type of *value* that is to entered + into the alignment record.. It can be set explicitly to one + of the valid one-letter type codes. If unset, an appropriate + type will be chosen automatically. + + An existing value of the same *tag* will be overwritten unless + replace is set to False. This is usually not recommened as a + tag may only appear once in the optional alignment section. + + If *value* is None, the tag will be deleted. + """ + + cdef int value_size + cdef uint8_t * value_ptr + cdef uint8_t *existing_ptr + cdef uint8_t typecode + cdef float float_value + cdef double double_value + cdef int32_t int_value + cdef bam1_t * src = self._delegate + cdef char * _value_type + cdef c_array.array array_value + cdef object buffer + + if len(tag) != 2: + raise ValueError('Invalid tag: %s' % tag) + + tag = force_bytes(tag) + if replace: + existing_ptr = bam_aux_get(src, tag) + if existing_ptr: + bam_aux_del(src, existing_ptr) + + # setting value to None deletes a tag + if value is None: + return + + typecode = get_value_code(value, value_type) + if typecode == 0: + raise ValueError("can't guess type or invalid type code specified") + + # Not Endian-safe, but then again neither is samtools! 
+ if typecode == 'Z': + value = force_bytes(value) + value_ptr = value + value_size = len(value)+1 + elif typecode == 'i': + int_value = value + value_ptr = &int_value + value_size = sizeof(int32_t) + elif typecode == 'd': + double_value = value + value_ptr = &double_value + value_size = sizeof(double) + elif typecode == 'f': + float_value = value + value_ptr = &float_value + value_size = sizeof(float) + elif typecode == 'B': + # the following goes through python, needs to be cleaned up + # pack array using struct + if value_type is None: + fmt, args = packTags([(tag, value)]) + else: + fmt, args = packTags([(tag, value, value_type)]) + + # remove tag and type code as set by bam_aux_append + # first four chars of format (<2sc) + fmt = '<' + fmt[4:] + # first two values to pack + args = args[2:] + value_size = struct.calcsize(fmt) + # buffer will be freed when object goes out of scope + buffer = ctypes.create_string_buffer(value_size) + struct.pack_into(fmt, buffer, 0, *args) + # bam_aux_append copies data from value_ptr + bam_aux_append(src, + tag, + typecode, + value_size, + buffer.raw) + return + else: + raise ValueError('unsupported value_type in set_option') + + bam_aux_append(src, + tag, + typecode, + value_size, + value_ptr) + + cpdef has_tag(self, tag): + """returns true if the optional alignment section + contains a given *tag*.""" + cdef uint8_t * v + cdef int nvalues + btag = force_bytes(tag) + v = bam_aux_get(self._delegate, btag) + return v != NULL + + cpdef get_tag(self, tag, with_value_type=False): + """ + retrieves data from the optional alignment section + given a two-letter *tag* denoting the field. + + The returned value is cast into an appropriate python type. + + This method is the fastest way to access the optional + alignment section if only few tags need to be retrieved. + + Parameters + ---------- + + tag : + data tag. + + with_value_type : Optional[bool] + if set to True, the return value is a tuple of (tag value, type code). 
+ (default False) + + Returns + ------- + + A python object with the value of the `tag`. The type of the + object depends on the data type in the data record. + + Raises + ------ + + KeyError + If `tag` is not present, a KeyError is raised. + + """ + cdef uint8_t * v + cdef int nvalues + btag = force_bytes(tag) + v = bam_aux_get(self._delegate, btag) + if v == NULL: + raise KeyError("tag '%s' not present" % tag) + if chr(v[0]) == "B": + auxtype = chr(v[0]) + chr(v[1]) + else: + auxtype = chr(v[0]) + + if auxtype == 'c' or auxtype == 'C' or auxtype == 's' or auxtype == 'S': + value = bam_aux2i(v) + elif auxtype == 'i' or auxtype == 'I': + value = bam_aux2i(v) + elif auxtype == 'f' or auxtype == 'F': + value = bam_aux2f(v) + elif auxtype == 'd' or auxtype == 'D': + value = bam_aux2f(v) + elif auxtype == 'A': + # there might a more efficient way + # to convert a char into a string + value = '%c' % bam_aux2A(v) + elif auxtype == 'Z': + value = charptr_to_str(bam_aux2Z(v)) + elif auxtype[0] == 'B': + bytesize, nvalues, values = convert_binary_tag(v + 1) + value = values + else: + raise ValueError("unknown auxiliary type '%s'" % auxtype) + + if with_value_type: + return (value, auxtype) + else: + return value + + def get_tags(self, with_value_type=False): + """the fields in the optional aligment section. + + Returns a list of all fields in the optional + alignment section. Values are converted to appropriate python + values. For example: + + [(NM, 2), (RG, "GJP00TM04")] + + If *with_value_type* is set, the value type as encode in + the AlignedSegment record will be returned as well: + + [(NM, 2, "i"), (RG, "GJP00TM04", "Z")] + + This method will convert all values in the optional alignment + section. When getting only one or few tags, please see + :meth:`get_tag` for a quicker way to achieve this. 
+ + """ + + cdef char * ctag + cdef bam1_t * src + cdef uint8_t * s + cdef char auxtag[3] + cdef char auxtype + cdef uint8_t byte_size + cdef int32_t nvalues + + src = self._delegate + if src.l_data == 0: + return [] + s = pysam_bam_get_aux(src) + result = [] + auxtag[2] = 0 + while s < (src.data + src.l_data): + # get tag + auxtag[0] = s[0] + auxtag[1] = s[1] + s += 2 + auxtype = s[0] + if auxtype in ('c', 'C'): + value = bam_aux2i(s) + s += 1 + elif auxtype in ('s', 'S'): + value = bam_aux2i(s) + s += 2 + elif auxtype in ('i', 'I'): + value = bam_aux2i(s) + s += 4 + elif auxtype == 'f': + value = bam_aux2f(s) + s += 4 + elif auxtype == 'd': + value = bam_aux2f(s) + s += 8 + elif auxtype == 'A': + value = "%c" % bam_aux2A(s) + s += 1 + elif auxtype in ('Z', 'H'): + value = charptr_to_str(bam_aux2Z(s)) + # +1 for NULL terminated string + s += len(value) + 1 + elif auxtype == 'B': + s += 1 + byte_size, nvalues, value = convert_binary_tag(s) + # 5 for 1 char and 1 int + s += 5 + (nvalues * byte_size) - 1 + else: + raise KeyError("unknown type '%s'" % auxtype) + + s += 1 + + if with_value_type: + result.append((charptr_to_str(auxtag), value, auxtype)) + else: + result.append((charptr_to_str(auxtag), value)) + + return result + + def set_tags(self, tags): + """sets the fields in the optional alignmest section with + a list of (tag, value) tuples. + + The :term:`value type` of the values is determined from the + python type. Optionally, a type may be given explicitly as + a third value in the tuple, For example: + + x.set_tags([(NM, 2, "i"), (RG, "GJP00TM04", "Z")] + + This method will not enforce the rule that the same tag may appear + only once in the optional alignment section. 
+ """ + + cdef bam1_t * src + cdef uint8_t * s + cdef char * temp + cdef int new_size = 0 + cdef int old_size + src = self._delegate + + # convert and pack the data + if tags is not None and len(tags) > 0: + fmt, args = packTags(tags) + new_size = struct.calcsize(fmt) + buffer = ctypes.create_string_buffer(new_size) + struct.pack_into(fmt, + buffer, + 0, + *args) + + # delete the old data and allocate new space. + # If total_size == 0, the aux field will be + # empty + old_size = pysam_bam_get_l_aux(src) + pysam_bam_update(src, + old_size, + new_size, + pysam_bam_get_aux(src)) + + # copy data only if there is any + if new_size > 0: + + # get location of new data + s = pysam_bam_get_aux(src) + + # check if there is direct path from buffer.raw to tmp + p = buffer.raw + # create handle to make sure buffer stays alive long + # enough for memcpy, see issue 129 + temp = p + memcpy(s, temp, new_size) + + + ######################################################## + # Compatibility Accessors + # Functions, properties for compatibility with pysam < 0.8 + # + # Several options + # change the factory functions according to API + # * requires code changes throughout, incl passing + # handles to factory functions + # subclass functions and add attributes at runtime + # e.g.: AlignedSegments.qname = AlignedSegments.query_name + # * will slow down the default interface + # explicit declaration of getters/setters + ######################################################## + property qname: + def __get__(self): return self.query_name + def __set__(self, v): self.query_name = v + property tid: + def __get__(self): return self.reference_id + def __set__(self, v): self.reference_id = v + property pos: + def __get__(self): return self.reference_start + def __set__(self, v): self.reference_start = v + property mapq: + def __get__(self): return self.mapping_quality + def __set__(self, v): self.mapping_quality = v + property rnext: + def __get__(self): return self.next_reference_id + def 
__set__(self, v): self.next_reference_id = v + property pnext: + def __get__(self): + return self.next_reference_start + def __set__(self, v): + self.next_reference_start = v + property cigar: + def __get__(self): + r = self.cigartuples + if r is None: + r = [] + return r + def __set__(self, v): self.cigartuples = v + property tlen: + def __get__(self): + return self.template_length + def __set__(self, v): + self.template_length = v + property seq: + def __get__(self): + return self.query_sequence + def __set__(self, v): + self.query_sequence = v + property qual: + def __get__(self): + return array_to_qualitystring(self.query_qualities) + def __set__(self, v): + self.query_qualities = qualitystring_to_array(v) + property alen: + def __get__(self): + return self.reference_length + def __set__(self, v): + self.reference_length = v + property aend: + def __get__(self): + return self.reference_end + def __set__(self, v): + self.reference_end = v + property rlen: + def __get__(self): + return self.query_length + def __set__(self, v): + self.query_length = v + property query: + def __get__(self): + return self.query_alignment_sequence + def __set__(self, v): + self.query_alignment_sequence = v + property qqual: + def __get__(self): + return array_to_qualitystring(self.query_alignment_qualities) + def __set__(self, v): + self.query_alignment_qualities = qualitystring_to_array(v) + property qstart: + def __get__(self): + return self.query_alignment_start + def __set__(self, v): + self.query_alignment_start = v + property qend: + def __get__(self): + return self.query_alignment_end + def __set__(self, v): + self.query_alignment_end = v + property qlen: + def __get__(self): + return self.query_alignment_length + def __set__(self, v): + self.query_alignment_length = v + property mrnm: + def __get__(self): + return self.next_reference_id + def __set__(self, v): + self.next_reference_id = v + property mpos: + def __get__(self): + return self.next_reference_start + def 
__set__(self, v): + self.next_reference_start = v + property rname: + def __get__(self): + return self.reference_id + def __set__(self, v): + self.reference_id = v + property isize: + def __get__(self): + return self.template_length + def __set__(self, v): + self.template_length = v + property blocks: + def __get__(self): + return self.get_blocks() + property aligned_pairs: + def __get__(self): + return self.get_aligned_pairs() + property inferred_length: + def __get__(self): + return self.infer_query_length() + property positions: + def __get__(self): + return self.get_reference_positions() + property tags: + def __get__(self): + return self.get_tags() + def __set__(self, tags): + self.set_tags(tags) + def overlap(self, start, end): + # compatibility alias; get_overlap requires the interval, so it + # has to be accepted here and passed through + return self.get_overlap(start, end) + def opt(self, tag): + return self.get_tag(tag) + def setTag(self, tag, value, value_type=None, replace=True): + return self.set_tag(tag, value, value_type, replace) + + +cdef class PileupColumn: + '''A pileup of reads at a particular reference sequence postion + (:term:`column`). A pileup column contains all the reads that map + to a certain target base. + + This class is a proxy for results returned by the samtools pileup + engine. If the underlying engine iterator advances, the results + of this column will change. 
+ + ''' + def __init__(self): + raise TypeError("this class cannot be instantiated from Python") + + def __str__(self): + return "\t".join(map(str, + (self.reference_id, + self.reference_pos, + self.nsegments))) +\ + "\n" +\ + "\n".join(map(str, self.pileups)) + + property reference_id: + '''the reference sequence number as defined in the header''' + def __get__(self): + return self.tid + + property reference_name: + """:term:`reference` name (None if no AlignmentFile is associated)""" + def __get__(self): + if self._alignment_file is not None: + return self._alignment_file.getrname(self.tid) + return None + + property nsegments: + '''number of reads mapping to this column.''' + def __get__(self): + return self.n_pu + def __set__(self, n): + self.n_pu = n + + property reference_pos: + '''the position in the reference sequence (0-based).''' + def __get__(self): + return self.pos + + property pileups: + '''list of reads (:class:`pysam.PileupRead`) aligned to this column''' + def __get__(self): + cdef int x + pileups = [] + + if self.plp == NULL or self.plp[0] == NULL: + raise ValueError("PileupColumn accessed after iterator finished") + + # warning: there could be problems if self.n and self.buf are + # out of sync. 
+ for x from 0 <= x < self.n_pu: + pileups.append(makePileupRead(&(self.plp[0][x]), self._alignment_file)) + return pileups + + ######################################################## + # Compatibility Accessors + # Functions, properties for compatibility with pysam < 0.8 + ######################################################## + property pos: + def __get__(self): + return self.reference_pos + def __set__(self, v): + self.reference_pos = v + + property tid: + def __get__(self): + return self.reference_id + def __set__(self, v): + self.reference_id = v + + property n: + def __get__(self): + return self.nsegments + def __set__(self, v): + self.nsegments = v + + +cdef class PileupRead: + '''Representation of a read aligned to a particular position in the + reference sequence. + + ''' + + def __init__(self): + raise TypeError( + "this class cannot be instantiated from Python") + + def __str__(self): + return "\t".join( + map(str, + (self.alignment, self.query_position, + self.indel, self.level, + self.is_del, self.is_head, + self.is_tail, self.is_refskip))) + + property alignment: + """a :class:`pysam.AlignedSegment` object of the aligned read""" + def __get__(self): + return self._alignment + + property query_position: + """position of the read base at the pileup site, 0-based. + None if is_del or is_refskip is set. 
+ + """ + def __get__(self): + if self.is_del or self.is_refskip: + return None + else: + return self._qpos + + property indel: + """indel length; 0 for no indel, positive for ins and negative for del""" + def __get__(self): + return self._indel + + property level: + """the level of the read in the "viewer" mode""" + def __get__(self): + return self._level + + property is_del: + """1 iff the base on the padded read is a deletion""" + def __get__(self): + return self._is_del + + property is_head: + def __get__(self): + return self._is_head + + property is_tail: + def __get__(self): + return self._is_tail + + property is_refskip: + def __get__(self): + return self._is_refskip + +__all__ = [ + "AlignedSegment", + "PileupColumn", + "PileupRead"] diff --git a/pysam/calignmentfile.pxd b/pysam/calignmentfile.pxd index b75c1fd..a7e956d 100644 --- a/pysam/calignmentfile.pxd +++ b/pysam/calignmentfile.pxd @@ -4,46 +4,19 @@ from libc.stdlib cimport malloc, calloc, realloc, free from libc.string cimport memcpy, memcmp, strncpy, strlen, strdup from libc.stdio cimport FILE, printf -from cfaidx cimport faidx_t, Fastafile -from chtslib cimport * +from pysam.cfaidx cimport faidx_t, Fastafile +from pysam.calignedsegment cimport AlignedSegment +from pysam.chtslib cimport * + +from cpython cimport array +cimport cython cdef extern from *: ctypedef char* const_char_ptr "const char*" cdef extern from "htslib_util.h": - int hts_set_verbosity(int verbosity) - int hts_get_verbosity() - - # add *nbytes* into the variable length data of *src* at *pos* - bam1_t * pysam_bam_update(bam1_t * b, - size_t nbytes_old, - size_t nbytes_new, - uint8_t * pos) - - # now: static - int aux_type2size(int) - char * pysam_bam_get_qname(bam1_t * b) - uint32_t * pysam_bam_get_cigar(bam1_t * b) - uint8_t * pysam_bam_get_seq(bam1_t * b) - uint8_t * pysam_bam_get_qual(bam1_t * b) - uint8_t * pysam_bam_get_aux(bam1_t * b) - int pysam_bam_get_l_aux(bam1_t * b) - char pysam_bam_seqi(uint8_t * s, int i) - - uint16_t 
pysam_get_bin(bam1_t * b) - uint8_t pysam_get_qual(bam1_t * b) - uint8_t pysam_get_l_qname(bam1_t * b) - uint16_t pysam_get_flag(bam1_t * b) - uint16_t pysam_get_n_cigar(bam1_t * b) - void pysam_set_bin(bam1_t * b, uint16_t v) - void pysam_set_qual(bam1_t * b, uint8_t v) - void pysam_set_l_qname(bam1_t * b, uint8_t v) - void pysam_set_flag(bam1_t * b, uint16_t v) - void pysam_set_n_cigar(bam1_t * b, uint16_t v) - void pysam_update_flag(bam1_t * b, uint16_t v, uint16_t flag) - cdef extern from "samfile_util.h": @@ -62,24 +35,6 @@ ctypedef struct __iterdata: char * seq int seq_len -# Exposing pysam extension classes -# -# Note: need to declare all C fields and methods here -cdef class AlignedSegment: - - # object that this AlignedSegment represents - cdef bam1_t * _delegate - - # add an alignment tag with value to the AlignedSegment - # an existing tag of the same name will be replaced. - cpdef set_tag(self, tag, value, value_type=?, replace=?) - - # add an alignment tag with value to the AlignedSegment - # an existing tag of the same name will be replaced. 
- cpdef get_tag(self, tag) - - # return true if tag exists - cpdef has_tag(self, tag) cdef class AlignmentFile: @@ -108,15 +63,12 @@ cdef class AlignmentFile: # beginning of read section cdef int64_t start_offset - cdef bam_hdr_t * _buildHeader(self, new_header) cdef bam1_t * getCurrent(self) cdef int cnext(self) # write an aligned read cpdef int write(self, AlignedSegment read) except -1 - cdef char * _getrname(self, int tid) - cdef class PileupColumn: cdef bam_pileup1_t ** plp cdef int tid @@ -143,7 +95,7 @@ cdef class IteratorRow: cdef class IteratorRowRegion(IteratorRow): cdef hts_itr_t * iter - cdef bam1_t * getCurrent( self ) + cdef bam1_t * getCurrent(self) cdef int cnext(self) cdef class IteratorRowHead(IteratorRow): @@ -153,7 +105,7 @@ cdef class IteratorRowHead(IteratorRow): cdef int cnext(self) cdef class IteratorRowAll(IteratorRow): - cdef bam1_t * getCurrent( self ) + cdef bam1_t * getCurrent(self) cdef int cnext(self) cdef class IteratorRowAllRefs(IteratorRow): @@ -163,7 +115,7 @@ cdef class IteratorRowAllRefs(IteratorRow): cdef class IteratorRowSelection(IteratorRow): cdef int current_pos cdef positions - cdef bam1_t * getCurrent( self ) + cdef bam1_t * getCurrent(self) cdef int cnext(self) cdef class IteratorColumn: @@ -183,13 +135,13 @@ cdef class IteratorColumn: cdef int max_depth cdef int cnext(self) - cdef char * getSequence( self ) + cdef char * getSequence(self) cdef setMask(self, mask) cdef setupIteratorData(self, int tid, int start, int end, - int multiple_iterators = ?) + int multiple_iterators=?) 
cdef reset(self, tid, start, end) cdef _free_pileup_iter(self) @@ -208,3 +160,4 @@ cdef class IndexedReads: cdef index cdef int owns_samfile cdef bam_hdr_t * header + diff --git a/pysam/calignmentfile.pyx b/pysam/calignmentfile.pyx index 533b0ff..57f2464 100644 --- a/pysam/calignmentfile.pyx +++ b/pysam/calignmentfile.pyx @@ -1,207 +1,82 @@ # cython: embedsignature=True # cython: profile=True -# adds doc-strings for sphinx -import tempfile +######################################################## +######################################################## +# Cython wrapper for SAM/BAM/CRAM files based on htslib +######################################################## +# The principal classes defined in this module are: +# +# class AlignmentFile read/write access to SAM/BAM/CRAM formatted files +# +# class IndexedReads index a SAM/BAM/CRAM file by query name while keeping +# the original sort order intact +# +# Additionally this module defines numerous additional classes that are part +# of the internal API. 
These are: +# +# Various iterator classes to iterate over alignments in sequential (IteratorRow) +# or in a stacked fashion (IteratorColumn): +# +# class IteratorRow +# class IteratorRowRegion +# class IteratorRowHead +# class IteratorRowAll +# class IteratorRowAllRefs +# class IteratorRowSelection +# class IteratorColumn +# class IteratorColumnRegion +# class IteratorColumnAllRefs +# +######################################################## +# +# The MIT License +# +# Copyright (c) 2015 Andreas Heger +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+# +######################################################## import os -import sys -import types -import itertools -import struct -import ctypes import collections import re -import platform import warnings import array -from cpython cimport PyErr_SetString, \ - PyBytes_Check, \ - PyUnicode_Check, \ - PyBytes_FromStringAndSize - -from cpython cimport array - +from cpython cimport array as c_array from cpython.version cimport PY_MAJOR_VERSION -cimport cython - -######################################################################## -######################################################################## -######################################################################## -## Python 3 compatibility functions -######################################################################## -IS_PYTHON3 = PY_MAJOR_VERSION >= 3 -cdef from_string_and_size(char* s, size_t length): - if PY_MAJOR_VERSION < 3: - return s[:length] - else: - return s[:length].decode("ascii") - -# filename encoding (copied from lxml.etree.pyx) -cdef str _FILENAME_ENCODING -_FILENAME_ENCODING = sys.getfilesystemencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = sys.getdefaultencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = 'ascii' - -#cdef char* _C_FILENAME_ENCODING -#_C_FILENAME_ENCODING = _FILENAME_ENCODING - -cdef bytes _encodeFilename(object filename): - """Make sure a filename is 8-bit encoded (or None).""" - if filename is None: - return None - elif PyBytes_Check(filename): - return filename - elif PyUnicode_Check(filename): - return filename.encode(_FILENAME_ENCODING) - else: - raise TypeError, u"Argument must be string or unicode." - -cdef bytes _forceBytes(object s): - u"""convert string or unicode object to bytes, assuming - ascii encoding. 
- """ - if PY_MAJOR_VERSION < 3: - return s - elif s is None: - return None - elif PyBytes_Check(s): - return s - elif PyUnicode_Check(s): - return s.encode('ascii') - else: - raise TypeError, u"Argument must be string, bytes or unicode." - -cdef inline bytes _forceCmdlineBytes(object s): - return _forceBytes(s) +from pysam.cutils cimport force_bytes, force_str, charptr_to_str +from pysam.cutils cimport encode_filename, from_string_and_size +from pysam.calignedsegment cimport makeAlignedSegment, makePileupColumn -cdef _charptr_to_str(char* s): - if PY_MAJOR_VERSION < 3: - return s - else: - return s.decode("ascii") - -cdef _forceStr(object s): - """Return s converted to str type of current Python - (bytes in Py2, unicode in Py3)""" - if s is None: - return None - if PY_MAJOR_VERSION < 3: - return s - elif PyBytes_Check(s): - return s.decode('ascii') - else: - # assume unicode - return s +cimport cython -######################################################################## -######################################################################## -######################################################################## +######################################################## ## Constants and global variables -######################################################################## # defines imported from samtools DEF SEEK_SET = 0 DEF SEEK_CUR = 1 DEF SEEK_END = 2 -cdef char* CODE2CIGAR= "MIDNSHP=X" -if IS_PYTHON3: - CIGAR2CODE = dict( [y,x] for x,y in enumerate( CODE2CIGAR) ) -else: - CIGAR2CODE = dict( [ord(y),x] for x,y in enumerate( CODE2CIGAR) ) -CIGAR_REGEX = re.compile( "(\d+)([MIDNSHP=X])" ) - -##################################################################### -# hard-coded constants -cdef int max_pos = 2 << 29 - -##################################################################### -##################################################################### -##################################################################### -## private factory methods 
-##################################################################### -cdef class AlignedSegment -cdef object makeAlignedSegment(bam1_t * src): - '''enter src into AlignedSegment.''' - # note that the following does not call __init__ - cdef AlignedSegment dest = AlignedSegment.__new__(AlignedSegment) - dest._delegate = bam_dup1(src) - return dest - - -cdef class PileupColumn -cdef makePileupColumn(bam_pileup1_t ** plp, int tid, int pos, int n_pu): - # note that the following does not call __init__ - cdef PileupColumn dest = PileupColumn.__new__(PileupColumn) - dest.plp = plp - dest.tid = tid - dest.pos = pos - dest.n_pu = n_pu - return dest - -cdef class PileupRead -cdef makePileupRead(bam_pileup1_t * src): - '''fill a PileupRead object from a bam_pileup1_t * object.''' - cdef PileupRead dest = PileupRead.__new__(PileupRead) - dest._alignment = makeAlignedSegment(src.b) - dest._qpos = src.qpos - dest._indel = src.indel - dest._level = src.level - dest._is_del = src.is_del - dest._is_head = src.is_head - dest._is_tail = src.is_tail - dest._is_refskip = src.is_refskip - return dest - -cdef convertBinaryTagToList( uint8_t * s ): - """return bytesize, number of values list of values in s.""" - cdef char auxtype - cdef uint8_t byte_size - cdef int32_t nvalues - - # get byte size - auxtype = s[0] - byte_size = aux_type2size( auxtype ) - s += 1 - # get number of values in array - nvalues = (s)[0] - s += 4 - # get values - values = [] - if auxtype == 'c': - for x from 0 <= x < nvalues: - values.append((s)[0]) - s += 1 - elif auxtype == 'C': - for x from 0 <= x < nvalues: - values.append((s)[0]) - s += 1 - elif auxtype == 's': - for x from 0 <= x < nvalues: - values.append((s)[0]) - s += 2 - elif auxtype == 'S': - for x from 0 <= x < nvalues: - values.append((s)[0]) - s += 2 - elif auxtype == 'i': - for x from 0 <= x < nvalues: - values.append((s)[0]) - s += 4 - elif auxtype == 'I': - for x from 0 <= x < nvalues: - values.append((s)[0]) - s += 4 - elif auxtype == 'f': - 
 for x from 0 <= x < nvalues: - values.append((s)[0]) - s += 4 - - return byte_size, nvalues, values - +# maximum genomic coordinate +cdef int MAX_POS = 2 << 29 # valid types for SAM headers VALID_HEADER_TYPES = {"HD" : dict, @@ -214,13 +89,14 @@ VALID_HEADER_TYPES = {"HD" : dict, VALID_HEADERS = ("HD", "SQ", "RG", "PG", "CO") # default type conversions within SAM header records -VALID_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str}, +KNOWN_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str}, "SQ" : {"SN" : str, "LN" : int, "AS" : str, "M5" : str, "SP" : str, "UR" : str,}, "RG" : {"ID" : str, "CN" : str, "DS" : str, "DT" : str, "FO" : str, "KS" : str, "LB" : str, "PG" : str, "PI" : str, - "PL" : str, "PU" : str, "SM" : str,}, + "PL" : str, "PM" : str, "PU" : str, + "SM" : str,}, "PG" : {"ID" : str, "PN" : str, "CL" : str, "PP" : str, "DS" : str, "VN" : str,},} @@ -235,94 +111,250 @@ VALID_HEADER_ORDER = {"HD" : ("VN", "SO", "GO"), "PP"),} -cdef class AlignmentFile: - '''*(filename, mode=None, template = None, - reference_names=None, reference_lengths = None, - text=NULL, header=None, - add_sq_text=False, check_header=True, - check_sq=True)* +def build_header_line(fields, record): + '''build a header line from `fields` dictionary for `record`''' + + # TODO: add checking for field and sort order + line = ["@%s" % record] + # comment + if record == "CO": + line.append(fields) + # user tags + elif record.islower(): + for key in sorted(fields): + line.append("%s:%s" % (key, str(fields[key]))) + # defined tags + else: + # write fields of the specification + for key in VALID_HEADER_ORDER[record]: + if key in fields: + line.append("%s:%s" % (key, str(fields[key]))) + # write user fields + for key in fields: + if not key.isupper(): + line.append("%s:%s" % (key, str(fields[key]))) + + return "\t".join(line) + +cdef bam_hdr_t * build_header(new_header): + '''return a new header built from a dictionary in `new_header`. 
+ + This method inserts the text field, target_name and target_len. + ''' + + lines = [] + + # check if hash exists + + # create new header and copy old data + cdef bam_hdr_t * dest - A :term:`SAM`/:term:`BAM` formatted file. The file is - automatically opened. + dest = bam_hdr_init() - *mode* should be ``r`` for reading or ``w`` for writing. The - default is text mode (:term:`SAM`). For binary (:term:`BAM`) I/O - you should append ``b`` for compressed or ``u`` for uncompressed - :term:`BAM` output. Use ``h`` to output header information in - text (:term:`TAM`) mode. + # first: defined tags + for record in VALID_HEADERS: + if record in new_header: + ttype = VALID_HEADER_TYPES[record] + data = new_header[record] + if type(data) != type(ttype()): + raise ValueError( + "invalid type for record %s: %s, expected %s" % + (record, type(data), type(ttype()))) + if type(data) is dict: + lines.append(build_header_line(data, record)) + else: + for fields in new_header[record]: + lines.append(build_header_line(fields, record)) + + # then: user tags (lower case), sorted alphabetically + for record, data in sorted(new_header.items()): + if record in VALID_HEADERS: continue + if type(data) is dict: + lines.append(build_header_line(data, record)) + else: + for fields in new_header[record]: + lines.append(build_header_line(fields, record)) + + text = "\n".join(lines) + "\n" + if dest.text != NULL: free( dest.text ) + dest.text = calloc(len(text), sizeof(char)) + dest.l_text = len(text) + cdef bytes btext = text.encode('ascii') + strncpy(dest.text, btext, dest.l_text) + + cdef bytes bseqname + # collect targets + if "SQ" in new_header: + seqs = [] + for fields in new_header["SQ"]: + try: + seqs.append( (fields["SN"], fields["LN"] ) ) + except KeyError: + raise KeyError( "incomplete sequence information in '%s'" % str(fields)) + + dest.n_targets = len(seqs) + dest.target_name = calloc(dest.n_targets, sizeof(char*)) + dest.target_len = calloc(dest.n_targets, sizeof(uint32_t)) + + for 
x from 0 <= x < dest.n_targets: + seqname, seqlen = seqs[x] + dest.target_name[x] = calloc( + len(seqname) + 1, sizeof(char)) + bseqname = seqname.encode('ascii') + strncpy(dest.target_name[x], bseqname, + len(seqname) + 1) + dest.target_len[x] = seqlen + + return dest - If ``b`` is present, it must immediately follow ``r`` or ``w``. - Valid modes are ``r``, ``w``, ``wh``, ``rb``, ``wb``, ``wbu`` and - ``wb0``. For instance, to open a :term:`BAM` formatted file for - reading, type:: - f = pysam.AlignmentFile('ex1.bam','rb') +cdef class AlignmentFile: + """ + AlignmentFile(filepath_or_object, mode=None, template=None, + reference_names=None, reference_lengths=None, text=NULL, + header=None, add_sq_text=False, check_header=True, check_sq=True) - If mode is not specified, we will try to auto-detect in the order - 'rb', 'r', thus both the following should work:: + A :term:`SAM`/:term:`BAM` formatted file. - f1 = pysam.AlignmentFile('ex1.bam') - f2 = pysam.AlignmentFile('ex1.sam') + If `filepath_or_object` is a string, the file is automatically + opened. If `filepath_or_object` is a python File object, the + already opened file will be used. - If an index for a BAM file exists (.bai), it will be opened - automatically. Without an index random access to reads via - :meth:`fetch` and :meth:`pileup` is disabled. + If the file is opened for reading an index for a BAM file exists + (.bai), it will be opened automatically. Without an index random + access via :meth:`~pysam.AlignmentFile.fetch` and + :meth:`~pysam.AlignmentFile.pileup` is disabled. For writing, the header of a :term:`SAM` file/:term:`BAM` file can be constituted from several sources (see also the samtools format specification): - 1. If *template* is given, the header is copied from a another - *AlignmentFile* (*template* must be of type *AlignmentFile*). + 1. If `template` is given, the header is copied from a another + `AlignmentFile` (`template` must be a + :class:`~pysam.AlignmentFile`). - 2. 
If *header* is given, the header is built from a - multi-level dictionary. The first level are the four types - ('HD', 'SQ', ...). The second level are a list of lines, - with each line being a list of tag-value pairs. The header - is constructed first from all the defined fields, followed - by user tags in alphabetical order. + 2. If `header` is given, the header is built from a + multi-level dictionary. - 3. If *text* is given, new header text is copied from raw + 3. If `text` is given, new header text is copied from raw text. - 4. The names (*reference_names*) and lengths - (*reference_lengths*) are supplied directly as lists. By - default, 'SQ' and 'LN' tags will be added to the header - text. This option can be changed by unsetting the flag - *add_sq_text*. + 4. The names (`reference_names`) and lengths + (`reference_lengths`) are supplied directly as lists. For writing a CRAM file, the filename of the reference can be - added through a fasta formatted file (*reference_filename*) + added through a fasta formatted file (`reference_filename`) By default, if a file is opened in mode 'r', it is checked - for a valid header (*check_header* = True) and a definition of - chromosome names (*check_sq* = True). + for a valid header (`check_header` = True) and a definition of + chromosome names (`check_sq` = True). - ''' + Parameters + ---------- + mode : string + `mode` should be ``r`` for reading or ``w`` for writing. The + default is text mode (:term:`SAM`). For binary (:term:`BAM`) I/O + you should append ``b`` for compressed or ``u`` for uncompressed + :term:`BAM` output. Use ``h`` to output header information in + text (:term:`TAM`) mode. + + If ``b`` is present, it must immediately follow ``r`` or ``w``. + Valid modes are ``r``, ``w``, ``wh``, ``rb``, ``wb``, ``wbu`` and + ``wb0``. 
For instance, to open a :term:`BAM` formatted file for + reading, type:: + + f = pysam.AlignmentFile('ex1.bam','rb') + + If mode is not specified, the method will try to auto-detect + in the order 'rb', 'r', thus both the following should work:: + + f1 = pysam.AlignmentFile('ex1.bam') + f2 = pysam.AlignmentFile('ex1.sam') + + template : AlignmentFile + when writing, copy header from `template`. + + header : dict + when writing, build header from a multi-level dictionary. The + first level are the four types ('HD', 'SQ', ...). The + second level are a list of lines, with each line being a + list of tag-value pairs. The header is constructed first + from all the defined fields, followed by user tags in + alphabetical order. + + text : string + when writing, use the string provided as the header + + reference_names : list + see reference_lengths + + reference_lengths : list + when writing, build header from list of chromosome names and lengths. + By default, 'SQ' and 'LN' tags will be added to the header + text. This option can be changed by unsetting the flag + `add_sq_text`. + + add_sq_text : bool + do not add 'SQ' and 'LN' tags to header. This option permits construction of + :term:`SAM` formatted files without a header. 
+ + check_header : bool + when reading, check if header is present (default=True) + + check_sq : bool + when reading, check if SQ entries are present in header (default=True) + + """ + + def __cinit__(self, *args, **kwargs): - def __cinit__(self, *args, **kwargs ): self.htsfile = NULL self._filename = None self.is_bam = False self.is_stream = False self.is_cram = False self.is_remote = False - + self._open(*args, **kwargs) # allocate memory for iterator self.b = calloc(1, sizeof(bam1_t)) - def _isOpen(self): + def is_open(self): '''return true if htsfile has been opened.''' return self.htsfile != NULL - def _hasIndex(self): - '''return true if htsfile has an existing (and opened) index.''' + def has_index(self): + """return true if htsfile has an existing (and opened) index. + """ return self.index != NULL + def check_index(self): + """return True if index is present. + + Raises + ------ + + AttributeError + if htsfile is :term:`SAM` formatted and thus has no index. + + ValueError + if htsfile is closed or index could not be opened. + """ + + if not self.is_open(): + raise ValueError("I/O operation on closed file") + if not self.is_bam and not self.is_cram: + raise AttributeError( + "AlignmentFile.mapped only available in bam files") + if self.index == NULL: + raise ValueError( + "mapping information not recorded in index " + "or index not available") + return True + def _open(self, - filename, + filepath_or_object, mode=None, AlignmentFile template=None, reference_names=None, @@ -341,6 +373,9 @@ cdef class AlignmentFile: If _open is called on an existing file, the current file will be closed and a new file will be opened. 
''' + cdef char *cfilename + cdef char *cmode + # for backwards compatibility: if referencenames is not None: reference_names = referencenames @@ -350,7 +385,8 @@ cdef class AlignmentFile: # read mode autodetection if mode is None: try: - self._open(filename, 'rb', + self._open(filepath_or_object, + 'rb', template=template, reference_names=reference_names, reference_lengths=reference_lengths, @@ -364,7 +400,8 @@ cdef class AlignmentFile: except ValueError, msg: pass - self._open(filename, 'r', + self._open(filepath_or_object, + 'r', template=template, reference_names=reference_names, reference_lengths=reference_lengths, @@ -376,7 +413,7 @@ cdef class AlignmentFile: check_sq=check_sq) return - assert mode in ("r","w","rb","wb", "wh", + assert mode in ("r", "w", "rb", "wb", "wh", "wbu", "rU", "wb0", "rc", "wc"), \ "invalid file opening mode `%s`" % mode @@ -385,12 +422,20 @@ cdef class AlignmentFile: if self.htsfile != NULL: self.close() + # check if we are working with a File object + if hasattr(filepath_or_object, "fileno"): + filename = filepath_or_object.name + if filepath_or_object.closed: + raise ValueError('I/O operation on closed file') + else: + filename = filepath_or_object + # for htslib, wbu seems to not work if mode == "wbu": mode = "wb0" cdef bytes bmode = mode.encode('ascii') - self._filename = filename = _encodeFilename(filename) + self._filename = filename = encode_filename(filename) # FIXME: Use htsFormat when it is available self.is_bam = len(mode) > 1 and mode[1] == 'b' @@ -400,6 +445,7 @@ cdef class AlignmentFile: filename.startswith(b"ftp:") cdef char * ctext + cdef hFILE * fp ctext = NULL if mode[0] == 'w': @@ -409,7 +455,7 @@ cdef class AlignmentFile: if template: self.header = bam_hdr_dup(template.header) elif header: - self.header = self._buildHeader(header) + self.header = build_header(header) else: # build header from a target names and lengths assert reference_names and reference_lengths, \ @@ -420,7 +466,7 @@ cdef class AlignmentFile: 
"unequal names and lengths of reference sequences" # allocate and fill header - reference_names = [_forceBytes(ref) for ref in reference_names] + reference_names = [force_bytes(ref) for ref in reference_names] self.header = bam_hdr_init() self.header.n_targets = len(reference_names) n = 0 @@ -443,13 +489,13 @@ cdef class AlignmentFile: text = [] for x from 0 <= x < self.header.n_targets: text.append("@SQ\tSN:%s\tLN:%s\n" % \ - (_forceStr(reference_names[x]), + (force_str(reference_names[x]), reference_lengths[x])) text = ''.join(text) if text is not None: # copy without \0 - text = _forceBytes(text) + text = force_bytes(text) ctext = text self.header.l_text = strlen(ctext) self.header.text = calloc( @@ -457,7 +503,14 @@ cdef class AlignmentFile: memcpy(self.header.text, ctext, strlen(ctext)) # open file (hts_open is synonym with sam_open) - self.htsfile = hts_open(filename, bmode) + cfilename, cmode = filename, bmode + if hasattr(filepath_or_object, "fileno"): + fp = hdopen(filepath_or_object.fileno(), cmode) + with nogil: + self.htsfile = hts_hopen(fp, cfilename, cmode) + else: + with nogil: + self.htsfile = hts_open(cfilename, cmode) # set filename with reference sequences. 
If no filename # is given, the CRAM reference arrays will be built from @@ -465,12 +518,13 @@ cdef class AlignmentFile: if self.is_cram and reference_filename: # note that fn_aux takes ownership, so create # a copy - fn = _encodeFilename(reference_filename) + fn = encode_filename(reference_filename) self.htsfile.fn_aux = strdup(fn) # write header to htsfile if self.is_bam or self.is_cram or "h" in mode: - sam_hdr_write(self.htsfile, self.header) + with nogil: + sam_hdr_write(self.htsfile, self.header) elif mode[0] == "r": # open file for reading @@ -480,7 +534,15 @@ cdef class AlignmentFile: raise IOError("file `%s` not found" % filename) # open file (hts_open is synonym with sam_open) - self.htsfile = hts_open(filename, bmode) + cfilename, cmode = filename, bmode + if hasattr(filepath_or_object, "fileno"): + fp = hdopen(filepath_or_object.fileno(), cmode) + with nogil: + self.htsfile = hts_hopen(fp, cfilename, cmode) + else: + with nogil: + self.htsfile = hts_open(cfilename, cmode) + if self.htsfile == NULL: raise ValueError( "could not open file (mode='%s') - " @@ -488,7 +550,8 @@ cdef class AlignmentFile: # bam files require a valid header if self.is_bam or self.is_cram: - self.header = sam_hdr_read(self.htsfile) + with nogil: + self.header = sam_hdr_read(self.htsfile) if self.header == NULL: raise ValueError( "file does not have valid header (mode='%s') " @@ -497,7 +560,8 @@ cdef class AlignmentFile: # in sam files it is optional (htsfile full of # unmapped reads) if check_header: - self.header = sam_hdr_read(self.htsfile) + with nogil: + self.header = sam_hdr_read(self.htsfile) if self.header == NULL: raise ValueError( "file does not have valid header (mode='%s') " @@ -525,7 +589,9 @@ cdef class AlignmentFile: # open index for remote files if self.is_remote: - self.index = hts_idx_load(filename, format_index) + cfilename = filename + with nogil: + self.index = hts_idx_load(cfilename, format_index) if self.index == NULL: warnings.warn( "unable to open remote 
index for '%s'" % filename) @@ -541,8 +607,10 @@ cdef class AlignmentFile: else: # returns NULL if there is no index or index could # not be opened - self.index = sam_index_load(self.htsfile, - filename) + cfilename = filename + with nogil: + self.index = sam_index_load(self.htsfile, + cfilename) if self.index == NULL: raise IOError( "error while opening index for '%s'" % @@ -552,62 +620,135 @@ cdef class AlignmentFile: if not self.is_stream: self.start_offset = self.tell() - def gettid(self, reference): - ''' - convert :term:`reference` name into numerical :term:`tid` + def get_tid(self, reference): + """ + return the numerical :term:`tid` corresponding to + :term:`reference` returns -1 if reference is not known. - ''' - if not self._isOpen(): + """ + if not self.is_open(): raise ValueError("I/O operation on closed file") - reference = _forceBytes(reference) + reference = force_bytes(reference) return bam_name2id(self.header, reference) - def getrname(self, tid): - ''' - convert numerical :term:`tid` into :term:`reference` name.''' - if not self._isOpen(): + def get_reference_name(self, tid): + """ + return :term:`reference` name corresponding to numerical :term:`tid` + """ + if not self.is_open(): raise ValueError("I/O operation on closed file") if not 0 <= tid < self.header.n_targets: raise ValueError("reference_id %i out of range 0<=tid<%i" % (tid, self.header.n_targets)) - return _charptr_to_str(self.header.target_name[tid]) + return charptr_to_str(self.header.target_name[tid]) - cdef char * _getrname(self, int tid): # TODO unused - ''' - convert numerical :term:`tid` into :term:`reference` name.''' - if not self._isOpen(): + def reset(self): + """reset file position to beginning of file just after + the header. + + Returns + ------- + + The file position after moving the file pointer. 
+ + """ + return self.seek(self.start_offset, 0) + + def seek(self, uint64_t offset, int where=0): + """move file pointer to position `offset`, see + :meth:`pysam.AlignmentFile.tell`. + + Parameters + ---------- + + offset : int + + position of the read/write pointer within the file. + + where : int + + optional and defaults to 0 which means absolute file + positioning, other values are 1 which means seek relative to + the current position and 2 means seek relative to the file's + end. + + Returns + ------- + + the file position after moving the file pointer + + """ + + if not self.is_open(): raise ValueError("I/O operation on closed file") + if not self.is_bam: + raise NotImplementedError( + "seek only available in bam files") + if self.is_stream: + raise OSError("seek no available in streams") - if not 0 <= tid < self.header.n_targets: - raise ValueError("tid %i out of range 0<=tid<%i" % - (tid, self.header.n_targets )) - return self.header.target_name[tid] + cdef uint64_t pos + with nogil: + pos = bgzf_seek(hts_get_bgzfp(self.htsfile), offset, where) + return pos + + def tell(self): + """ + return current file position. + """ + if not self.is_open(): + raise ValueError("I/O operation on closed file") + if not (self.is_bam or self.is_cram): + raise NotImplementedError( + "seek only available in bam files") - def _parseRegion(self, + cdef uint64_t pos + with nogil: + pos = bgzf_tell(hts_get_bgzfp(self.htsfile)) + return pos + + def parse_region(self, reference=None, start=None, end=None, region=None, tid=None): - '''parse region information. - - Raises ValueError for invalid regions. + """parse alternative ways to specify a genomic region. A region can + either be specified by :term:`reference`, `start` and + `end`. `start` and `end` denote 0-based, half-open + intervals. - Returns a tuple of a flag, :term:`tid`, start and end. The - flag indicates whether some coordinates were supplied. + Alternatively, a samtools :term:`region` string can be + supplied. 
+ + If any of the coordinates are missing they will be replaced by the + minimum (`start`) or maximum (`end`) coordinate. - Note that region strings are 1-based, while *start* and *end* denote + Note that region strings are 1-based, while `start` and `end` denote an interval in python coordinates. - ''' + Returns + ------- + + tuple : a tuple of `flag`, :term:`tid`, `start` and `end`. The + flag indicates whether no coordinates were supplied and the + genomic region is the complete genomic space. + + Raises + ------ + + ValueError + for invalid or out of bounds regions. + + """ cdef int rtid cdef long long rstart cdef long long rend rtid = -1 rstart = 0 - rend = max_pos + rend = MAX_POS if start != None: try: rstart = start @@ -621,7 +762,7 @@ cdef class AlignmentFile: raise ValueError('end out of range (%i)' % end) if region: - region = _forceStr(region) + region = force_str(region) parts = re.split("[:-]", region) reference = parts[0] if len(parts) >= 2: @@ -643,92 +784,85 @@ cdef class AlignmentFile: if rstart > rend: raise ValueError( 'invalid coordinates: start (%i) > end (%i)' % (rstart, rend)) - if not 0 <= rstart < max_pos: + if not 0 <= rstart < MAX_POS: raise ValueError('start out of range (%i)' % rstart) - if not 0 <= rend <= max_pos: + if not 0 <= rend <= MAX_POS: raise ValueError('end out of range (%i)' % rend) return 1, rtid, rstart, rend - def reset(self): - '''reset file position to beginning of file just after - the header.''' - return self.seek(self.start_offset, 0) - - def seek(self, uint64_t offset, int where = 0): - '''move file pointer to position *offset*, see - :meth:`pysam.AlignmentFile.tell`. 
- ''' - - if not self._isOpen(): - raise ValueError("I/O operation on closed file") - if not self.is_bam: - raise NotImplementedError( - "seek only available in bam files") - if self.is_stream: - raise OSError("seek no available in streams") - - return bgzf_seek(hts_get_bgzfp(self.htsfile), offset, where) - - def tell(self): - ''' - return current file position. - ''' - if not self._isOpen(): - raise ValueError("I/O operation on closed file") - if not (self.is_bam or self.is_cram): - raise NotImplementedError( - "seek only available in bam files") - - return bgzf_tell(hts_get_bgzfp(self.htsfile)) - def fetch(self, reference=None, start=None, end=None, region=None, tid=None, - callback=None, until_eof=False, multiple_iterators=False): - '''fetch aligned, i.e. mapped, reads in a :term:`region` - using 0-based - indexing. The region is specified by :term:`reference`, - *start* and *end*. Alternatively, a samtools :term:`region` - string can be supplied. - - Without *reference* or *region* all mapped reads will be - fetched. The reads will be returned ordered by reference - sequence, which will not necessarily be the order within the - file. + """fetch reads aligned in a :term:`region`. - If *until_eof* is given, all reads from the current file - position will be returned in order as they are within the - file. Using this option will also fetch unmapped reads. + See :meth:`AlignmentFile.parse_region` for more information + on genomic regions. - Set *multiple_iterators* to true if you will be using multiple - iterators on the same file at the same time. The iterator - returned will receive its own copy of a filehandle to the file - effectively re-opening the file. Re-opening a file creates - some overhead, so beware. + Without a `reference` or `region` all mapped reads in the file + will be fetched. The reads will be returned ordered by reference + sequence, which will not necessarily be the order within the + file. This mode of iteration still requires an index. 
If there is + no index, use `until_eof=True`. - If only *reference* is set, all reads aligned to *reference* + If only `reference` is set, all reads aligned to `reference` will be fetched. Note that a :term:`SAM` file does not allow random access. If - *region* or *reference* are given, an exception is raised. + `region` or `reference` are given, an exception is raised. - ''' + :class:`~pysam.FastaFile` + :class:`~pysam.IteratorRow` + :class:`~pysam.IteratorRow` + :class:`~IteratorRow` + :class:`IteratorRow` + + Parameters + ---------- + + until_eof : bool + + If `until_eof` is True, all reads from the current file + position will be returned in order as they are within the + file. Using this option will also fetch unmapped reads. + + multiple_iterators : bool + + If `multiple_iterators` is True (default) multiple + iterators on the same file can be used at the same time. The + iterator returned will receive its own copy of a filehandle to + the file effectively re-opening the file. Re-opening a file + creates some overhead, so beware. + + Returns + ------- + + An iterator over a collection of reads. + + Raises + ------ + + ValueError + if the genomic coordinates are out of range or invalid or the + file does not permit random access to genomic coordinates. 
+ + """ cdef int rtid, rstart, rend, has_coord - if not self._isOpen(): + if not self.is_open(): raise ValueError( "I/O operation on closed file" ) - has_coord, rtid, rstart, rend = self._parseRegion(reference, - start, - end, - region, - tid) + has_coord, rtid, rstart, rend = self.parse_region( + reference, + start, + end, + region, + tid) # Turn of re-opening if htsfile is a stream if self.is_stream: @@ -736,7 +870,7 @@ cdef class AlignmentFile: if self.is_bam or self.is_cram: if not until_eof and not self.is_remote: - if not self._hasIndex(): + if not self.has_index(): raise ValueError( "fetch called on bamfile without index") @@ -760,10 +894,6 @@ cdef class AlignmentFile: raise ValueError( "fetching by region is not available for sam files") - if callback: - raise NotImplementedError( - "callback not implemented yet") - if self.header == NULL: raise ValueError( "fetch called for htsfile without header") @@ -777,22 +907,29 @@ cdef class AlignmentFile: multiple_iterators=multiple_iterators) def head(self, n, multiple_iterators=True): - '''return iterator over the first n alignments. + '''return an iterator over the first n alignments. - This is useful for inspecting the bam-file. + This iterator is useful for inspecting the bam-file. - *multiple_iterators* is set to True by default in order to - avoid changing the current file position. + Parameters + ---------- + + multiple_iterators : bool + + is set to True by default in order to + avoid changing the current file position. + + Returns + ------- + + an iterator over a collection of reads + ''' return IteratorRowHead(self, n, multiple_iterators=multiple_iterators) - def mate(self, - AlignedSegment read): - '''return the mate of :class:`AlignedSegment` *read*. - - Throws a ValueError if read is unpaired or the mate - is unmapped. + def mate(self, AlignedSegment read): + '''return the mate of :class:`~pysam.AlignedSegment` `read`. .. 
note:: @@ -806,6 +943,17 @@ cdef class AlignmentFile: If a read needs to be processed with its mate, work from a read name sorted file or, better, cache reads. + Returns + ------- + + :class:`~pysam.AlignedSegment` : the mate + + Raises + ------ + + ValueError + if the read is unpaired or the mate is unmapped + ''' cdef uint32_t flag = read._delegate.core.flag @@ -836,69 +984,40 @@ cdef class AlignmentFile: break else: raise ValueError("mate not found") - - return mate - - def count(self, - reference=None, - start=None, - end=None, - region=None, - until_eof=False): - '''*(reference = None, start = None, end = None, - region = None, callback = None, until_eof = False)* - - count reads :term:`region` using 0-based indexing. The region - is specified by :term:`reference`, *start* and - *end*. Alternatively, a samtools :term:`region` string can be - supplied. - - Note that a :term:`SAM` file does not allow random access. If - *region* or *reference* are given, an exception is raised. - ''' - cdef AlignedSegment read - cdef long counter = 0 - - if not self._isOpen(): - raise ValueError( "I/O operation on closed file" ) - - for read in self.fetch(reference=reference, - start=start, - end=end, - region=region, - until_eof=until_eof): - counter += 1 - return counter + return mate - def pileup( self, - reference = None, - start = None, - end = None, - region = None, - **kwargs ): - '''perform a :term:`pileup` within a :term:`region`. The region is - specified by :term:`reference`, *start* and *end* (using - 0-based indexing). Alternatively, a samtools *region* string + def pileup(self, + reference=None, + start=None, + end=None, + region=None, + **kwargs): + """perform a :term:`pileup` within a :term:`region`. The region is + specified by :term:`reference`, 'start' and 'end' (using + 0-based indexing). Alternatively, a samtools 'region' string can be supplied. 
- Without *reference* or *region* all reads will be used for the + Without 'reference' or 'region' all reads will be used for the pileup. The reads will be returned ordered by :term:`reference` sequence, which will not necessarily be the order within the file. - The method returns an iterator of type - :class:`pysam.IteratorColumn` unless a *callback is - provided. If a *callback* is given, the callback will be - executed for each column within the :term:`region`. - Note that :term:`SAM` formatted files do not allow random - access. In these files, if a *region* or *reference* are + access. In these files, if a 'region' or 'reference' are given an exception is raised. - Optional *kwargs* to the iterator: + .. note:: + + 'all' reads which overlap the region are returned. The + first base returned will be the first base of the first + read 'not' necessarily the first base of the region used + in the query. - stepper + Parameters + ---------- + + stepper : string The stepper controlls how the iterator advances. Possible options for the stepper are @@ -908,120 +1027,223 @@ cdef class AlignmentFile: ``nofilter`` uses every single read - ``samtools`` same filter and read processing as in :term:`csamtools` - pileup. This requires a *fastafile* to be given. + pileup. This requires a 'fastafile' to be given. - fastafile - A :class:`~pysam.FastaFile` object. This is required for - some of the steppers. + fastafile : :class:`~pysam.FastaFile` object. - mask - Skip all reads with bits set in mask if mask=True. + This is required for some of the steppers. - max_depth - Maximum read depth permitted. The default limit is *8000*. + max_depth : int + Maximum read depth permitted. The default limit is '8000'. - truncate + truncate : bool By default, the samtools pileup engine outputs all reads - overlapping a region (see note below). If truncate is True - and a region is given, only output columns in the exact - region specificied. + overlapping a region. 
If truncate is True and a region is + given, only columns in the exact region specificied are + returned. - .. note:: + Returns + ------- - *all* reads which overlap the region are returned. The - first base returned will be the first base of the first - read *not* necessarily the first base of the region used - in the query. + an iterator over genomic positions. - ''' + """ cdef int rtid, rstart, rend, has_coord - if not self._isOpen(): - raise ValueError( "I/O operation on closed file" ) + if not self.is_open(): + raise ValueError("I/O operation on closed file") - has_coord, rtid, rstart, rend = self._parseRegion( - reference, start, end, region ) + has_coord, rtid, rstart, rend = self.parse_region( + reference, start, end, region) if self.is_bam or self.is_cram: - if not self._hasIndex(): + if not self.has_index(): raise ValueError("no index available for pileup") if has_coord: return IteratorColumnRegion(self, - tid = rtid, - start = rstart, - end = rend, + tid=rtid, + start=rstart, + end=rend, **kwargs ) else: return IteratorColumnAllRefs(self, **kwargs ) else: - raise NotImplementedError( "pileup of samfiles not implemented yet" ) + raise NotImplementedError( + "pileup of samfiles not implemented yet") - @cython.boundscheck(False) # we do manual bounds checking - def count_coverage(self, chr, start, stop, quality_threshold = 15, - read_callback = 'all'): - """Count ACGT in a part of a AlignmentFile. - Return 4 array.arrays of length = stop - start, - in order A C G T. + def count(self, + reference=None, + start=None, + end=None, + region=None, + until_eof=False): + ''' + count the number of reads in :term:`region` + + The region is specified by :term:`reference`, `start` and + `end`. Alternatively, a :term:`samtools` :term:`region` string + can be supplied. + + Note that a :term:`SAM` file does not allow random access and if + `region` or `reference` are given, an exception is raised. 
+ + Parameters + ---------- - @quality_threshold is the minimum quality score (in phred) a - base has to reach to be counted. Possible @read_callback - values are + reference : string + reference_name of the genomic region (chromosome) - ``all`` -` skip reads in which any of the following - flags are set: BAM_FUNMAP, BAM_FSECONDARY, BAM_FQCFAIL, - BAM_FDUP + start : int + start of the genomic region - ``nofilter`` - uses every single read + end : int + end of the genomic region - Alternatively, @read_callback can be a function ```check_read(read)``1 - that should return True only for those reads that shall be included in - the counting. + until_eof : bool + count until the end of the file, possibly including + unmapped reads as well. - """ - - cdef int _start = start - cdef int _stop = stop - cdef int length = _stop - _start - cdef array.array int_array_template = array.array('L', []) - cdef array.array count_a - cdef array.array count_c - cdef array.array count_g - cdef array.array count_t - count_a = array.clone(int_array_template, length, zero=True) - count_c = array.clone(int_array_template, length, zero=True) - count_g = array.clone(int_array_template, length, zero=True) - count_t = array.clone(int_array_template, length, zero=True) - - cdef char * seq - cdef array.array quality - cdef int qpos - cdef int refpos - cdef int c = 0 - cdef int _threshold = quality_threshold - for read in self.fetch(chr, start, stop): - if read_callback == 'all': - if (read.flag & (0x4 | 0x100 | 0x200 | 0x400)): - continue - elif read_callback == 'nofilter': - pass - else: - if not read_callback(read): - continue - seq = read.seq + Raises + ------ + + ValueError + if the genomic coordinates are out of range or invalid. 
+ + ''' + cdef AlignedSegment read + cdef long counter = 0 + + if not self.is_open(): + raise ValueError( "I/O operation on closed file" ) + + for read in self.fetch(reference=reference, + start=start, + end=end, + region=region, + until_eof=until_eof): + counter += 1 + + return counter + + @cython.boundscheck(False) # we do manual bounds checking + def count_coverage(self, + reference=None, + start=None, + end=None, + region=None, + quality_threshold=15, + read_callback='all'): + """count the coverage of genomic positions by reads in :term:`region`. + + The region is specified by :term:`reference`, `start` and + `end`. Alternatively, a :term:`samtools` :term:`region` string + can be supplied. The coverage is computed per-base [ACGT]. + + Parameters + ---------- + + reference : string + reference_name of the genomic region (chromosome) + + start : int + start of the genomic region + + end : int + end of the genomic region + + region : int + a region string. + + quality_threshold : int + quality_threshold is the minimum quality score (in phred) a + base has to reach to be counted. + + read_callback: string or function + + select a call-back to ignore reads when counting. It can + be either a string with the following values: + + ``all`` + skip reads in which any of the following + flags are set: BAM_FUNMAP, BAM_FSECONDARY, BAM_FQCFAIL, + BAM_FDUP + + ``nofilter`` + uses every single read + + Alternatively, `read_callback` can be a function + ``check_read(read)`` that should return True only for + those reads that shall be included in the counting. + + Raises + ------ + + ValueError + if the genomic coordinates are out of range or invalid. 
+ + Returns + ------- + + four array.arrays of the same length in order A C G T : tuple + + """ + + cdef int _start = start + cdef int _stop = end + cdef int length = _stop - _start + cdef c_array.array int_array_template = array.array('L', []) + cdef c_array.array count_a + cdef c_array.array count_c + cdef c_array.array count_g + cdef c_array.array count_t + count_a = c_array.clone(int_array_template, length, zero=True) + count_c = c_array.clone(int_array_template, length, zero=True) + count_g = c_array.clone(int_array_template, length, zero=True) + count_t = c_array.clone(int_array_template, length, zero=True) + + cdef AlignedSegment read + cdef cython.str seq + cdef c_array.array quality + cdef int qpos + cdef int refpos + cdef int c = 0 + cdef int filter_method = 0 + if read_callback == "all": + filter_method = 1 + elif read_callback == "nofilter": + filter_method = 2 + + cdef int _threshold = quality_threshold + for read in self.fetch(reference=reference, + start=start, + end=end, + region=region): + # apply filter + if filter_method == 1: + # filter = "all" + if (read.flag & (0x4 | 0x100 | 0x200 | 0x400)): + continue + elif filter_method == 2: + # filter = "nofilter" + pass + else: + if not read_callback(read): + continue + + # count + seq = read.seq quality = read.query_qualities for qpos, refpos in read.get_aligned_pairs(True): - if qpos is not None and refpos is not None and _start <= refpos < _stop: - if quality[qpos] > quality_threshold: + if qpos is not None and refpos is not None and \ + _start <= refpos < _stop: + if quality[qpos] >= quality_threshold: if seq[qpos] == 'A': count_a.data.as_ulongs[refpos - _start] += 1 if seq[qpos] == 'C': @@ -1030,6 +1252,7 @@ cdef class AlignmentFile: count_g.data.as_ulongs[refpos - _start] += 1 if seq[qpos] == 'T': count_t.data.as_ulongs[refpos - _start] += 1 + return count_a, count_c, count_g, count_t def close(self): @@ -1049,8 +1272,14 @@ cdef class AlignmentFile: # close within __dealloc__ (see 
BCFFile.__dealloc__). Not a pretty # solution and perhaps unnecessary given that calling self.close has # been working for years. + # AH: I have removed the call to close. Even though it is working, + # it seems to be dangerous according to the documentation as the + # object be partially deconstructed already. + if self.htsfile != NULL: + hts_close(self.htsfile) + hts_idx_destroy(self.index); + self.htsfile = NULL - self.close() bam_destroy1(self.b) if self.header != NULL: bam_hdr_destroy(self.header) @@ -1059,14 +1288,26 @@ cdef class AlignmentFile: ''' write a single :class:`pysam.AlignedSegment` to disk. - returns the number of bytes written. + Raises + ------ + ValueError + if the writing failed + + Returns + ------- + + int : the number of bytes written. If the file is closed, + this will be 0. ''' - if not self._isOpen(): + if not self.is_open(): return 0 - cdef int ret = sam_write1(self.htsfile, - self.header, - read._delegate) + cdef int ret + + with nogil: + ret = sam_write1(self.htsfile, + self.header, + read._delegate) # kbj: Still need to raise an exception with except -1. Otherwise # when ret == -1 we get a "SystemError: error return without @@ -1076,6 +1317,7 @@ cdef class AlignmentFile: return ret + # context manager interface def __enter__(self): return self @@ -1088,34 +1330,44 @@ cdef class AlignmentFile: ############################################################### ## properties ############################################################### + property closed: + """bool indicating the current state of the file object. + This is a read-only attribute; the close() method changes the value. + """ + def __get__(self): + return not self.is_open() + property filename: - '''filename associated with this object.''' + """filename associated with this object. 
This is a read-only attribute.""" def __get__(self): return self._filename property nreferences: - '''number of :term:`reference` sequences in the file.''' + """"int with the number of :term:`reference` sequences in the file. + This is a read-only attribute.""" def __get__(self): - if not self._isOpen(): raise ValueError( "I/O operation on closed file" ) + if not self.is_open(): + raise ValueError("I/O operation on closed file") return self.header.n_targets property references: - """tuple with the names of :term:`reference` sequences.""" + """tuple with the names of :term:`reference` sequences. This is a + read-only attribute""" def __get__(self): - if not self._isOpen(): raise ValueError( "I/O operation on closed file" ) + if not self.is_open(): raise ValueError( "I/O operation on closed file" ) t = [] for x from 0 <= x < self.header.n_targets: - t.append(_charptr_to_str(self.header.target_name[x])) + t.append(charptr_to_str(self.header.target_name[x])) return tuple(t) property lengths: - """tuple of the lengths of the :term:`reference` sequences. The - lengths are in the same order as + """tuple of the lengths of the :term:`reference` sequences. This is a + read-only attribute. The lengths are in the same order as :attr:`pysam.AlignmentFile.references` """ def __get__(self): - if not self._isOpen(): + if not self.is_open(): raise ValueError("I/O operation on closed file") t = [] for x from 0 <= x < self.header.n_targets: @@ -1123,70 +1375,66 @@ cdef class AlignmentFile: return tuple(t) property mapped: - """total number of mapped alignments according - to the statistics recorded in the index. + """int with total number of mapped alignments according to the + statistics recorded in the index. This is a read-only + attribute. 
""" def __get__(self): - self._checkIndex() + self.check_index() cdef int tid cdef uint64_t total = 0 cdef uint64_t mapped, unmapped for tid from 0 <= tid < self.header.n_targets: - hts_idx_get_stat(self.index, tid, &mapped, &unmapped) + with nogil: + hts_idx_get_stat(self.index, tid, &mapped, &unmapped) total += mapped return total - def _checkIndex(self): - '''check if index is present. Otherwise raise - an error.''' - if not self._isOpen(): - raise ValueError("I/O operation on closed file") - if not self.is_bam and not self.is_cram: - raise AttributeError( - "AlignmentFile.mapped only available in bam files") - if self.index == NULL: - raise ValueError( - "mapping information not recorded in index " - "or index not available") - - property unmapped: - """total number of unmapped reads according - to the statistics recorded in the index. + """int with total number of unmapped reads according to the statistics + recorded in the index. This number of reads includes the number of reads + without coordinates. This is a read-only attribute. """ def __get__(self): - self._checkIndex() + self.check_index() cdef int tid - cdef uint64_t total = 0 + cdef uint64_t total = hts_idx_get_n_no_coor(self.index) cdef uint64_t mapped, unmapped for tid from 0 <= tid < self.header.n_targets: - hts_idx_get_stat(self.index, tid, &mapped, &unmapped) + with nogil: + hts_idx_get_stat(self.index, tid, &mapped, &unmapped) total += unmapped return total property nocoordinate: - """total number of reads without coordinates according - to the statistics recorded in the index. + """int with total number of reads without coordinates according to the + statistics recorded in the index. This is a read-only attribute. 
""" def __get__(self): - self._checkIndex() - return hts_idx_get_n_no_coor(self.index) + self.check_index() + cdef uint64_t n + with nogil: + n = hts_idx_get_n_no_coor(self.index) + return n property text: - '''full contents of the :term:`sam file` header as a string - + '''string with the full contents of the :term:`sam file` header as a + string. + + This is a read-only attribute. + See :attr:`pysam.AlignmentFile.header` to get a parsed representation of the header. - ''' def __get__(self): - if not self._isOpen(): + if not self.is_open(): raise ValueError( "I/O operation on closed file" ) return from_string_and_size(self.header.text, self.header.l_text) property header: - '''header information within the :term:`sam file`. The records and - fields are returned as a two-level dictionary. + """two-level dictionay with header information from the file. + + This is a read-only attribute. The first level contains the record (``HD``, ``SQ``, etc) and the second level contains the fields (``VN``, ``LN``, etc). @@ -1204,9 +1452,9 @@ cdef class AlignmentFile: options that contain characters that are not valid field separators. - ''' + """ def __get__(self): - if not self._isOpen(): + if not self.is_open(): raise ValueError( "I/O operation on closed file" ) result = {} @@ -1245,19 +1493,11 @@ cdef class AlignmentFile: # header. Thus, in contravention to the # SAM API, consume the rest of the line. 
key, value = "\t".join(fields[idx+1:]).split(":", 1) - x[key] = VALID_HEADER_FIELDS[record][key](value) + x[key] = KNOWN_HEADER_FIELDS[record][key](value) break - # uppercase keys must be valid - if key in VALID_HEADER_FIELDS[record]: - x[key] = VALID_HEADER_FIELDS[record][key](value) - # lowercase are permitted for user fields - elif not key.isupper(): - x[key] = value - else: - raise ValueError( - "unknown field code '%s' in record '%s'" % - (key, record)) + # interpret type of known header record tags, default to str + x[key] = KNOWN_HEADER_FIELDS[record].get(key, str)(value) if VALID_HEADER_TYPES[record] == dict: if record in result: @@ -1269,8 +1509,9 @@ cdef class AlignmentFile: if record not in result: result[record] = [] result[record].append(x) - # if there are no SQ lines in the header, add the reference names - # from the information in the bam file. + # if there are no SQ lines in the header, add the + # reference names from the information in the bam + # file. # # Background: c-samtools keeps the textual part of the # header separate from the list of reference names and @@ -1285,113 +1526,14 @@ cdef class AlignmentFile: return result - def _buildLine(self, fields, record): - '''build a header line from *fields* dictionary for *record*''' - - # TODO: add checking for field and sort order - line = ["@%s" % record] - # comment - if record == "CO": - line.append(fields) - # user tags - elif record.islower(): - for key in sorted(fields): - line.append("%s:%s" % (key, str(fields[key]))) - # defined tags - else: - # write fields of the specification - for key in VALID_HEADER_ORDER[record]: - if key in fields: - line.append("%s:%s" % (key, str(fields[key]))) - # write user fields - for key in fields: - if not key.isupper(): - line.append("%s:%s" % (key, str(fields[key]))) - - return "\t".join(line) - - cdef bam_hdr_t * _buildHeader(self, new_header): - '''return a new header built from a dictionary in *new_header*. 
- - This method inserts the text field, target_name and target_len. - ''' - - lines = [] - - # check if hash exists - - # create new header and copy old data - cdef bam_hdr_t * dest - - dest = bam_hdr_init() - - # first: defined tags - for record in VALID_HEADERS: - if record in new_header: - ttype = VALID_HEADER_TYPES[record] - data = new_header[record] - if type(data) != type(ttype()): - raise ValueError( - "invalid type for record %s: %s, expected %s" % - (record, type(data), type(ttype()))) - if type(data) is dict: - lines.append(self._buildLine(data, record)) - else: - for fields in new_header[record]: - lines.append(self._buildLine(fields, record)) - - # then: user tags (lower case), sorted alphabetically - for record, data in sorted(new_header.items()): - if record in VALID_HEADERS: continue - if type( data ) is dict: - lines.append( self._buildLine( data, record ) ) - else: - for fields in new_header[record]: - lines.append( self._buildLine( fields, record ) ) - - text = "\n".join(lines) + "\n" - if dest.text != NULL: free( dest.text ) - dest.text = calloc( len(text), sizeof(char)) - dest.l_text = len(text) - cdef bytes btext = text.encode('ascii') - strncpy( dest.text, btext, dest.l_text ) - - cdef bytes bseqname - # collect targets - if "SQ" in new_header: - seqs = [] - for fields in new_header["SQ"]: - try: - seqs.append( (fields["SN"], fields["LN"] ) ) - except KeyError: - raise KeyError( "incomplete sequence information in '%s'" % str(fields)) - - dest.n_targets = len(seqs) - dest.target_name = calloc(dest.n_targets, sizeof(char*)) - dest.target_len = calloc(dest.n_targets, sizeof(uint32_t)) - - for x from 0 <= x < dest.n_targets: - seqname, seqlen = seqs[x] - dest.target_name[x] = calloc( - len(seqname) + 1, sizeof(char)) - bseqname = seqname.encode('ascii') - strncpy(dest.target_name[x], bseqname, - len(seqname) + 1) - dest.target_len[x] = seqlen - - return dest - - ############################################################### - 
############################################################### ############################################################### ## file-object like iterator access ## note: concurrent access will cause errors (see IteratorRow ## and multiple_iterators) ## Possible solutions: deprecate or open new file handle - ############################################################### def __iter__(self): - if not self._isOpen(): - raise ValueError( "I/O operation on closed file" ) + if not self.is_open(): + raise ValueError("I/O operation on closed file") if not self.is_bam and self.header.n_targets == 0: raise NotImplementedError( @@ -1405,21 +1547,28 @@ cdef class AlignmentFile: ''' cversion of iterator. Used by :class:`pysam.AlignmentFile.IteratorColumn`. ''' - return sam_read1(self.htsfile, - self.header, - self.b) + cdef int ret + with nogil: + ret = sam_read1(self.htsfile, + self.header, + self.b) + return ret def __next__(self): - """ - python version of next(). - """ cdef int ret = self.cnext() if (ret >= 0): - return makeAlignedSegment(self.b) + return makeAlignedSegment(self.b, self) elif ret == -2: raise IOError('truncated file') else: raise StopIteration + + # Compatibility functions for pysam < 0.8.3 + def gettid(self, reference): + return self.get_tid(reference) + + def getrname(self, tid): + return self.get_reference_name(tid) cdef class IteratorRow: @@ -1442,14 +1591,15 @@ cdef class IteratorRow: .. note:: It is usually not necessary to create an object of this class - explicitely. It is returned as a result of call to a + explicitly. It is returned as a result of call to a :meth:`AlignmentFile.fetch`. 
''' def __init__(self, AlignmentFile samfile, int multiple_iterators=False): + cdef char *cfilename - if not samfile._isOpen(): + if not samfile.is_open(): raise ValueError("I/O operation on closed file") # makes sure that samfile stays alive as long as the @@ -1459,11 +1609,14 @@ cdef class IteratorRow: # reopen the file - note that this makes the iterator # slow and causes pileup to slow down significantly. if multiple_iterators: - self.htsfile = hts_open(samfile._filename, 'r') + cfilename = samfile._filename + with nogil: + self.htsfile = hts_open(cfilename, 'r') assert self.htsfile != NULL # read header - required for accurate positioning # could a tell/seek work? - self.header = sam_hdr_read(self.htsfile) + with nogil: + self.header = sam_hdr_read(self.htsfile) assert self.header != NULL self.owns_samfile = True else: @@ -1491,7 +1644,7 @@ cdef class IteratorRowRegion(IteratorRow): .. note:: It is usually not necessary to create an object of this class - explicitely. It is returned as a result of call to a + explicitly. It is returned as a result of call to a :meth:`AlignmentFile.fetch`. """ @@ -1503,14 +1656,15 @@ cdef class IteratorRowRegion(IteratorRow): IteratorRow.__init__(self, samfile, multiple_iterators=multiple_iterators) - if not samfile._hasIndex(): + if not samfile.has_index(): raise ValueError("no index available for iteration") - self.iter = sam_itr_queryi( - self.samfile.index, - tid, - beg, - end) + with nogil: + self.iter = sam_itr_queryi( + self.samfile.index, + tid, + beg, + end) def __iter__(self): return self @@ -1520,17 +1674,16 @@ cdef class IteratorRowRegion(IteratorRow): cdef int cnext(self): '''cversion of iterator. Used by IteratorColumn''' - self.retval = hts_itr_next(hts_get_bgzfp(self.htsfile), - self.iter, - self.b, - self.htsfile) + with nogil: + self.retval = hts_itr_next(hts_get_bgzfp(self.htsfile), + self.iter, + self.b, + self.htsfile) def __next__(self): - """python version of next(). 
- """ self.cnext() if self.retval >= 0: - return makeAlignedSegment(self.b) + return makeAlignedSegment(self.b, self.samfile) elif self.retval == -2: # Note: it is currently not the case that hts_iter_next # returns -2 for a truncated file. @@ -1546,7 +1699,7 @@ cdef class IteratorRowRegion(IteratorRow): cdef class IteratorRowHead(IteratorRow): """*(AlignmentFile samfile, n, int multiple_iterators=False)* - iterate over first n reads in *samfile* + iterate over first n reads in `samfile` .. note:: It is usually not necessary to create an object of this class @@ -1572,23 +1725,22 @@ cdef class IteratorRowHead(IteratorRow): cdef int cnext(self): '''cversion of iterator. Used by IteratorColumn''' - return sam_read1(self.htsfile, - self.samfile.header, - self.b) + cdef int ret + with nogil: + ret = sam_read1(self.htsfile, + self.samfile.header, + self.b) + return ret def __next__(self): - """python version of next(). - - pyrex uses this non-standard name instead of next() - """ if self.current_row >= self.max_rows: raise StopIteration cdef int ret = self.cnext() - if (ret >= 0): + if ret >= 0: self.current_row += 1 - return makeAlignedSegment( self.b ) - elif (ret == -2): + return makeAlignedSegment(self.b, self.samfile) + elif ret == -2: raise IOError('truncated file') else: raise StopIteration @@ -1597,7 +1749,7 @@ cdef class IteratorRowHead(IteratorRow): cdef class IteratorRowAll(IteratorRow): """*(AlignmentFile samfile, int multiple_iterators=False)* - iterate over all reads in *samfile* + iterate over all reads in `samfile` .. note:: @@ -1621,19 +1773,18 @@ cdef class IteratorRowAll(IteratorRow): cdef int cnext(self): '''cversion of iterator. Used by IteratorColumn''' - return sam_read1(self.htsfile, - self.samfile.header, - self.b) + cdef int ret + with nogil: + ret = sam_read1(self.htsfile, + self.samfile.header, + self.b) + return ret def __next__(self): - """python version of next(). 
- - pyrex uses this non-standard name instead of next() - """ cdef int ret = self.cnext() - if (ret >= 0): - return makeAlignedSegment(self.b) - elif (ret == -2): + if ret >= 0: + return makeAlignedSegment(self.b, self.samfile) + elif ret == -2: raise IOError('truncated file') else: raise StopIteration @@ -1645,7 +1796,7 @@ cdef class IteratorRowAllRefs(IteratorRow): .. note:: It is usually not necessary to create an object of this class - explicitely. It is returned as a result of call to a + explicitly. It is returned as a result of call to a :meth:`AlignmentFile.fetch`. """ @@ -1656,7 +1807,7 @@ cdef class IteratorRowAllRefs(IteratorRow): IteratorRow.__init__(self, samfile, multiple_iterators=multiple_iterators) - if not samfile._hasIndex(): + if not samfile.has_index(): raise ValueError("no index available for fetch") self.tid = -1 @@ -1681,10 +1832,6 @@ cdef class IteratorRowAllRefs(IteratorRow): return self def __next__(self): - """python version of next(). - - pyrex uses this non-standard name instead of next() - """ # Create an initial iterator if self.tid == -1: if not self.samfile.nreferences: @@ -1697,7 +1844,7 @@ cdef class IteratorRowAllRefs(IteratorRow): # If current iterator is not exhausted, return aligned read if self.rowiter.retval > 0: - return makeAlignedSegment(self.rowiter.b) + return makeAlignedSegment(self.rowiter.b, self.samfile) self.tid += 1 @@ -1711,11 +1858,11 @@ cdef class IteratorRowAllRefs(IteratorRow): cdef class IteratorRowSelection(IteratorRow): """*(AlignmentFile samfile)* - iterate over reads in *samfile* at a given list of file positions. + iterate over reads in `samfile` at a given list of file positions. .. note:: It is usually not necessary to create an object of this class - explicitely. It is returned as a result of call to a :meth:`AlignmentFile.fetch`. + explicitly. It is returned as a result of call to a :meth:`AlignmentFile.fetch`. 
""" def __init__(self, AlignmentFile samfile, positions, int multiple_iterators=True): @@ -1733,26 +1880,27 @@ cdef class IteratorRowSelection(IteratorRow): cdef int cnext(self): '''cversion of iterator''' - # end iteration if out of positions if self.current_pos >= len(self.positions): return -1 - bgzf_seek(hts_get_bgzfp(self.htsfile), - self.positions[self.current_pos], - 0) + cdef uint64_t pos = self.positions[self.current_pos] + with nogil: + bgzf_seek(hts_get_bgzfp(self.htsfile), + pos, + 0) self.current_pos += 1 - return sam_read1(self.htsfile, - self.samfile.header, - self.b) - def __next__(self): - """python version of next(). + cdef int ret + with nogil: + ret = sam_read1(self.htsfile, + self.samfile.header, + self.b) + return ret - pyrex uses this non-standard name instead of next() - """ + def __next__(self): cdef int ret = self.cnext() if (ret >= 0): - return makeAlignedSegment(self.b) + return makeAlignedSegment(self.b, self.samfile) elif (ret == -2): raise IOError('truncated file') else: @@ -1764,7 +1912,10 @@ cdef int __advance_nofilter(void *data, bam1_t *b): ''' cdef __iterdata * d d = <__iterdata*>data - return sam_itr_next(d.htsfile, d.iter, b) + cdef int ret + with nogil: + ret = sam_itr_next(d.htsfile, d.iter, b) + return ret cdef int __advance_all(void *data, bam1_t *b): @@ -1777,10 +1928,12 @@ cdef int __advance_all(void *data, bam1_t *b): cdef __iterdata * d cdef mask = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP d = <__iterdata*>data - cdef int ret = sam_itr_next(d.htsfile, d.iter, b) - while ret >= 0 and b.core.flag & mask: + cdef int ret + with nogil: ret = sam_itr_next(d.htsfile, d.iter, b) - + while ret >= 0 and b.core.flag & mask: + with nogil: + ret = sam_itr_next(d.htsfile, d.iter, b) return ret @@ -1798,13 +1951,16 @@ cdef int __advance_snpcalls(void * data, bam1_t * b): cdef __iterdata * d d = <__iterdata*>data - cdef int ret = sam_itr_next(d.htsfile, d.iter, b) + cdef int ret cdef int skip = 0 cdef int q cdef int is_cns = 
1 cdef int is_nobaq = 0 cdef int capQ_thres = 0 + with nogil: + ret = sam_itr_next(d.htsfile, d.iter, b) + # reload sequence if d.fastafile != NULL and b.core.tid != d.tid: if d.seq != NULL: @@ -1813,7 +1969,7 @@ cdef int __advance_snpcalls(void * data, bam1_t * b): d.seq = faidx_fetch_seq( d.fastafile, d.header.target_name[d.tid], - 0, max_pos, + 0, MAX_POS, &d.seq_len) if d.seq == NULL: @@ -1844,7 +2000,8 @@ cdef int __advance_snpcalls(void * data, bam1_t * b): break # additional filters - ret = sam_itr_next(d.htsfile, d.iter, b) + with nogil: + ret = sam_itr_next(d.htsfile, d.iter, b) return ret @@ -1862,7 +2019,7 @@ cdef class IteratorColumn: result = list( f.pileup() ) Here, ``result`` will contain ``n`` objects of type - :class:`PileupColumn` for ``n`` columns, but each object in + :class:`~pysam.PileupColumn` for ``n`` columns, but each object in ``result`` will contain the same information. The desired behaviour can be achieved by list comprehension:: @@ -1870,9 +2027,9 @@ cdef class IteratorColumn: result = [ x.pileups() for x in f.pileup() ] ``result`` will be a list of ``n`` lists of objects of type - :class:`PileupRead`. + :class:`~pysam.PileupRead`. - If the iterator is associated with a :class:`Fastafile` using the + If the iterator is associated with a :class:`~pysam.Fastafile` using the :meth:`addReference` method, then the iterator will export the current sequence via the methods :meth:`getSequence` and :meth:`seq_len`. @@ -1887,7 +2044,7 @@ cdef class IteratorColumn: See AlignmentFile.pileup for description. fastafile - A :class:`FastaFile` object + A :class:`~pysam.FastaFile` object max_depth maximum read depth. The default is 8000. 
@@ -1896,8 +2053,6 @@ cdef class IteratorColumn: def __cinit__( self, AlignmentFile samfile, **kwargs ): self.samfile = samfile - # TODO - # self.mask = kwargs.get("mask", BAM_DEF_MASK ) self.fastafile = kwargs.get("fastafile", None) self.stepper = kwargs.get("stepper", None) self.max_depth = kwargs.get("max_depth", 8000) @@ -1914,12 +2069,12 @@ cdef class IteratorColumn: cdef int cnext(self): '''perform next iteration. ''' - self.plp = bam_plp_auto( self.pileup_iter, - &self.tid, - &self.pos, - &self.n_plp ) + self.plp = bam_plp_auto(self.pileup_iter, + &self.tid, + &self.pos, + &self.n_plp ) - cdef char * getSequence( self ): + cdef char * getSequence(self): '''return current reference sequence underlying the iterator. ''' return self.iterdata.seq @@ -1930,7 +2085,7 @@ cdef class IteratorColumn: def addReference(self, Fastafile fastafile): ''' - add reference sequences in *fastafile* to iterator.''' + add reference sequences in `fastafile` to iterator.''' self.fastafile = fastafile if self.iterdata.seq != NULL: free(self.iterdata.seq) self.iterdata.tid = -1 @@ -1944,7 +2099,7 @@ cdef class IteratorColumn: cdef setMask(self, mask): '''set masking flag in iterator. - reads with bits set in *mask* will be skipped. + reads with bits set in `mask` will be skipped. ''' raise NotImplementedError() # self.mask = mask @@ -1954,7 +2109,7 @@ cdef class IteratorColumn: int tid, int start, int end, - int multiple_iterators = 0 ): + int multiple_iterators=0 ): '''setup the iterator structure''' self.iter = IteratorRowRegion(self.samfile, tid, start, end, multiple_iterators) @@ -2040,7 +2195,7 @@ cdef class IteratorColumnRegion(IteratorColumn): def __cinit__(self, AlignmentFile samfile, int tid = 0, int start = 0, - int end = max_pos, + int end = MAX_POS, int truncate = False, **kwargs ): @@ -2051,8 +2206,6 @@ cdef class IteratorColumnRegion(IteratorColumn): self.truncate = truncate def __next__(self): - """python version of next(). 
- """ while 1: self.cnext() @@ -2069,7 +2222,8 @@ cdef class IteratorColumnRegion(IteratorColumn): return makePileupColumn(&self.plp, self.tid, self.pos, - self.n_plp) + self.n_plp, + self.samfile) cdef class IteratorColumnAllRefs(IteratorColumn): @@ -2085,11 +2239,9 @@ cdef class IteratorColumnAllRefs(IteratorColumn): raise StopIteration # initialize iterator - self.setupIteratorData(self.tid, 0, max_pos, 1) + self.setupIteratorData(self.tid, 0, MAX_POS, 1) def __next__(self): - """python version of next(). - """ while 1: self.cnext() @@ -2102,1852 +2254,166 @@ cdef class IteratorColumnAllRefs(IteratorColumn): return makePileupColumn(&self.plp, self.tid, self.pos, - self.n_plp) + self.n_plp, + self.samfile) # otherwise, proceed to next reference or stop self.tid += 1 if self.tid < self.samfile.nreferences: - self.setupIteratorData(self.tid, 0, max_pos, 0) + self.setupIteratorData(self.tid, 0, MAX_POS, 0) else: raise StopIteration -cdef inline int32_t _getQueryStart(bam1_t *src) except -1: - cdef uint32_t * cigar_p - cdef uint32_t k, op - cdef uint32_t start_offset = 0 - - if pysam_get_n_cigar(src): - cigar_p = pysam_bam_get_cigar(src); - for k from 0 <= k < pysam_get_n_cigar(src): - op = cigar_p[k] & BAM_CIGAR_MASK - if op == BAM_CHARD_CLIP: - if start_offset != 0 and start_offset != src.core.l_qseq: - PyErr_SetString(ValueError, 'Invalid clipping in CIGAR string') - return -1 - elif op == BAM_CSOFT_CLIP: - start_offset += cigar_p[k] >> BAM_CIGAR_SHIFT - else: - break - - return start_offset - - -cdef inline int32_t _getQueryEnd(bam1_t *src) except -1: - cdef uint32_t * cigar_p - cdef uint32_t k, op - cdef uint32_t end_offset = src.core.l_qseq - - if pysam_get_n_cigar(src) > 1: - cigar_p = pysam_bam_get_cigar(src); - for k from pysam_get_n_cigar(src) > k >= 1: - op = cigar_p[k] & BAM_CIGAR_MASK - if op == BAM_CHARD_CLIP: - if end_offset != 0 and end_offset != src.core.l_qseq: - PyErr_SetString(ValueError, - 'Invalid clipping in CIGAR string') - return -1 - elif op 
== BAM_CSOFT_CLIP: - end_offset -= cigar_p[k] >> BAM_CIGAR_SHIFT - else: - break - if end_offset == 0: - end_offset = src.core.l_qseq - return end_offset +cdef class SNPCall: + '''the results of a SNP call.''' + cdef int _tid + cdef int _pos + cdef char _reference_base + cdef char _genotype + cdef int _consensus_quality + cdef int _snp_quality + cdef int _rms_mapping_quality + cdef int _coverage + property tid: + '''the chromosome ID as is defined in the header''' + def __get__(self): + return self._tid -cdef inline object _getSequenceRange(bam1_t *src, - uint32_t start, uint32_t end): - cdef uint8_t * p - cdef uint32_t k - cdef char * s + property pos: + '''nucleotide position of SNP.''' + def __get__(self): return self._pos - if not src.core.l_qseq: - return None + property reference_base: + '''reference base at pos. ``N`` if no reference sequence supplied.''' + def __get__(self): return from_string_and_size( &self._reference_base, 1 ) - seq = PyBytes_FromStringAndSize(NULL, end - start) - s = seq - p = pysam_bam_get_seq(src) + property genotype: + '''the genotype called.''' + def __get__(self): return from_string_and_size( &self._genotype, 1 ) - for k from start <= k < end: - # equivalent to seq_nt16_str[bam1_seqi(s, i)] (see bam.c) - # note: do not use string literal as it will be a python string - s[k-start] = seq_nt16_str[p[k/2] >> 4 * (1 - k%2) & 0xf] + property consensus_quality: + '''the genotype quality (Phred-scaled).''' + def __get__(self): return self._consensus_quality - return _charptr_to_str(seq) + property snp_quality: + '''the snp quality (Phred scaled) - probability of consensus being + identical to reference sequence.''' + def __get__(self): return self._snp_quality + property mapping_quality: + '''the root mean square (rms) of the mapping quality of all reads + involved in the call.''' + def __get__(self): return self._rms_mapping_quality -cdef inline object _getQualitiesRange(bam1_t *src, - uint32_t start, - uint32_t end): - '''return an array 
of quality values.''' + property coverage: + '''coverage or read depth - the number of reads involved in the call.''' + def __get__(self): return self._coverage - cdef uint8_t * p - cdef uint32_t k + def __str__(self): - p = pysam_bam_get_qual(src) - if p[0] == 0xff: - return None + return "\t".join( map(str, ( + self.tid, + self.pos, + self.reference_base, + self.genotype, + self.consensus_quality, + self.snp_quality, + self.mapping_quality, + self.coverage ) ) ) - # 'B': unsigned char - cdef array.array result = array.array('B', [0]) - array.resize(result, end - start) - # copy data - memcpy(result.data.as_voidptr, &p[start], end - start) +cdef class IndexedReads: + """*(AlignmentFile samfile, multiple_iterators=True) - return result + Index a Sam/BAM-file by query name while keeping the + original sort order intact. + The index is kept in memory and can be substantial. -def toQualityString(qualities): - '''convert a list of quality score to the string - representation used in the SAM format.''' - if qualities is None: - return None - return "".join([chr(x+33) for x in qualities]) - + By default, the file is re-openend to avoid conflicts if multiple + operators work on the same file. Set `multiple_iterators` = False + to not re-open `samfile`. -def fromQualityString(quality_string): - '''return a list of quality scores from the - stringn representation of quality scores used - in the SAM format.''' - if quality_string is None: - return None - return array.array('B', [ord(x)-33 for x in quality_string]) - - -cdef inline uint8_t _get_value_code(value, value_type=None): - '''guess type code for a *value*. 
If *value_type* is None, - the type code will be inferred based on the Python type of - *value*''' - cdef uint8_t type_code - cdef char * _char_type - - if value_type is None: - if isinstance(value, int): - type_code = 'i' - elif isinstance(value, float): - type_code = 'd' - elif isinstance(value, str): - type_code = 'Z' - elif isinstance(value, bytes): - type_code = 'Z' - else: - return 0 - else: - if value_type not in 'Zidf': - return 0 - value_type = _forceBytes(value_type) - _char_type = value_type - type_code = (_char_type)[0] + Parameters + ---------- - return type_code + samfile : AlignmentFile + File to be indexed. + multiple_iterators : bool + Flag indicating whether the file should be reopened. Reopening prevents + existing iterators being affected by the indexing. -cdef inline _get_value_type(value, maximum_value=None): - '''returns the value type of a value. + """ - If max is specified, the approprite type is - returned for a range where value is the minimum. - ''' - - if maximum_value is None: - maximum_value = value - - t = type(value) - - if t is float: - valuetype = b'f' - elif t is int: - # signed ints - if value < 0: - if value >= -128 and maximum_value < 128: - valuetype = b'c' - elif value >= -32768 and maximum_value < 32768: - valuetype = b's' - elif value < -2147483648 or maximum_value >= 2147483648: - raise ValueError( - "at least one signed integer out of range of " - "BAM/SAM specification") - else: - valuetype = b'i' - # unsigned ints - else: - if maximum_value < 256: - valuetype = b'C' - elif maximum_value < 65536: - valuetype = b'S' - elif maximum_value >= 4294967296: - raise ValueError( - "at least one integer out of range of BAM/SAM specification") - else: - valuetype = b'I' - else: - # Note: hex strings (H) are not supported yet - if t is not bytes: - value = value.encode('ascii') - if len(value) == 1: - valuetype = b"A" - else: - valuetype = b'Z' + def __init__(self, AlignmentFile samfile, int multiple_iterators=True): + cdef char 
*cfilename - return valuetype + # makes sure that samfile stays alive as long as this + # object is alive. + self.samfile = samfile + assert samfile.is_bam, "can only IndexReads on bam files" -cdef inline _pack_tags(tags): - """pack a list of tags. Each tag is a tuple of (tag, tuple). - - Values are packed into the most space efficient data structure - possible unless the tag contains a third field with the type code. + # multiple_iterators the file - note that this makes the iterator + # slow and causes pileup to slow down significantly. + if multiple_iterators: + cfilename = samfile._filename + with nogil: + self.htsfile = hts_open(cfilename, 'r') + assert self.htsfile != NULL + # read header - required for accurate positioning + with nogil: + self.header = sam_hdr_read(self.htsfile) + self.owns_samfile = True + else: + self.htsfile = self.samfile.htsfile + self.header = self.samfile.header + self.owns_samfile = False - Returns a fmt string and the associated list of arguments - to used in a call to struct.pack_into. - """ - fmts, args = ["<"], [] + def build(self): + '''build the index.''' - for tag in tags: + self.index = collections.defaultdict(list) - if len(tag) == 2: - pytag, value = tag - valuetype = None - elif len(tag) == 3: - pytag, value, valuetype = tag - else: - raise ValueError("malformatted tag: %s" % str(tag)) - - if not type(pytag) is bytes: - pytag = pytag.encode('ascii') - - datatype2format = {'c': 'b', - 's': 'h', - 'i': 'i', - 'C': 'B', - 'S': 'H', - 'I': 'I', - 'f': 'f', - 'A': 'c',} - - t = type(value) - if t is tuple or t is list: - # binary tags are treated separately - if valuetype is None: - # automatically determine value type - first value - # determines type. If there is a mix of types, the - # result is undefined. 
- valuetype = _get_value_type(min(value), max(value)) - - if valuetype not in datatype2format: - raise ValueError("invalid value type '%s'" % valuetype) - datafmt = "2sccI%i%s" % (len(value), datatype2format[valuetype]) - - args.extend([pytag[:2], - b"B", - valuetype, - len(value)] + list(value)) - fmts.append(datafmt) + # this method will start indexing from the current file + # position if you decide + cdef int ret = 1 + cdef bam1_t * b = calloc(1, sizeof( bam1_t)) - else: - - if valuetype is None: - valuetype = _get_value_type(value) - - if valuetype == b"Z": - fmt = "2sc%is" % (len(value)+1) - else: - fmt = "2sc%s" % datatype2format[valuetype] - - args.extend([pytag[:2], - valuetype, - value]) - - fmts.append(fmt) - - return "".join(fmts), args - - -cdef class AlignedSegment: - '''Class representing an aligned segment. - - This class stores a handle to the samtools C-structure representing - an aligned read. Member read access is forwarded to the C-structure - and converted into python objects. This implementation should be fast, - as only the data needed is converted. - - For write access, the C-structure is updated in-place. This is - not the most efficient way to build BAM entries, as the variable - length data is concatenated and thus needs to be resized if - a field is updated. Furthermore, the BAM entry might be - in an inconsistent state. - - One issue to look out for is that the sequence should always - be set *before* the quality scores. Setting the sequence will - also erase any quality scores that were set previously. - ''' - - # Now only called when instances are created from Python - def __init__(self): - # see bam_init1 - self._delegate = calloc(1, sizeof(bam1_t)) - # allocate some memory. 
If size is 0, calloc does not return a - # pointer that can be passed to free() so allocate 40 bytes - # for a new read - self._delegate.m_data = 40 - self._delegate.data = calloc( - self._delegate.m_data, 1) - self._delegate.l_data = 0 - - def __dealloc__(self): - bam_destroy1(self._delegate) - - def __str__(self): - """return string representation of alignment. - - The representation is an approximate :term:`sam` format. - - An aligned read might not be associated with a :term:`AlignmentFile`. - As a result :term:`tid` is shown instead of the reference name. - - Similarly, the tags field is returned in its parsed state. - """ - # sam-parsing is done in sam.c/bam_format1_core which - # requires a valid header. - return "\t".join(map(str, (self.query_name, - self.flag, - self.reference_id, - self.reference_start, - self.mapping_quality, - self.cigarstring, - self.next_reference_id, - self.next_reference_start, - self.query_alignment_length, - self.query_sequence, - self.query_qualities, - self.tags))) - - def compare(self, AlignedSegment other): - '''return -1,0,1, if contents in this are binary - <,=,> to *other* - - ''' - - cdef int retval, x - cdef bam1_t *t - cdef bam1_t *o - - t = self._delegate - o = other._delegate - - # uncomment for debugging purposes - # cdef unsigned char * oo, * tt - # tt = (&t.core) - # oo = (&o.core) - # for x from 0 <= x < sizeof( bam1_core_t): print x, tt[x], oo[x] - # tt = (t.data) - # oo = (o.data) - # for x from 0 <= x < max(t.l_data, o.l_data): print x, tt[x], oo[x], chr(tt[x]), chr(oo[x]) - - # Fast-path test for object identity - if t == o: - return 0 - - retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t)) - - if retval: - return retval - # cmp(t.l_data, o.l_data) - retval = (t.l_data > o.l_data) - (t.l_data < o.l_data) - if retval: - return retval - return memcmp(t.data, o.data, t.l_data) - - def __richcmp__(self, AlignedSegment other, int op): - if op == 2: # == operator - return self.compare(other) == 0 - elif op == 3: # 
!= operator - return self.compare(other) != 0 - else: - return NotImplemented - - # Disabled so long as __cmp__ is a special method - def __hash__(self): - cdef bam1_t * src - src = self._delegate - # shift and xor values in the core structure - # make sure tid and mtid are shifted by different amounts - # should variable length data be included? - cdef uint32_t hash_value = src.core.tid << 24 ^ \ - src.core.pos << 16 ^ \ - src.core.qual << 8 ^ \ - src.core.flag ^ \ - src.core.isize << 24 ^ \ - src.core.mtid << 16 ^ \ - src.core.mpos << 8 - - return hash_value - - ######################################################## - ## Basic attributes in order of appearance in SAM format - property query_name: - """the query template name (None if not present)""" - def __get__(self): - cdef bam1_t * src - src = self._delegate - if pysam_get_l_qname(src) == 0: - return None - return _charptr_to_str(pysam_bam_get_qname(src)) - - def __set__(self, qname): - if qname is None or len(qname) == 0: - return - qname = _forceBytes(qname) - cdef bam1_t * src - cdef int l - cdef char * p - - src = self._delegate - p = pysam_bam_get_qname(src) - - # the qname is \0 terminated - l = len(qname) + 1 - pysam_bam_update(src, - pysam_get_l_qname(src), - l, - p) - - - pysam_set_l_qname(src, l) - - # re-acquire pointer to location in memory - # as it might have moved - p = pysam_bam_get_qname(src) - - strncpy(p, qname, l) - - property flag: - """properties flag""" - def __get__(self): - return pysam_get_flag(self._delegate) - def __set__(self, flag): - pysam_set_flag(self._delegate, flag) - - property reference_id: - """:term:`reference` ID - - .. note:: - - This field contains the index of the reference sequence in - the sequence dictionary. 
To obtain the name of the - reference sequence, use - :meth:`pysam.AlignmentFile.getrname()` - - """ - def __get__(self): return self._delegate.core.tid - def __set__(self, tid): self._delegate.core.tid = tid - - property reference_start: - """0-based leftmost coordinate""" - def __get__(self): return self._delegate.core.pos - def __set__(self, pos): - ## setting the position requires updating the "bin" attribute - cdef bam1_t * src - src = self._delegate - src.core.pos = pos - if pysam_get_n_cigar(src): - pysam_set_bin(src, - hts_reg2bin( - src.core.pos, - bam_endpos(src), - 14, - 5)) - else: - pysam_set_bin(src, - hts_reg2bin( - src.core.pos, - src.core.pos + 1, - 14, - 5)) - - property mapping_quality: - """mapping quality""" - def __get__(self): - return pysam_get_qual(self._delegate) - def __set__(self, qual): - pysam_set_qual(self._delegate, qual) - - property cigarstring: - '''the :term:`cigar` alignment as a string. - - The cigar string is a string of alternating integers - and characters denoting the length and the type of - an operation. - - .. note:: - The order length,operation is specified in the - SAM format. It is different from the order of - the :attr:`cigar` property. - - Returns None if not present. - - To unset the cigarstring, assign None or the - empty string. 
- ''' - def __get__(self): - c = self.cigartuples - if c is None: - return None - # reverse order - else: - return "".join([ "%i%c" % (y,CODE2CIGAR[x]) for x,y in c]) - - def __set__(self, cigar): - if cigar is None or len(cigar) == 0: - self.cigartuples = [] - else: - parts = CIGAR_REGEX.findall(cigar) - # reverse order - self.cigartuples = [(CIGAR2CODE[ord(y)], int(x)) for x,y in parts] - - # TODO - # property cigar: - # """the cigar alignment""" - - property next_reference_id: - """the :term:`reference` id of the mate/next read.""" - def __get__(self): return self._delegate.core.mtid - def __set__(self, mtid): - self._delegate.core.mtid = mtid - - property next_reference_start: - """the position of the mate/next read.""" - def __get__(self): - return self._delegate.core.mpos - def __set__(self, mpos): - self._delegate.core.mpos = mpos - - property query_length: - """the length of the query/read. - - This value corresponds to the length of the sequence supplied - in the BAM/SAM file. The length of a query is 0 if there is no - sequence in the BAM/SAM file. In those cases, the read length - can be inferred from the CIGAR alignment, see - :meth:`pysam.AlignmentFile.infer_query_length.`. - - The length includes soft-clipped bases and is equal to - ``len(query_sequence)``. - - This property is read-only but can be set by providing a - sequence. - - Returns 0 if not available. - - """ - def __get__(self): - return self._delegate.core.l_qseq - - property template_length: - """the observed query template length""" - def __get__(self): - return self._delegate.core.isize - def __set__(self, isize): - self._delegate.core.isize = isize - - property query_sequence: - """read sequence bases, including :term:`soft clipped` bases - (None if not present). - - Note that assigning to seq will invalidate any quality scores. - Thus, to in-place edit the sequence and quality scores, copies of - the quality scores need to be taken. 
Consider trimming for example:: - - q = read.qual - read.seq = read.seq[5:10] - read.qual = q[5:10] - - The sequence is returned as it is stored in the BAM file. Some mappers - might have stored a reverse complement of the original read - sequence. - """ - def __get__(self): - cdef bam1_t * src - cdef char * s - src = self._delegate - - if src.core.l_qseq == 0: return None - - return _getSequenceRange(src, 0, src.core.l_qseq) - - def __set__(self, seq): - # samtools manages sequence and quality length memory together - # if no quality information is present, the first byte says 0xff. - cdef bam1_t * src - cdef uint8_t * p - cdef char * s - cdef int l, k, nbytes_new, nbytes_old - - if seq == None: - l = 0 - else: - l = len(seq) - seq = _forceBytes(seq) - - src = self._delegate - - # as the sequence is stored in half-bytes, the total length (sequence - # plus quality scores) is (l+1)/2 + l - nbytes_new = (l + 1) / 2 + l - nbytes_old = (src.core.l_qseq + 1) / 2 + src.core.l_qseq - - # acquire pointer to location in memory - p = pysam_bam_get_seq(src) - src.core.l_qseq = l - - # change length of data field - pysam_bam_update(src, - nbytes_old, - nbytes_new, - p) - - if l > 0: - # re-acquire pointer to location in memory - # as it might have moved - p = pysam_bam_get_seq(src) - for k from 0 <= k < nbytes_new: - p[k] = 0 - # convert to C string - s = seq - for k from 0 <= k < l: - p[k/2] |= seq_nt16_table[s[k]] << 4 * (1 - k % 2) - - # erase qualities - p = pysam_bam_get_qual(src) - p[0] = 0xff - - property query_qualities: - """read sequence base qualities, including :term:`soft - clipped` bases (None if not present). - - Quality scores are returned as a python array of unsigned - chars. Note that this is not the ASCII-encoded value typically - seen in FASTQ or SAM formatted files. Thus, no offset of 33 - needs to be subtracted. 
- - Note that to set quality scores the sequence has to be set - beforehand as this will determine the expected length of the - quality score array. - - This method raises a ValueError if the length of the - quality scores and the sequence are not the same. - - """ - def __get__(self): - - cdef bam1_t * src - cdef char * q - - src = self._delegate - - if src.core.l_qseq == 0: - return None - - return _getQualitiesRange(src, 0, src.core.l_qseq) - - def __set__(self, qual): - # note that memory is already allocated via setting the sequence - # hence length match of sequence and quality needs is checked. - cdef bam1_t * src - cdef uint8_t * p - cdef int l - - src = self._delegate - p = pysam_bam_get_qual(src) - if qual is None or len(qual) == 0: - # if absent and there is a sequence: set to 0xff - if src.core.l_qseq != 0: - p[0] = 0xff - return - - # check for length match - l = len(qual) - if src.core.l_qseq != l: - raise ValueError( - "quality and sequence mismatch: %i != %i" % - (l, src.core.l_qseq)) - - # create a python array object filling it - # with the quality scores - - # NB: should avoid this copying if qual is - # already of the correct type. - cdef array.array result = array.array('B', qual) - - # copy data - memcpy(p, result.data.as_voidptr, l) - - - property bin: - """properties bin""" - def __get__(self): - return pysam_get_bin(self._delegate) - def __set__(self, bin): - pysam_set_bin(self._delegate, bin) - - - ########################################################## - # Derived simple attributes. These are simple attributes of - # AlignedSegment getting and setting values. - ########################################################## - # 1. 
Flags - ########################################################## - property is_paired: - """true if read is paired in sequencing""" - def __get__(self): - return (self.flag & BAM_FPAIRED) != 0 - def __set__(self,val): - pysam_update_flag(self._delegate, val, BAM_FPAIRED) - - property is_proper_pair: - """true if read is mapped in a proper pair""" - def __get__(self): - return (self.flag & BAM_FPROPER_PAIR) != 0 - def __set__(self,val): - pysam_update_flag(self._delegate, val, BAM_FPROPER_PAIR) - property is_unmapped: - """true if read itself is unmapped""" - def __get__(self): - return (self.flag & BAM_FUNMAP) != 0 - def __set__(self, val): - pysam_update_flag(self._delegate, val, BAM_FUNMAP) - property mate_is_unmapped: - """true if the mate is unmapped""" - def __get__(self): - return (self.flag & BAM_FMUNMAP) != 0 - def __set__(self,val): - pysam_update_flag(self._delegate, val, BAM_FMUNMAP) - property is_reverse: - """true if read is mapped to reverse strand""" - def __get__(self): - return (self.flag & BAM_FREVERSE) != 0 - def __set__(self,val): - pysam_update_flag(self._delegate, val, BAM_FREVERSE) - property mate_is_reverse: - """true is read is mapped to reverse strand""" - def __get__(self): - return (self.flag & BAM_FMREVERSE) != 0 - def __set__(self,val): - pysam_update_flag(self._delegate, val, BAM_FMREVERSE) - property is_read1: - """true if this is read1""" - def __get__(self): - return (self.flag & BAM_FREAD1) != 0 - def __set__(self,val): - pysam_update_flag(self._delegate, val, BAM_FREAD1) - property is_read2: - """true if this is read2""" - def __get__(self): - return (self.flag & BAM_FREAD2) != 0 - def __set__(self, val): - pysam_update_flag(self._delegate, val, BAM_FREAD2) - property is_secondary: - """true if not primary alignment""" - def __get__(self): - return (self.flag & BAM_FSECONDARY) != 0 - def __set__(self, val): - pysam_update_flag(self._delegate, val, BAM_FSECONDARY) - property is_qcfail: - """true if QC failure""" - def 
__get__(self): - return (self.flag & BAM_FQCFAIL) != 0 - def __set__(self, val): - pysam_update_flag(self._delegate, val, BAM_FQCFAIL) - property is_duplicate: - """true if optical or PCR duplicate""" - def __get__(self): - return (self.flag & BAM_FDUP) != 0 - def __set__(self, val): - pysam_update_flag(self._delegate, val, BAM_FDUP) - property is_supplementary: - """true if this is a supplementary alignment""" - def __get__(self): - return (self.flag & BAM_FSUPPLEMENTARY) != 0 - def __set__(self, val): - pysam_update_flag(self._delegate, val, BAM_FSUPPLEMENTARY) - - # 2. Coordinates and lengths - property reference_end: - '''aligned reference position of the read on the reference genome. - - reference_end points to one past the last aligned residue. - Returns None if not available (read is unmapped or no cigar - alignment present). - - ''' - def __get__(self): - cdef bam1_t * src - src = self._delegate - if (self.flag & BAM_FUNMAP) or pysam_get_n_cigar(src) == 0: - return None - return bam_endpos(src) - - property reference_length: - '''aligned length of the read on the reference genome. - - This is equal to `aend - pos`. Returns None if not available.''' - def __get__(self): - cdef bam1_t * src - src = self._delegate - if (self.flag & BAM_FUNMAP) or pysam_get_n_cigar(src) == 0: - return None - return bam_endpos(src) - \ - self._delegate.core.pos - - property query_alignment_sequence: - """aligned portion of the read. - - This is a substring of :attr:`seq` that excludes flanking - bases that were :term:`soft clipped` (None if not present). It - is equal to ``seq[qstart:qend]``. - - SAM/BAM files may include extra flanking bases that are not - part of the alignment. These bases may be the result of the - Smith-Waterman or other algorithms, which may not require - alignments that begin at the first residue or end at the last. 
- In addition, extra sequencing adapters, multiplex identifiers, - and low-quality bases that were not considered for alignment - may have been retained. - - """ - - def __get__(self): - cdef bam1_t * src - cdef uint32_t start, end - - src = self._delegate - - if src.core.l_qseq == 0: - return None - - start = _getQueryStart(src) - end = _getQueryEnd(src) - - return _getSequenceRange(src, start, end) - - property query_alignment_qualities: - """aligned query sequence quality values (None if not present). These - are the quality values that correspond to :attr:`query`, that - is, they exclude qualities of :term:`soft clipped` bases. This - is equal to ``qual[qstart:qend]``. - - Quality scores are returned as a python array of unsigned - chars. Note that this is not the ASCII-encoded value typically - seen in FASTQ or SAM formatted files. Thus, no offset of 33 - needs to be subtracted. - - This property is read-only. - - """ - def __get__(self): - cdef bam1_t * src - cdef uint32_t start, end - - src = self._delegate - - if src.core.l_qseq == 0: - return None - - start = _getQueryStart(src) - end = _getQueryEnd(src) - - return _getQualitiesRange(src, start, end) - - property query_alignment_start: - """start index of the aligned query portion of the sequence (0-based, - inclusive). - - This the index of the first base in :attr:`seq` that is not - soft-clipped. - - """ - def __get__(self): - return _getQueryStart(self._delegate) - - property query_alignment_end: - """end index of the aligned query portion of the sequence (0-based, - exclusive)""" - def __get__(self): - return _getQueryEnd(self._delegate) - - property query_alignment_length: - """length of the aligned query sequence. 
- - This is equal to :attr:`qend` - :attr:`qstart`""" - def __get__(self): - cdef bam1_t * src - src = self._delegate - return _getQueryEnd(src) - _getQueryStart(src) - - ##################################################### - # Computed properties - - def get_reference_positions(self, full_length=False): - """a list of reference positions that this read aligns to. - - By default, this method only returns positions in the - reference that are within the alignment. If *full_length* is - set, None values will be included for any soft-clipped or - unaligned positions within the read. The returned list will - thus be of the same length as the read. - - """ - cdef uint32_t k, i, pos - cdef int op - cdef uint32_t * cigar_p - cdef bam1_t * src - cdef bint _full = full_length - - src = self._delegate - if pysam_get_n_cigar(src) == 0: - return [] - - result = [] - pos = src.core.pos - cigar_p = pysam_bam_get_cigar(src) - - for k from 0 <= k < pysam_get_n_cigar(src): - op = cigar_p[k] & BAM_CIGAR_MASK - l = cigar_p[k] >> BAM_CIGAR_SHIFT - - if op == BAM_CSOFT_CLIP or op == BAM_CINS: - if _full: - for i from 0 <= i < l: - result.append(None) - elif op == BAM_CMATCH: - for i from pos <= i < pos + l: - result.append(i) - pos += l - elif op == BAM_CDEL or op == BAM_CREF_SKIP: - pos += l - - return result - - def infer_query_length(self, always=True): - """inferred read length from CIGAR string. - - If *always* is set to True, the read length - will be always inferred. If set to False, the length - of the read sequence will be returned if it is - available. - - Returns None if CIGAR string is not present. 
- """ - cdef uint32_t k, qpos - cdef int op - cdef uint32_t * cigar_p - cdef bam1_t * src - - src = self._delegate - - if not always and src.core.l_qseq: - return src.core.l_qseq - - if pysam_get_n_cigar(src) == 0: - return None - - qpos = 0 - cigar_p = pysam_bam_get_cigar(src) - - for k from 0 <= k < pysam_get_n_cigar(src): - op = cigar_p[k] & BAM_CIGAR_MASK - - if op == BAM_CMATCH or op == BAM_CINS or \ - op == BAM_CSOFT_CLIP or \ - op == BAM_CEQUAL or op == BAM_CDIFF: - qpos += cigar_p[k] >> BAM_CIGAR_SHIFT - - return qpos - - def get_aligned_pairs(self, matches_only = False): - """a list of aligned read (query) and reference positions. - For inserts, deletions, skipping either query or reference position may be None. - - If @matches_only is True, only matched bases are returned - no None on either side. - - Padding is currently not supported and leads to an exception - - """ - cdef uint32_t k, i, pos, qpos - cdef int op - cdef uint32_t * cigar_p - cdef bam1_t * src - cdef int _matches_only - - _matches_only = bool(matches_only) - - src = self._delegate - if pysam_get_n_cigar(src) == 0: - return [] - - result = [] - pos = src.core.pos - qpos = 0 - cigar_p = pysam_bam_get_cigar(src) - - for k from 0 <= k < pysam_get_n_cigar(src): - op = cigar_p[k] & BAM_CIGAR_MASK - l = cigar_p[k] >> BAM_CIGAR_SHIFT - - if op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF: - for i from pos <= i < pos + l: - result.append((qpos, i)) - qpos += 1 - pos += l - - elif op == BAM_CINS or op == BAM_CSOFT_CLIP: - if not _matches_only: - for i from pos <= i < pos + l: - result.append((qpos, None)) - qpos += 1 - else: - qpos += l - - elif op == BAM_CDEL or op == BAM_CREF_SKIP: - if not _matches_only: - for i from pos <= i < pos + l: - result.append((None, i)) - pos += l - - elif op == BAM_CHARD_CLIP: - pass # advances neither - - elif op == BAM_CPAD: - raise NotImplementedError("Padding (BAM_CPAD, 6) is currently not supported. Please implement. 
Sorry about that.") - - return result - - def get_blocks(self): - """ a list of start and end positions of - aligned gapless blocks. - - The start and end positions are in genomic - coordinates. - - Blocks are not normalized, i.e. two blocks - might be directly adjacent. This happens if - the two blocks are separated by an insertion - in the read. - """ - - cdef uint32_t k, pos, l - cdef int op - cdef uint32_t * cigar_p - cdef bam1_t * src - - src = self._delegate - if pysam_get_n_cigar(src) == 0: - return [] - - result = [] - pos = src.core.pos - cigar_p = pysam_bam_get_cigar(src) - l = 0 - - for k from 0 <= k < pysam_get_n_cigar(src): - op = cigar_p[k] & BAM_CIGAR_MASK - l = cigar_p[k] >> BAM_CIGAR_SHIFT - if op == BAM_CMATCH: - result.append((pos, pos + l)) - pos += l - elif op == BAM_CDEL or op == BAM_CREF_SKIP: - pos += l - - return result - - def get_overlap(self, uint32_t start, uint32_t end): - """return number of aligned bases of read overlapping the interval - *start* and *end* on the reference sequence. - - Return None if cigar alignment is not available. - """ - cdef uint32_t k, i, pos, overlap - cdef int op, o - cdef uint32_t * cigar_p - cdef bam1_t * src - - overlap = 0 - - src = self._delegate - if pysam_get_n_cigar(src) == 0: - return None - pos = src.core.pos - o = 0 - - cigar_p = pysam_bam_get_cigar(src) - for k from 0 <= k < pysam_get_n_cigar(src): - op = cigar_p[k] & BAM_CIGAR_MASK - l = cigar_p[k] >> BAM_CIGAR_SHIFT - - if op == BAM_CMATCH: - o = min( pos + l, end) - max( pos, start ) - if o > 0: overlap += o - - if op == BAM_CMATCH or op == BAM_CDEL or op == BAM_CREF_SKIP: - pos += l - - return overlap - - ##################################################### - ## Unsorted as yet - # TODO: capture in CIGAR object - property cigartuples: - """the :term:`cigar` alignment. The alignment - is returned as a list of tuples of (operation, length). - - If the alignment is not present, None is returned. 
- - The operations are: - - +-----+--------------+-----+ - |M |BAM_CMATCH |0 | - +-----+--------------+-----+ - |I |BAM_CINS |1 | - +-----+--------------+-----+ - |D |BAM_CDEL |2 | - +-----+--------------+-----+ - |N |BAM_CREF_SKIP |3 | - +-----+--------------+-----+ - |S |BAM_CSOFT_CLIP|4 | - +-----+--------------+-----+ - |H |BAM_CHARD_CLIP|5 | - +-----+--------------+-----+ - |P |BAM_CPAD |6 | - +-----+--------------+-----+ - |= |BAM_CEQUAL |7 | - +-----+--------------+-----+ - |X |BAM_CDIFF |8 | - +-----+--------------+-----+ - - .. note:: - The output is a list of (operation, length) tuples, such as - ``[(0, 30)]``. - This is different from the SAM specification and - the :attr:`cigarstring` property, which uses a - (length, operation) order, for example: ``30M``. - - To unset the cigar property, assign an empty list - or None. - """ - def __get__(self): - cdef uint32_t * cigar_p - cdef bam1_t * src - cdef uint32_t op, l - cdef int k - - src = self._delegate - if pysam_get_n_cigar(src) == 0: - return None - - cigar = [] - - cigar_p = pysam_bam_get_cigar(src); - for k from 0 <= k < pysam_get_n_cigar(src): - op = cigar_p[k] & BAM_CIGAR_MASK - l = cigar_p[k] >> BAM_CIGAR_SHIFT - cigar.append((op, l)) - return cigar - - def __set__(self, values): - cdef uint32_t * p - cdef bam1_t * src - cdef op, l - cdef int k, ncigar - - k = 0 - - src = self._delegate - - # get location of cigar string - p = pysam_bam_get_cigar(src) - - # empty values for cigar string - if values is None: - values = [] - - ncigar = len(values) - # create space for cigar data within src.data - pysam_bam_update(src, - pysam_get_n_cigar(src) * 4, - ncigar * 4, - p) - - # length is number of cigar operations, not bytes - pysam_set_n_cigar(src, ncigar) - - # re-acquire pointer to location in memory - # as it might have moved - p = pysam_bam_get_cigar(src) - - # insert cigar operations - for op, l in values: - p[k] = l << BAM_CIGAR_SHIFT | op - k += 1 - - ## setting the cigar string requires updating 
the bin - pysam_set_bin(src, - hts_reg2bin( - src.core.pos, - bam_endpos(src), - 14, - 5)) - - - cpdef set_tag(self, - tag, - value, - value_type=None, - replace=True): - """sets a particular field *tag* to *value* in the optional alignment - section. - - *value_type* describes the type of *value* that is to entered - into the alignment record.. It can be set explicitely to one - of the valid one-letter type codes. If unset, an appropriate - type will be chosen automatically. - - An existing value of the same *tag* will be overwritten unless - replace is set to False. This is usually not recommened as a - tag may only appear once in the optional alignment section. - - If *value* is None, the tag will be deleted. - """ - - cdef int value_size - cdef uint8_t * value_ptr - cdef uint8_t *existing_ptr - cdef uint8_t type_code - cdef float float_value - cdef double double_value - cdef int32_t int_value - cdef bam1_t * src = self._delegate - cdef char * _value_type - - if len(tag) != 2: - raise ValueError('Invalid tag: %s' % tag) - - tag = _forceBytes(tag) - if replace: - existing_ptr = bam_aux_get(src, tag) - if existing_ptr: - bam_aux_del(src, existing_ptr) - - # setting value to None deletes a tag - if value is None: - return - - type_code = _get_value_code(value, value_type) - if type_code == 0: - raise ValueError("can't guess type or invalid type code specified") - - # Not Endian-safe, but then again neither is samtools! 
- if type_code == 'Z': - value = _forceBytes(value) - value_ptr = value - value_size = len(value)+1 - elif type_code == 'i': - int_value = value - value_ptr = &int_value - value_size = sizeof(int32_t) - elif type_code == 'd': - double_value = value - value_ptr = &double_value - value_size = sizeof(double) - elif type_code == 'f': - float_value = value - value_ptr = &float_value - value_size = sizeof(float) - else: - raise ValueError('Unsupported value_type in set_option') - - - bam_aux_append(src, - tag, - type_code, - value_size, - value_ptr) - - cpdef has_tag(self, tag): - """returns true if the optional alignment section - contains a given *tag*.""" - cdef uint8_t * v - cdef int nvalues - btag = _forceBytes(tag) - v = bam_aux_get(self._delegate, btag) - return v != NULL - - cpdef get_tag(self, tag): - """retrieves data from the optional alignment section - given a two-letter *tag* denoting the field. - - If *tag* is not present, a KeyError is raised. - - The returned value is cast into an appropriate python type. - - This method is the fastest way to access the optional - alignment section if only few tags need to be retrieved. 
- """ - cdef uint8_t * v - cdef int nvalues - btag = _forceBytes(tag) - v = bam_aux_get(self._delegate, btag) - if v == NULL: - raise KeyError("tag '%s' not present" % tag) - auxtype = chr(v[0]) - if auxtype == 'c' or auxtype == 'C' or auxtype == 's' or auxtype == 'S': - return bam_aux2i(v) - elif auxtype == 'i' or auxtype == 'I': - return bam_aux2i(v) - elif auxtype == 'f' or auxtype == 'F': - return bam_aux2f(v) - elif auxtype == 'd' or auxtype == 'D': - return bam_aux2f(v) - elif auxtype == 'A': - # there might a more efficient way - # to convert a char into a string - return '%c' % bam_aux2A(v) - elif auxtype == 'Z': - return _charptr_to_str(bam_aux2Z(v)) - elif auxtype == 'B': - bytesize, nvalues, values = convertBinaryTagToList(v + 1) - return values - else: - raise ValueError("unknown auxilliary type '%s'" % auxtype) - - def get_tags(self, with_value_type=False): - """the fields in the optional aligment section. - - Returns a list of all fields in the optional - alignment section. Values are converted to appropriate python - values. For example: - - [(NM, 2), (RG, "GJP00TM04")] - - If *with_value_type* is set, the value type as encode in - the AlignedSegment record will be returned as well: - - [(NM, 2, "i"), (RG, "GJP00TM04", "Z")] - - This method will convert all values in the optional alignment - section. When getting only one or few tags, please see - :meth:`get_tag` for a quicker way to achieve this. 
- - """ - - cdef char * ctag - cdef bam1_t * src - cdef uint8_t * s - cdef char auxtag[3] - cdef char auxtype - cdef uint8_t byte_size - cdef int32_t nvalues - - src = self._delegate - if src.l_data == 0: - return [] - s = pysam_bam_get_aux(src) - result = [] - auxtag[2] = 0 - while s < (src.data + src.l_data): - # get tag - auxtag[0] = s[0] - auxtag[1] = s[1] - s += 2 - auxtype = s[0] - if auxtype in ('c', 'C'): - value = bam_aux2i(s) - s += 1 - elif auxtype in ('s', 'S'): - value = bam_aux2i(s) - s += 2 - elif auxtype in ('i', 'I'): - value = bam_aux2i(s) - s += 4 - elif auxtype == 'f': - value = bam_aux2f(s) - s += 4 - elif auxtype == 'd': - value = bam_aux2f(s) - s += 8 - elif auxtype == 'A': - value = "%c" % bam_aux2A(s) - s += 1 - elif auxtype in ('Z', 'H'): - value = _charptr_to_str(bam_aux2Z(s)) - # +1 for NULL terminated string - s += len(value) + 1 - elif auxtype == 'B': - s += 1 - byte_size, nvalues, value = convertBinaryTagToList(s) - # 5 for 1 char and 1 int - s += 5 + (nvalues * byte_size) - 1 - else: - raise KeyError("unknown type '%s'" % auxtype) - - s += 1 - - result.append((_charptr_to_str(auxtag), value)) - - return result - - def set_tags(self, tags): - """sets the fields in the optional alignmest section with - a list of (tag, value) tuples. - - The :term:`value type` of the values is determined from the - python type. Optionally, a type may be given explicitely as - a third value in the tuple, For example: - - x.set_tags([(NM, 2, "i"), (RG, "GJP00TM04", "Z")] - - This method will not enforce the rule that the same tag may appear - only once in the optional alignment section. 
- """ - - cdef bam1_t * src - cdef uint8_t * s - cdef char * temp - cdef int new_size = 0 - cdef int old_size - src = self._delegate - - # convert and pack the data - if tags is not None and len(tags) > 0: - fmt, args =_pack_tags(tags) - new_size = struct.calcsize(fmt) - buffer = ctypes.create_string_buffer(new_size) - struct.pack_into(fmt, - buffer, - 0, - *args) - - # delete the old data and allocate new space. - # If total_size == 0, the aux field will be - # empty - old_size = pysam_bam_get_l_aux(src) - pysam_bam_update(src, - old_size, - new_size, - pysam_bam_get_aux(src)) - - # copy data only if there is any - if new_size > 0: - - # get location of new data - s = pysam_bam_get_aux(src) - - # check if there is direct path from buffer.raw to tmp - p = buffer.raw - # create handle to make sure buffer stays alive long - # enough for memcpy, see issue 129 - temp = p - memcpy(s, temp, new_size) - - - ######################################################## - # Compatibility Accessors - # Functions, properties for compatibility with pysam < 0.8 - # - # Several options - # change the factory functions according to API - # * requires code changes throughout, incl passing - # handles to factory functions - # subclass functions and add attributes at runtime - # e.g.: AlignedSegments.qname = AlignedSegments.query_name - # * will slow down the default interface - # explicit declaration of getters/setters - ######################################################## - property qname: - def __get__(self): return self.query_name - def __set__(self, v): self.query_name = v - property tid: - def __get__(self): return self.reference_id - def __set__(self, v): self.reference_id = v - property pos: - def __get__(self): return self.reference_start - def __set__(self, v): self.reference_start = v - property mapq: - def __get__(self): return self.mapping_quality - def __set__(self, v): self.mapping_quality = v - property rnext: - def __get__(self): return self.next_reference_id - def 
__set__(self, v): self.next_reference_id = v - property pnext: - def __get__(self): - return self.next_reference_start - def __set__(self, v): - self.next_reference_start = v - property cigar: - def __get__(self): - r = self.cigartuples - if r is None: - r = [] - return r - def __set__(self, v): self.cigartuples = v - property tlen: - def __get__(self): - return self.template_length - def __set__(self, v): - self.template_length = v - property seq: - def __get__(self): return self.query_sequence - def __set__(self, v): self.query_sequence = v - property qual: - def __get__(self): - return toQualityString(self.query_qualities) - def __set__(self, v): - self.query_qualities = fromQualityString(v) - property alen: - def __get__(self): - return self.reference_length - def __set__(self, v): - self.reference_length = v - property aend: - def __get__(self): - return self.reference_end - def __set__(self, v): - self.reference_end = v - property rlen: - def __get__(self): - return self.query_length - def __set__(self, v): - self.query_length = v - property query: - def __get__(self): - return self.query_alignment_sequence - def __set__(self, v): - self.query_alignment_sequence = v - property qqual: - def __get__(self): - return toQualityString(self.query_alignment_qualities) - def __set__(self, v): - self.query_alignment_qualities = fromQualityString(v) - property qstart: - def __get__(self): - return self.query_alignment_start - def __set__(self, v): - self.query_alignment_start = v - property qend: - def __get__(self): - return self.query_alignment_end - def __set__(self, v): - self.query_alignment_end = v - property qlen: - def __get__(self): - return self.query_alignment_length - def __set__(self, v): - self.query_alignment_length = v - property mrnm: - def __get__(self): - return self.next_reference_id - def __set__(self, v): - self.next_reference_id = v - property mpos: - def __get__(self): - return self.next_reference_start - def __set__(self, v): - 
self.next_reference_start = v - property rname: - def __get__(self): - return self.reference_id - def __set__(self, v): - self.reference_id = v - property isize: - def __get__(self): - return self.template_length - def __set__(self, v): - self.template_length = v - property blocks: - def __get__(self): - return self.get_blocks() - property aligned_pairs: - def __get__(self): - return self.get_aligned_pairs() - property inferred_length: - def __get__(self): - return self.infer_query_length() - property positions: - def __get__(self): - return self.get_reference_positions() - property tags: - def __get__(self): - return self.get_tags() - def __set__(self, tags): - self.set_tags(tags) - def overlap(self): - return self.get_overlap() - def opt(self, tag): - return self.get_tag(tag) - def setTag(self, tag, value, value_type=None, replace=True): - return self.set_tag(tag, value, value_type, replace) - - -cdef class PileupColumn: - '''A pileup of reads at a particular reference sequence postion - (:term:`column`). A pileup column contains all the reads that map - to a certain target base. - - This class is a proxy for results returned by the samtools pileup - engine. If the underlying engine iterator advances, the results - of this column will change. 
- - ''' - def __init__(self): - raise TypeError("this class cannot be instantiated from Python") - - def __str__(self): - return "\t".join(map(str, - (self.reference_id, - self.reference_pos, - self.nsegments))) +\ - "\n" +\ - "\n".join(map(str, self.pileups)) - - property reference_id: - '''the reference sequence number as defined in the header''' - def __get__(self): - return self.tid - - property nsegments: - '''number of reads mapping to this column.''' - def __get__(self): - return self.n_pu - def __set__(self, n): - self.n_pu = n - - property reference_pos: - '''the position in the reference sequence (0-based).''' - def __get__(self): - return self.pos - - property pileups: - '''list of reads (:class:`pysam.PileupRead`) aligned to this column''' - def __get__(self): - cdef int x - pileups = [] - - if self.plp == NULL or self.plp[0] == NULL: - raise ValueError("PileupColumn accessed after iterator finished") - - # warning: there could be problems if self.n and self.buf are - # out of sync. - for x from 0 <= x < self.n_pu: - pileups.append(makePileupRead(&(self.plp[0][x]))) - return pileups - - ######################################################## - # Compatibility Accessors - # Functions, properties for compatibility with pysam < 0.8 - ######################################################## - property pos: - def __get__(self): - return self.reference_pos - def __set__(self, v): - self.reference_pos = v - - property tid: - def __get__(self): - return self.reference_id - def __set__(self, v): - self.reference_id = v - - property n: - def __get__(self): - return self.nsegments - def __set__(self, v): - self.nsegments = v - - -cdef class PileupRead: - '''Representation of a read aligned to a particular position in the - reference sequence. 
- - ''' - - def __init__(self): - raise TypeError( - "this class cannot be instantiated from Python") - - def __str__(self): - return "\t".join( - map(str, - (self.alignment, self.query_position, - self.indel, self.level, - self.is_del, self.is_head, - self.is_tail, self.is_refskip))) - - property alignment: - """a :class:`pysam.AlignedSegment` object of the aligned read""" - def __get__(self): - return self._alignment - - property query_position: - """position of the read base at the pileup site, 0-based. - None if is_del or is_refskip is set. - - """ - def __get__(self): - if self.is_del or self.is_refskip: - return None - else: - return self._qpos - - property indel: - """indel length; 0 for no indel, positive for ins and negative for del""" - def __get__(self): - return self._indel - - property level: - """the level of the read in the "viewer" mode""" - def __get__(self): - return self._level - - property is_del: - """1 iff the base on the padded read is a deletion""" - def __get__(self): - return self._is_del - - property is_head: - def __get__(self): - return self._is_head - - property is_tail: - def __get__(self): - return self._is_tail - - property is_refskip: - def __get__(self): - return self._is_refskip - - -cdef class SNPCall: - '''the results of a SNP call.''' - cdef int _tid - cdef int _pos - cdef char _reference_base - cdef char _genotype - cdef int _consensus_quality - cdef int _snp_quality - cdef int _rms_mapping_quality - cdef int _coverage - - property tid: - '''the chromosome ID as is defined in the header''' - def __get__(self): - return self._tid - - property pos: - '''nucleotide position of SNP.''' - def __get__(self): return self._pos - - property reference_base: - '''reference base at pos. 
``N`` if no reference sequence supplied.''' - def __get__(self): return from_string_and_size( &self._reference_base, 1 ) - - property genotype: - '''the genotype called.''' - def __get__(self): return from_string_and_size( &self._genotype, 1 ) - - property consensus_quality: - '''the genotype quality (Phred-scaled).''' - def __get__(self): return self._consensus_quality - - property snp_quality: - '''the snp quality (Phred scaled) - probability of consensus being - identical to reference sequence.''' - def __get__(self): return self._snp_quality - - property mapping_quality: - '''the root mean square (rms) of the mapping quality of all reads - involved in the call.''' - def __get__(self): return self._rms_mapping_quality - - property coverage: - '''coverage or read depth - the number of reads involved in the call.''' - def __get__(self): return self._coverage - - def __str__(self): - - return "\t".join( map(str, ( - self.tid, - self.pos, - self.reference_base, - self.genotype, - self.consensus_quality, - self.snp_quality, - self.mapping_quality, - self.coverage ) ) ) - - -cdef class IndexedReads: - """index a Sam/BAM-file by query name. - - The index is kept in memory and can be substantial. - - By default, the file is re-openend to avoid conflicts if multiple - operators work on the same file. Set *multiple_iterators* = False - to not re-open *samfile*. - """ - - def __init__(self, AlignmentFile samfile, int multiple_iterators=True): - - # makes sure that samfile stays alive as long as this - # object is alive. - self.samfile = samfile - - assert samfile.is_bam, "can only IndexReads on bam files" - - # multiple_iterators the file - note that this makes the iterator - # slow and causes pileup to slow down significantly. 
- if multiple_iterators: - self.htsfile = hts_open(samfile._filename, 'r') - assert self.htsfile != NULL - # read header - required for accurate positioning - self.header = sam_hdr_read(self.htsfile) - self.owns_samfile = True - else: - self.htsfile = self.samfile.htsfile - self.header = self.samfile.header - self.owns_samfile = False - - def build(self): - '''build index.''' - - self.index = collections.defaultdict(list) - - # this method will start indexing from the current file - # position if you decide - cdef int ret = 1 - cdef bam1_t * b = calloc(1, sizeof( bam1_t)) - - cdef uint64_t pos + cdef uint64_t pos while ret > 0: - pos = bgzf_tell(hts_get_bgzfp(self.htsfile)) - ret = sam_read1(self.htsfile, - self.samfile.header, - b) + with nogil: + pos = bgzf_tell(hts_get_bgzfp(self.htsfile)) + ret = sam_read1(self.htsfile, + self.samfile.header, + b) if ret > 0: - qname = _charptr_to_str(pysam_bam_get_qname(b)) + qname = charptr_to_str(pysam_bam_get_qname(b)) self.index[qname].append(pos) bam_destroy1(b) def find(self, query_name): - '''find *query_name* in index. + '''find `query_name` in index. - Returns an iterator over all reads with query_name. + Returns + ------- + + IteratorRowSelection + Returns an iterator over all reads with query_name. + + Raises + ------ + + KeyError + if the `query_name` is not in the index. - Raise a KeyError if the *query_name* is not in the index. ''' if query_name in self.index: return IteratorRowSelection( @@ -3962,31 +2428,8 @@ cdef class IndexedReads: hts_close(self.htsfile) bam_hdr_destroy(self.header) -cpdef set_verbosity(int verbosity): - u"""Set htslib's hts_verbose global variable to the specified value. - """ - return hts_set_verbosity(verbosity) - -cpdef get_verbosity(): - u"""Return the value of htslib's hts_verbose global variable. 
- """ - return hts_get_verbosity() - -__all__ = ["AlignmentFile", - "IteratorRow", - "IteratorColumn", - "AlignedSegment", - "PileupColumn", - "PileupRead", - "IndexedReads", - "toQualityString", - "fromQualityString", - "get_verbosity", - "set_verbosity"] - # "IteratorSNPCalls", - # "SNPCaller", - # "IndelCaller", - # "IteratorIndelCalls", - - - +__all__ = [ + "AlignmentFile", + "IteratorRow", + "IteratorColumn", + "IndexedReads"] diff --git a/pysam/cbcf.pxd b/pysam/cbcf.pxd index 83e628a..b56f7ed 100644 --- a/pysam/cbcf.pxd +++ b/pysam/cbcf.pxd @@ -41,7 +41,7 @@ from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t from libc.stdlib cimport malloc, calloc, realloc, free from libc.string cimport memcpy, memcmp, strncpy, strlen, strdup -from chtslib cimport * +from pysam.chtslib cimport * cdef class VariantHeader(object): diff --git a/pysam/cbcf.pyx b/pysam/cbcf.pyx index ae274d5..4882503 100644 --- a/pysam/cbcf.pyx +++ b/pysam/cbcf.pyx @@ -76,7 +76,7 @@ # fetch(contig=None, start=None, stop=None, region=None, reopen=False) # subset_samples(include_samples) # -# VariantHeader(mode) # mode='r' for reading, mode='w' for writing +# VariantHeader() # # version: VCF version # samples: sequence-like access to samples @@ -188,7 +188,7 @@ from __future__ import division, print_function import os import sys -from libc.string cimport strcmp +from libc.string cimport strcmp, strpbrk cimport cython @@ -217,62 +217,8 @@ cdef tuple COMPRESSION = ('NONE', 'GZIP', 'BGZF', 'CUSTOM') ## Python 3 compatibility functions ######################################################################## -IS_PYTHON3 = PY_MAJOR_VERSION >= 3 - - -# filename encoding (copied from lxml.etree.pyx) -cdef str FILENAME_ENCODING -FILENAME_ENCODING = sys.getfilesystemencoding() -if FILENAME_ENCODING is None: - FILENAME_ENCODING = sys.getdefaultencoding() -if FILENAME_ENCODING is None: - FILENAME_ENCODING = 'ascii' - - -cdef bytes encode_filename(object filename): - """Make sure a filename is 8-bit 
encoded (or None).""" - if filename is None: - return None - elif PyBytes_Check(filename): - return filename - elif PyUnicode_Check(filename): - return filename.encode(FILENAME_ENCODING) - else: - raise TypeError('Argument must be string or unicode.') - - -cdef force_str(object s): - """Return s converted to str type of current Python (bytes in Py2, unicode in Py3)""" - if s is None: - return None - if PY_MAJOR_VERSION < 3: - return s - elif PyBytes_Check(s): - return s.decode('ascii') - else: - # assume unicode - return s - - -cdef bytes force_bytes(object s): - """convert string or unicode object to bytes, assuming ascii encoding.""" - if PY_MAJOR_VERSION < 3: - return s - elif s is None: - return None - elif PyBytes_Check(s): - return s - elif PyUnicode_Check(s): - return s.encode('ascii') - else: - raise TypeError('Argument must be string, bytes or unicode.') - - -cdef charptr_to_str(const char* s): - if PY_MAJOR_VERSION < 3: - return s - else: - return s.decode('ascii') +from pysam.cutils cimport force_bytes, force_str, charptr_to_str +from pysam.cutils cimport encode_filename, from_string_and_size ######################################################################## @@ -285,7 +231,7 @@ cdef tuple char_array_to_tuple(const char **a, int n, int free_after=0): if not a: return None try: - return tuple( charptr_to_str(a[i]) for i in range(n) ) + return tuple(charptr_to_str(a[i]) for i in range(n)) finally: if free_after and a: free(a) @@ -371,8 +317,8 @@ cdef object bcf_info_value(const bcf_info_t *z): return value -cdef inline int is_gt_fmt(bcf_hdr_t *h, bcf_fmt_t *fmt): - return strcmp(bcf_hdr_int2id(h, BCF_DT_ID, fmt.id), "GT") == 0 +cdef inline int is_gt_fmt(bcf_hdr_t *hdr, bcf_fmt_t *fmt): + return strcmp(bcf_hdr_int2id(hdr, BCF_DT_ID, fmt.id), "GT") == 0 ######################################################################## @@ -412,25 +358,103 @@ cdef class VariantHeaderRecord(object): return tuple( (r.keys[i] if r.keys[i] else None, r.vals[i] if 
r.vals[i] else None) for i in range(r.nkeys) ) + def __len__(self): + cdef bcf_hrec_t *r = self.ptr + return r.nkeys + + def __bool__(self): + cdef bcf_hrec_t *r = self.ptr + cdef int i + for i in range(r.nkeys): + yield r.keys[i] + + def __getitem__(self, key): + """get attribute value""" + cdef bcf_hrec_t *r = self.ptr + cdef int i + for i in range(r.nkeys): + if r.keys[i] and r.keys[i] == key: + return r.vals[i] if r.vals[i] else None + raise KeyError('cannot find metadata key') + + def __iter__(self): + cdef bcf_hrec_t *r = self.ptr + cdef int i + for i in range(r.nkeys): + if r.keys[i]: + yield r.keys[i] + + def get(self, key, default=None): + """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" + try: + return self[key] + except KeyError: + return default + + def __contains__(self, key): + try: + self[key] + except KeyError: + return False + else: + return True + + def iterkeys(self): + """D.iterkeys() -> an iterator over the keys of D""" + return iter(self) + + def itervalues(self): + """D.itervalues() -> an iterator over the values of D""" + cdef bcf_hrec_t *r = self.ptr + cdef int i + for i in range(r.nkeys): + if r.keys[i]: + yield r.vals[i] if r.vals[i] else None + + def iteritems(self): + """D.iteritems() -> an iterator over the (key, value) items of D""" + cdef bcf_hrec_t *r = self.ptr + cdef int i + for i in range(r.nkeys): + if r.keys[i]: + yield r.keys[i], r.vals[i] if r.vals[i] else None + + def keys(self): + """D.keys() -> list of D's keys""" + return list(self) + + def items(self): + """D.items() -> list of D's (key, value) pairs, as 2-tuples""" + return list(self.iteritems()) + + def values(self): + """D.values() -> list of D's values""" + return list(self.itervalues()) + + # Mappings are not hashable by default, but subclasses can change this + __hash__ = None + + #TODO: implement __richcmp__ + def __str__(self): cdef bcf_hrec_t *r = self.ptr if r.type == BCF_HL_GEN: return '##{}={}'.format(self.key, self.value) else: attrs = 
','.join('{}={}'.format(k, v) for k,v in self.attrs if k != 'IDX') - return '##{}=<{}>'.format(self.type, attrs) + return '##{}=<{}>'.format(self.key or self.type, attrs) -cdef VariantHeaderRecord makeVariantHeaderRecord(VariantHeader header, bcf_hrec_t *h): +cdef VariantHeaderRecord makeVariantHeaderRecord(VariantHeader header, bcf_hrec_t *hdr): if not header: raise ValueError('invalid VariantHeader') - if not h: + if not hdr: return None cdef VariantHeaderRecord record = VariantHeaderRecord.__new__(VariantHeaderRecord) record.header = header - record.ptr = h + record.ptr = hdr return record @@ -472,8 +496,8 @@ cdef class VariantMetadata(object): property name: """metadata name""" def __get__(self): - cdef bcf_hdr_t *h = self.header.ptr - return h.id[BCF_DT_ID][self.id].key + cdef bcf_hdr_t *hdr = self.header.ptr + return hdr.id[BCF_DT_ID][self.id].key # Q: Should this be exposed? property id: @@ -484,12 +508,12 @@ cdef class VariantMetadata(object): property number: """metadata number (i.e. cardinality)""" def __get__(self): - cdef bcf_hdr_t *h = self.header.ptr - if not bcf_hdr_idinfo_exists(h, self.type, self.id) or self.type == BCF_HL_FLT: + cdef bcf_hdr_t *hdr = self.header.ptr + if not bcf_hdr_idinfo_exists(hdr, self.type, self.id) or self.type == BCF_HL_FLT: return None - cdef int l = bcf_hdr_id2length(h, self.type, self.id) + cdef int l = bcf_hdr_id2length(hdr, self.type, self.id) if l == BCF_VL_FIXED: - return bcf_hdr_id2number(h, self.type, self.id) + return bcf_hdr_id2number(hdr, self.type, self.id) elif l == BCF_VL_VAR: return '.' 
else: @@ -498,18 +522,26 @@ cdef class VariantMetadata(object): property type: """metadata value type""" def __get__(self): - cdef bcf_hdr_t *h = self.header.ptr - if not bcf_hdr_idinfo_exists(h, self.type, self.id) or self.type == BCF_HL_FLT: + cdef bcf_hdr_t *hdr = self.header.ptr + if not bcf_hdr_idinfo_exists(hdr, self.type, self.id) or self.type == BCF_HL_FLT: return None - return VALUE_TYPES[bcf_hdr_id2type(h, self.type, self.id)] + return VALUE_TYPES[bcf_hdr_id2type(hdr, self.type, self.id)] - property header: + property description: + """metadata description (or None if not set)""" + def __get__(self): + descr = self.record.get('Description') + if descr: + descr = descr.strip('"') + return descr + + property record: """:class:`VariantHeaderRecord` associated with this :class:`VariantMetadata` object""" def __get__(self): - cdef bcf_hdr_t *h = self.header.ptr - if not bcf_hdr_idinfo_exists(h, self.type, self.id): + cdef bcf_hdr_t *hdr = self.header.ptr + if not bcf_hdr_idinfo_exists(hdr, self.type, self.id): return None - cdef bcf_hrec_t *hrec = h.id[BCF_DT_ID][self.id].val.hrec[self.type] + cdef bcf_hrec_t *hrec = hdr.id[BCF_DT_ID][self.id].val.hrec[self.type] if not hrec: return None return makeVariantHeaderRecord(self.header, hrec) @@ -536,33 +568,56 @@ cdef VariantMetadata makeVariantMetadata(VariantHeader header, int type, int id) cdef class VariantHeaderMetadata(object): """mapping from filter, info or format name to :class:`VariantMetadata` object""" + def add(self, id, number, type, description, **kwargs): + """Add a new filter, info or format record""" + if id in self: + raise ValueError('Header already exists for id={}'.format(id)) + + if self.type == BCF_HL_FLT: + if number is not None: + raise ValueError('Number must be None when adding a filter') + if type is not None: + raise ValueError('Type must be None when adding a filter') + + items = [('ID', id), ('Description', description)] + else: + if type not in VALUE_TYPES: + raise 
ValueError('unknown type specified: {}'.format(type)) + if number is None: + number = '.' + + items = [('ID', id), ('Number', number), ('Type', type), ('Description', description)] + + items += kwargs.items() + self.header.add_meta(METADATA_TYPES[self.type], items=items) + def __len__(self): - cdef bcf_hdr_t *h = self.header.ptr + cdef bcf_hdr_t *hdr = self.header.ptr cdef bcf_idpair_t *idpair cdef int32_t i, n = 0 - for i in range(h.n[BCF_DT_ID]): - idpair = h.id[BCF_DT_ID] + i + for i in range(hdr.n[BCF_DT_ID]): + idpair = hdr.id[BCF_DT_ID] + i if idpair.key and idpair.val and idpair.val.info[self.type] & 0xF != 0xF: n += 1 return n def __bool__(self): - cdef bcf_hdr_t *h = self.header.ptr + cdef bcf_hdr_t *hdr = self.header.ptr cdef bcf_idpair_t *idpair cdef int32_t i - for i in range(h.n[BCF_DT_ID]): - idpair = h.id[BCF_DT_ID] + i + for i in range(hdr.n[BCF_DT_ID]): + idpair = hdr.id[BCF_DT_ID] + i if idpair.key and idpair.val and idpair.val.info[self.type] & 0xF != 0xF: return True return False def __getitem__(self, key): - cdef bcf_hdr_t *h = self.header.ptr - cdef vdict_t *d = h.dict[BCF_DT_ID] + cdef bcf_hdr_t *hdr = self.header.ptr + cdef vdict_t *d = hdr.dict[BCF_DT_ID] cdef khiter_t k = kh_get_vdict(d, key) if k == kh_end(d) or kh_val_vdict(d, k).info[self.type] & 0xF == 0xF: @@ -571,12 +626,12 @@ cdef class VariantHeaderMetadata(object): return makeVariantMetadata(self.header, self.type, kh_val_vdict(d, k).id) def __iter__(self): - cdef bcf_hdr_t *h = self.header.ptr + cdef bcf_hdr_t *hdr = self.header.ptr cdef bcf_idpair_t *idpair cdef int32_t i - for i in range(h.n[BCF_DT_ID]): - idpair = h.id[BCF_DT_ID] + i + for i in range(hdr.n[BCF_DT_ID]): + idpair = hdr.id[BCF_DT_ID] + i if idpair.key and idpair.val and idpair.val.info[self.type] & 0xF != 0xF: yield idpair.key @@ -644,8 +699,8 @@ cdef class VariantContig(object): property name: """contig name""" def __get__(self): - cdef bcf_hdr_t *h = self.header.ptr - return h.id[BCF_DT_CTG][self.id].key + cdef 
bcf_hdr_t *hdr = self.header.ptr + return hdr.id[BCF_DT_CTG][self.id].key property id: """contig internal id number""" @@ -655,15 +710,15 @@ cdef class VariantContig(object): property length: """contig length or None if not available""" def __get__(self): - cdef bcf_hdr_t *h = self.header.ptr - cdef uint32_t length = h.id[BCF_DT_CTG][self.id].val.info[0] + cdef bcf_hdr_t *hdr = self.header.ptr + cdef uint32_t length = hdr.id[BCF_DT_CTG][self.id].val.info[0] return length if length else None property header: """:class:`VariantHeaderRecord` associated with this :class:`VariantContig` object""" def __get__(self): - cdef bcf_hdr_t *h = self.header.ptr - cdef bcf_hrec_t *hrec = h.id[BCF_DT_CTG][self.id].val.hrec[0] + cdef bcf_hdr_t *hdr = self.header.ptr + cdef bcf_hrec_t *hrec = hdr.id[BCF_DT_CTG][self.id].val.hrec[0] return makeVariantHeaderRecord(self.header, hrec) @@ -685,26 +740,26 @@ cdef class VariantHeaderContigs(object): """mapping from contig name or index to :class:`VariantContig` object.""" def __len__(self): - cdef bcf_hdr_t *h = self.header.ptr - assert kh_size(h.dict[BCF_DT_CTG]) == h.n[BCF_DT_CTG] - return h.n[BCF_DT_CTG] + cdef bcf_hdr_t *hdr = self.header.ptr + assert kh_size(hdr.dict[BCF_DT_CTG]) == hdr.n[BCF_DT_CTG] + return hdr.n[BCF_DT_CTG] def __bool__(self): - cdef bcf_hdr_t *h = self.header.ptr - assert kh_size(h.dict[BCF_DT_CTG]) == h.n[BCF_DT_CTG] - return h.n[BCF_DT_CTG] != 0 + cdef bcf_hdr_t *hdr = self.header.ptr + assert kh_size(hdr.dict[BCF_DT_CTG]) == hdr.n[BCF_DT_CTG] + return hdr.n[BCF_DT_CTG] != 0 def __getitem__(self, key): - cdef bcf_hdr_t *h = self.header.ptr + cdef bcf_hdr_t *hdr = self.header.ptr cdef int index if isinstance(key, int): index = key - if index < 0 or index >= h.n[BCF_DT_CTG]: + if index < 0 or index >= hdr.n[BCF_DT_CTG]: raise IndexError('invalid contig index') return makeVariantContig(self.header, index) - cdef vdict_t *d = h.dict[BCF_DT_CTG] + cdef vdict_t *d = hdr.dict[BCF_DT_CTG] cdef khiter_t k = 
kh_get_vdict(d, key) if k == kh_end(d): @@ -715,14 +770,14 @@ cdef class VariantHeaderContigs(object): return makeVariantContig(self.header, id) def __iter__(self): - cdef bcf_hdr_t *h = self.header.ptr - cdef vdict_t *d = h.dict[BCF_DT_CTG] + cdef bcf_hdr_t *hdr = self.header.ptr + cdef vdict_t *d = hdr.dict[BCF_DT_CTG] cdef uint32_t n = kh_size(d) - assert n == h.n[BCF_DT_CTG] + assert n == hdr.n[BCF_DT_CTG] for i in range(n): - yield bcf_hdr_id2name(h, i) + yield bcf_hdr_id2name(hdr, i) def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" @@ -770,6 +825,14 @@ cdef class VariantHeaderContigs(object): #TODO: implement __richcmp__ + def add(self, id, **kwargs): + """Add a new contig record""" + if id in self: + raise ValueError('Header already exists for contig {}'.format(id)) + + items = [('ID', id)] + kwargs.items() + self.header.add_meta('contig', items=items) + cdef VariantHeaderContigs makeVariantHeaderContigs(VariantHeader header): if not header: @@ -791,26 +854,25 @@ cdef class VariantHeaderSamples(object): return bcf_hdr_nsamples(self.header.ptr) != 0 def __getitem__(self, index): - cdef bcf_hdr_t *h = self.header.ptr - cdef int32_t n = bcf_hdr_nsamples(h) + cdef bcf_hdr_t *hdr = self.header.ptr + cdef int32_t n = bcf_hdr_nsamples(hdr) cdef int32_t i = index if i < 0 or i >= n: raise IndexError('invalid sample index') - return h.samples[i] + return hdr.samples[i] def __iter__(self): - cdef bcf_hdr_t *h = self.header.ptr - cdef int32_t n = bcf_hdr_nsamples(h) - cdef int32_t i + cdef bcf_hdr_t *hdr = self.header.ptr + cdef int32_t i, n = bcf_hdr_nsamples(hdr) for i in range(n): - yield h.samples[i] + yield hdr.samples[i] def __contains__(self, key): - cdef bcf_hdr_t *h = self.header.ptr - cdef vdict_t *d = h.dict[BCF_DT_SAMPLE] + cdef bcf_hdr_t *hdr = self.header.ptr + cdef vdict_t *d = hdr.dict[BCF_DT_SAMPLE] cdef khiter_t k = kh_get_vdict(d, key) return k != kh_end(d) @@ -820,6 +882,10 @@ cdef class 
VariantHeaderSamples(object): #TODO: implement __richcmp__ + def add(self, name): + """Add a new sample""" + self.header.add_sample(name) + cdef VariantHeaderSamples makeVariantHeaderSamples(VariantHeader header): if not header: @@ -839,17 +905,12 @@ cdef class VariantHeader(object): #FIXME: Add mutable methods # See makeVariantHeader for C constructor - def __cinit__(self, mode): + def __cinit__(self): self.ptr = NULL # Python constructor - def __init__(self, mode): - if mode not in 'rw': - raise ValueError("invalid header mode specified '{}'".format(mode)) - - mode = force_bytes(mode) - self.ptr = bcf_hdr_init(mode) - + def __init__(self): + self.ptr = bcf_hdr_init(b'w') if not self.ptr: raise ValueError('cannot create VariantHeader') @@ -900,6 +961,18 @@ cdef class VariantHeader(object): def __get__(self): return makeVariantHeaderMetadata(self, BCF_HL_FMT) + property alts: + """ + alt metadata (:class:`dict` ID->record). The data returned is just a snapshot of alt records; it is created every time the property is requested, and modifications will not be reflected in the header metadata and vice versa. + + i.e. it is just a dict that reflects the state of alt records at the time it is created.
+ """ + def __get__(self): + return { record['ID']:record for record in self.records if record.key.upper() == 'ALT' } + + # only safe to do when opening an htsfile cdef _subset_samples(self, include_samples): keep_samples = set(self.samples) @@ -926,13 +999,67 @@ cdef class VariantHeader(object): free(hstr) return force_str(hstr) + def add_record(self, VariantHeaderRecord record): + """Add an existing :class:`VariantHeaderRecord` to this header""" + cdef bcf_hrec_t *r = record.ptr + + if r.type == BCF_HL_GEN: + self.add_meta(r.key, r.value) + else: + items = [(k,v) for k,v in record.attrs if k != 'IDX'] + self.add_meta(r.key, items=items) + + def add_line(self, line): + """Add a metadata line to this header""" + if bcf_hdr_append(self.ptr, line) < 0: + raise ValueError('invalid header line') -cdef VariantHeader makeVariantHeader(bcf_hdr_t *h): - if not h: + if self.ptr.dirty: + bcf_hdr_sync(self.ptr) + + def add_meta(self, key, value=None, items=None): + """Add metadata to this header""" + if not ((value is not None) ^ (items is not None)): + raise ValueError('either value or items must be specified') + + cdef bcf_hrec_t *hrec = calloc(1, sizeof(bcf_hrec_t)) + cdef int quoted + + try: + hrec.key = strdup(key) + + if value is not None: + hrec.value = strdup(value) + else: + for key, value in items: + bcf_hrec_add_key(hrec, key, len(key)) + + value = str(value) + quoted = strpbrk(value, ' ;,"\t<>') != NULL + bcf_hrec_set_val(hrec, hrec.nkeys-1, value, len(value), quoted) + except: + bcf_hrec_destroy(hrec) + raise + + bcf_hdr_add_hrec(self.ptr, hrec) + + if self.ptr.dirty: + bcf_hdr_sync(self.ptr) + + def add_sample(self, name): + """Add a new sample to this header""" + if bcf_hdr_add_sample(self.ptr, name) < 0: + raise ValueError('Duplicated sample name: {}'.format(name)) + if self.ptr.dirty: + bcf_hdr_sync(self.ptr) + + +cdef VariantHeader makeVariantHeader(bcf_hdr_t *hdr): + if not hdr: raise ValueError('cannot create VariantHeader') - cdef VariantHeader header = 
VariantHeader.__new__(VariantHeader, None) - header.ptr = h + cdef VariantHeader header = VariantHeader.__new__(VariantHeader) + header.ptr = hdr return header @@ -952,7 +1079,7 @@ cdef class VariantRecordFilter(object): return self.record.ptr.d.n_flt != 0 def __getitem__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr cdef int index, id cdef int n = r.d.n_flt @@ -968,20 +1095,20 @@ cdef class VariantRecordFilter(object): if key == '.': key = 'PASS' - id = bcf_hdr_id2int(h, BCF_DT_ID, key) + id = bcf_hdr_id2int(hdr, BCF_DT_ID, key) - if not bcf_hdr_idinfo_exists(h, BCF_HL_FLT, id) or not bcf_has_filter(h, self.record.ptr, key): + if not bcf_hdr_idinfo_exists(hdr, BCF_HL_FLT, id) or not bcf_has_filter(hdr, self.record.ptr, key): raise KeyError('Invalid filter') return makeVariantMetadata(self.record.header, BCF_HL_FLT, id) def __iter__(self): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr cdef int i, n = r.d.n_flt for i in range(n): - yield bcf_hdr_int2id(h, BCF_DT_ID, r.d.flt[i]) + yield bcf_hdr_int2id(hdr, BCF_DT_ID, r.d.flt[i]) def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. 
d defaults to None.""" @@ -991,9 +1118,9 @@ cdef class VariantRecordFilter(object): return default def __contains__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - return bcf_has_filter(h, r, key) == 1 + return bcf_has_filter(hdr, r, key) == 1 def iterkeys(self): """D.iterkeys() -> an iterator over the keys of D""" @@ -1047,7 +1174,7 @@ cdef class VariantRecordFormat(object): return self.record.ptr.n_fmt != 0 def __getitem__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr cdef bcf_fmt_t *fmt cdef int index @@ -1059,19 +1186,19 @@ cdef class VariantRecordFormat(object): raise IndexError('invalid format index') fmt = &r.d.fmt[index] else: - fmt = bcf_get_fmt(h, r, key) + fmt = bcf_get_fmt(hdr, r, key) if not fmt: raise KeyError('unknown format') return makeVariantMetadata(self.record.header, BCF_HL_FMT, fmt.id) def __iter__(self): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr cdef int i, n = r.n_fmt for i in range(n): - yield bcf_hdr_int2id(h, BCF_DT_ID, r.d.fmt[i].id) + yield bcf_hdr_int2id(hdr, BCF_DT_ID, r.d.fmt[i].id) def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. 
d defaults to None.""" @@ -1081,9 +1208,9 @@ cdef class VariantRecordFormat(object): return default def __contains__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef bcf_fmt_t *fmt = bcf_get_fmt(h, r, key) + cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, key) return fmt != NULL def iterkeys(self): @@ -1139,9 +1266,9 @@ cdef class VariantRecordInfo(object): return self.record.ptr.n_info != 0 def __getitem__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef bcf_info_t *info = bcf_get_info(h, r, key) + cdef bcf_info_t *info = bcf_get_info(hdr, r, key) if not info: raise KeyError('Unknown INFO field: {}'.format(key)) @@ -1149,12 +1276,12 @@ cdef class VariantRecordInfo(object): return bcf_info_value(info) def __iter__(self): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr cdef int i, n = r.n_info for i in range(n): - yield bcf_hdr_int2id(h, BCF_DT_ID, r.d.info[i].key) + yield bcf_hdr_int2id(hdr, BCF_DT_ID, r.d.info[i].key) def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. 
d defaults to None.""" @@ -1164,9 +1291,9 @@ cdef class VariantRecordInfo(object): return default def __contains__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef bcf_info_t *info = bcf_get_info(h, r, key) + cdef bcf_info_t *info = bcf_get_info(hdr, r, key) return info != NULL @@ -1186,14 +1313,14 @@ cdef class VariantRecordInfo(object): def iteritems(self): """D.iteritems() -> an iterator over the (key, value) items of D""" - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr cdef bcf_info_t *info cdef int i, n = r.n_info for i in range(n): info = &r.d.info[i] - key = bcf_hdr_int2id(h, BCF_DT_ID, info.key) + key = bcf_hdr_int2id(hdr, BCF_DT_ID, info.key) value = bcf_info_value(info) yield key, value @@ -1226,7 +1353,7 @@ cdef VariantRecordInfo makeVariantRecordInfo(VariantRecord record): cdef class VariantRecordSamples(object): - """mapping from sample index or name to :class:`makeVariantRecordSample` object.""" + """mapping from sample index or name to :class:`VariantRecordSample` object.""" def __len__(self): return bcf_hdr_nsamples(self.record.header.ptr) @@ -1235,9 +1362,9 @@ cdef class VariantRecordSamples(object): return bcf_hdr_nsamples(self.record.header.ptr) != 0 def __getitem__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef int n = bcf_hdr_nsamples(h) + cdef int n = bcf_hdr_nsamples(hdr) cdef int sample_index cdef vdict_t *d cdef khiter_t k @@ -1245,7 +1372,7 @@ cdef class VariantRecordSamples(object): if isinstance(key, int): sample_index = key else: - sample_index = bcf_hdr_id2int(h, BCF_DT_SAMPLE, key) + sample_index = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, key) if sample_index < 0: raise KeyError('invalid sample name') @@ -1255,12 +1382,12 @@ cdef class VariantRecordSamples(object): return 
makeVariantRecordSample(self.record, sample_index) def __iter__(self): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef int32_t i, n = bcf_hdr_nsamples(h) + cdef int32_t i, n = bcf_hdr_nsamples(hdr) for i in range(n): - yield h.samples[i] + yield hdr.samples[i] def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" @@ -1270,9 +1397,9 @@ cdef class VariantRecordSamples(object): return default def __contains__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef int n = bcf_hdr_nsamples(h) + cdef int n = bcf_hdr_nsamples(hdr) cdef int sample_index cdef vdict_t *d cdef khiter_t k @@ -1280,7 +1407,7 @@ cdef class VariantRecordSamples(object): if isinstance(key, int): sample_index = key else: - sample_index = bcf_hdr_id2int(h, BCF_DT_SAMPLE, key) + sample_index = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, key) if sample_index < 0: raise KeyError('invalid sample name') @@ -1292,21 +1419,21 @@ cdef class VariantRecordSamples(object): def itervalues(self): """D.itervalues() -> an iterator over the values of D""" - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef int32_t i, n = bcf_hdr_nsamples(h) + cdef int32_t i, n = bcf_hdr_nsamples(hdr) for i in range(n): yield makeVariantRecordSample(self.record, i) def iteritems(self): """D.iteritems() -> an iterator over the (key, value) items of D""" - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef int32_t i, n = bcf_hdr_nsamples(h) + cdef int32_t i, n = bcf_hdr_nsamples(hdr) for i in range(n): - yield h.samples[i], makeVariantRecordSample(self.record, i) + yield hdr.samples[i], makeVariantRecordSample(self.record, i) def keys(self): """D.keys() -> list of D's 
keys""" @@ -1330,10 +1457,10 @@ cdef VariantRecordSamples makeVariantRecordSamples(VariantRecord record): if not record: raise ValueError('invalid VariantRecord') - cdef VariantRecordSamples genos = VariantRecordSamples.__new__(VariantRecordSamples) - genos.record = record + cdef VariantRecordSamples samples = VariantRecordSamples.__new__(VariantRecordSamples) + samples.record = record - return genos + return samples cdef class VariantRecord(object): @@ -1348,41 +1475,82 @@ cdef class VariantRecord(object): """internal reference id number""" def __get__(self): return self.ptr.rid + def __set__(self, rid): + cdef bcf_hdr_t *hdr = self.header.ptr + cdef int r = rid + if rid < 0 or r >= hdr.n[BCF_DT_CTG] or not hdr.id[BCF_DT_CTG][r].val: + raise ValueError('invalid reference id') + self.ptr.rid = r property chrom: """chromosome/contig name""" def __get__(self): return bcf_hdr_id2name(self.header.ptr, self.ptr.rid) + def __set__(self, chrom): + cdef vdict_t *d = self.header.ptr.dict[BCF_DT_CTG] + cdef khint_t k = kh_get_vdict(d, chrom) + if k == kh_end(d): + raise ValueError('Invalid chromosome/contig') + self.ptr.rid = kh_val_vdict(d, k).id property contig: """chromosome/contig name""" def __get__(self): return bcf_hdr_id2name(self.header.ptr, self.ptr.rid) + def __set__(self, chrom): + cdef vdict_t *d = self.header.ptr.dict[BCF_DT_CTG] + cdef khint_t k = kh_get_vdict(d, chrom) + if k == kh_end(d): + raise ValueError('Invalid chromosome/contig') + self.ptr.rid = kh_val_vdict(d, k).id property pos: """record start position on chrom/contig (1-based inclusive)""" def __get__(self): return self.ptr.pos + 1 + def __set__(self, pos): + if pos < 1: + raise ValueError('Position must be positive') + # FIXME: check start <= stop? 
+ self.ptr.pos = pos - 1 property start: """record start position on chrom/contig (0-based inclusive)""" def __get__(self): return self.ptr.pos + def __set__(self, start): + if start < 0: + raise ValueError('Start coordinate must be non-negative') + # FIXME: check start <= stop? + self.ptr.pos = start property stop: """record stop position on chrom/contig (0-based exclusive)""" def __get__(self): return self.ptr.pos + self.ptr.rlen + def __set__(self, stop): + if stop < self.ptr.pos: + raise ValueError('Stop coordinate must be greater than or equal to start') + self.ptr.rlen = stop - self.ptr.pos property rlen: - """record length on chrom/contig (rec.stop - rec.start)""" + """record length on chrom/contig (typically rec.stop - rec.start unless END info is supplied)""" def __get__(self): return self.ptr.rlen + def __set__(self, rlen): + if rlen < 0: + raise ValueError('Reference length must be non-negative') + self.ptr.rlen = rlen property qual: """phred scaled quality score or None if not available""" def __get__(self): return self.ptr.qual if not bcf_float_is_missing(self.ptr.qual) else None + def __set__(self, qual): + if qual is not None: + self.ptr.qual = qual + else: + memcpy(&self.ptr.qual, &bcf_float_missing, 4) # property n_info: # def __get__(self): @@ -1423,6 +1591,12 @@ cdef class VariantRecord(object): raise ValueError('Error unpacking VariantRecord') id = self.ptr.d.id return id if id != b'.' 
else None + def __set__(self, id): + cdef char *idstr = NULL + if id is not None: + idstr = id + if bcf_update_id(self.header.ptr, self.ptr, idstr) < 0: + raise ValueError('Error updating id') property ref: """reference allele""" @@ -1430,6 +1604,10 @@ cdef class VariantRecord(object): if bcf_unpack(self.ptr, BCF_UN_STR) < 0: raise ValueError('Error unpacking VariantRecord') return self.ptr.d.allele[0] if self.ptr.d.allele else None + def __set__(self, ref): + alleles = list(self.alleles) + alleles[0] = ref + self.alleles = alleles property alleles: """tuple of reference allele followed by alt alleles""" @@ -1439,6 +1617,12 @@ cdef class VariantRecord(object): if not self.ptr.d.allele: return None return tuple(self.ptr.d.allele[i] for i in range(self.ptr.n_allele)) + def __set__(self, values): + if bcf_unpack(self.ptr, BCF_UN_STR) < 0: + raise ValueError('Error unpacking VariantRecord') + values = ','.join(values) + if bcf_update_alleles_str(self.header.ptr, self.ptr, values) < 0: + raise ValueError('Error updating alleles') property alts: """tuple of alt alleles""" @@ -1448,6 +1632,10 @@ cdef class VariantRecord(object): if self.ptr.n_allele < 2 or not self.ptr.d.allele: return None return tuple(self.ptr.d.allele[i] for i in range(1,self.ptr.n_allele)) + def __set__(self, alts): + alleles = [self.ref] + alleles.extend(alts) + self.alleles = alleles property filter: """filter information (see :class:`VariantRecordFilter`)""" @@ -1533,27 +1721,27 @@ cdef class VariantRecordSample(object): property name: """sample name""" def __get__(self): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef int32_t n = bcf_hdr_nsamples(h) + cdef int32_t n = bcf_hdr_nsamples(hdr) if self.index < 0 or self.index >= n: raise ValueError('invalid sample index') - return h.samples[self.index] + return hdr.samples[self.index] property allele_indices: """allele indices for called genotype, if present. 
Otherwise None""" def __get__(self): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef int32_t n = bcf_hdr_nsamples(h) + cdef int32_t n = bcf_hdr_nsamples(hdr) if self.index < 0 or self.index >= n or not r.n_fmt: return None cdef bcf_fmt_t *fmt0 = r.d.fmt - cdef int gt0 = is_gt_fmt(h, fmt0) + cdef int gt0 = is_gt_fmt(hdr, fmt0) if not gt0 or not fmt0.n: return None @@ -1587,16 +1775,16 @@ cdef class VariantRecordSample(object): property alleles: """alleles for called genotype, if present. Otherwise None""" def __get__(self): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef int32_t nsamples = bcf_hdr_nsamples(h) + cdef int32_t nsamples = bcf_hdr_nsamples(hdr) cdef int32_t nalleles = r.n_allele if self.index < 0 or self.index >= nsamples or not r.n_fmt: return None cdef bcf_fmt_t *fmt0 = r.d.fmt - cdef int gt0 = is_gt_fmt(h, fmt0) + cdef int gt0 = is_gt_fmt(hdr, fmt0) if not gt0 or not fmt0.n: return None @@ -1631,6 +1819,55 @@ cdef class VariantRecordSample(object): return tuple(alleles) + property phased: + """False if genotype is missing or any allele is unphased. 
Otherwise True.""" + def __get__(self): + cdef bcf_hdr_t *hdr = self.record.header.ptr + cdef bcf1_t *r = self.record.ptr + cdef int32_t n = bcf_hdr_nsamples(hdr) + + if self.index < 0 or self.index >= n or not r.n_fmt: + return False + + cdef bcf_fmt_t *fmt0 = r.d.fmt + cdef int gt0 = is_gt_fmt(hdr, fmt0) + + if not gt0 or not fmt0.n: + return False + + cdef int8_t *data8 + cdef int16_t *data16 + cdef int32_t *data32 + + phased = False + + if fmt0.type == BCF_BT_INT8: + data8 = (fmt0.p + self.index * fmt0.size) + for i in range(fmt0.n): + if data8[i] == bcf_int8_vector_end: + break + if i and data8[i] & 1 == 0: + return False + phased = True + elif fmt0.type == BCF_BT_INT16: + data16 = (fmt0.p + self.index * fmt0.size) + for i in range(fmt0.n): + if data16[i] == bcf_int16_vector_end: + break + if i and data16[i] & 1 == 0: + return False + phased = True + elif fmt0.type == BCF_BT_INT32: + data32 = (fmt0.p + self.index * fmt0.size) + for i in range(fmt0.n): + if data32[i] == bcf_int32_vector_end: + break + if i and data32[i] & 1 == 0: + return False + phased = True + + return phased + def __len__(self): return self.record.ptr.n_fmt @@ -1638,7 +1875,7 @@ cdef class VariantRecordSample(object): return self.record.ptr.n_fmt != 0 def __getitem__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr cdef bcf_fmt_t *fmt cdef int index @@ -1649,12 +1886,12 @@ cdef class VariantRecordSample(object): raise IndexError('invalid format index') fmt = r.d.fmt + index else: - fmt = bcf_get_fmt(h, r, key) + fmt = bcf_get_fmt(hdr, r, key) if not fmt: raise KeyError('invalid format requested') - if is_gt_fmt(h, fmt): + if is_gt_fmt(hdr, fmt): return self.alleles elif fmt.p and fmt.n and fmt.size: return bcf_array_to_object(fmt.p + self.index * fmt.size, fmt.type, fmt.n, scalar=1) @@ -1662,12 +1899,12 @@ cdef class VariantRecordSample(object): return None def __iter__(self): - cdef bcf_hdr_t *h = 
self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr cdef int i, n = r.n_fmt for i in range(n): - yield bcf_hdr_int2id(h, BCF_DT_ID, r.d.fmt[i].id) + yield bcf_hdr_int2id(hdr, BCF_DT_ID, r.d.fmt[i].id) def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" @@ -1677,9 +1914,9 @@ cdef class VariantRecordSample(object): return default def __contains__(self, key): - cdef bcf_hdr_t *h = self.record.header.ptr + cdef bcf_hdr_t *hdr = self.record.header.ptr cdef bcf1_t *r = self.record.ptr - cdef bcf_fmt_t *fmt = bcf_get_fmt(h, r, key) + cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, key) return fmt != NULL def iterkeys(self): @@ -1905,6 +2142,8 @@ cdef class BCFIterator(BaseIterator): raise ValueError('bcf index required') cdef BCFIndex index = bcf.index + cdef int rid, cstart, cstop + cdef char *cregion if not index: raise ValueError('bcf index required') @@ -1916,7 +2155,9 @@ cdef class BCFIterator(BaseIterator): if contig is not None or start is not None or stop is not None: raise ValueError # FIXME - self.iter = bcf_itr_querys(index.ptr, bcf.header.ptr, region) + cregion = region + with nogil: + self.iter = bcf_itr_querys(index.ptr, bcf.header.ptr, cregion) else: if contig is None: raise ValueError # FIXME @@ -1928,7 +2169,10 @@ cdef class BCFIterator(BaseIterator): if stop is None: stop = MAX_POS - self.iter = bcf_itr_queryi(index.ptr, rid, start, stop) + cstart, cstop = start, stop + + with nogil: + self.iter = bcf_itr_queryi(index.ptr, rid, cstart, cstop) # Do not fail on self.iter == NULL, since it signifies a null query. 
@@ -1953,7 +2197,10 @@ cdef class BCFIterator(BaseIterator): if self.bcf.drop_samples: record.max_unpack = BCF_UN_SHR - cdef int ret = bcf_itr_next(self.bcf.htsfile, self.iter, record) + cdef int ret + + with nogil: + ret = bcf_itr_next(self.bcf.htsfile, self.iter, record) if ret < 0: _stop_BCFIterator(self, record) @@ -2031,7 +2278,10 @@ cdef class TabixIterator(BaseIterator): if not self.iter: raise StopIteration - cdef int ret = tbx_itr_next(self.bcf.htsfile, self.index.ptr, self.iter, &self.line_buffer) + cdef int ret + + with nogil: + ret = tbx_itr_next(self.bcf.htsfile, self.index.ptr, self.iter, &self.line_buffer) if ret < 0: tbx_itr_destroy(self.iter) @@ -2191,7 +2441,8 @@ cdef class VariantFile(object): if self.drop_samples: record.max_unpack = BCF_UN_SHR - ret = bcf_read1(self.htsfile, self.header.ptr, record) + with nogil: + ret = bcf_read1(self.htsfile, self.header.ptr, record) if ret < 0: bcf_destroy1(record) @@ -2209,9 +2460,13 @@ cdef class VariantFile(object): raise ValueError cdef VariantFile vars = VariantFile.__new__(VariantFile) + cdef bcf_hdr_t *hdr + cdef char *cfilename, *cmode # FIXME: re-open using fd or else header and index could be invalid - vars.htsfile = hts_open(self.filename, self.mode) + cfilename, cmode = self.filename, self.mode + with nogil: + vars.htsfile = hts_open(cfilename, cmode) if not vars.htsfile: raise ValueError('Cannot re-open htsfile') @@ -2232,7 +2487,9 @@ cdef class VariantFile(object): if self.htsfile.is_bin: vars.seek(self.tell()) else: - makeVariantHeader(bcf_hdr_read(vars.htsfile)) + with nogil: + hdr = bcf_hdr_read(vars.htsfile) + makeVariantHeader(hdr) return vars @@ -2242,6 +2499,11 @@ cdef class VariantFile(object): If open is called on an existing VariantFile, the current file will be closed and a new file will be opened. 
""" + cdef bcf_hdr_t *hdr + cdef hts_idx_t *idx + cdef tbx_t *tidx + cdef char *cfilename, *cmode + # close a previously opened file if self.is_open: self.close() @@ -2285,33 +2547,47 @@ cdef class VariantFile(object): # open file. Header gets written to file at the same time for bam files # and sam files (in the latter case, the mode needs to be wh) - self.htsfile = hts_open(filename, mode) + cfilename, cmode = filename, mode + with nogil: + self.htsfile = hts_open(cfilename, cmode) if not self.htsfile: raise ValueError("could not open file `{}` (mode='{}')".format((filename, mode))) - bcf_hdr_write(self.htsfile, self.header.ptr) + with nogil: + bcf_hdr_write(self.htsfile, self.header.ptr) elif mode[0] == b'r': # open file for reading if filename != b'-' and not self.is_remote and not os.path.exists(filename): raise IOError('file `{}` not found'.format(filename)) - self.htsfile = hts_open(filename, mode) + cfilename, cmode = filename, mode + with nogil: + self.htsfile = hts_open(cfilename, cmode) if not self.htsfile: raise ValueError("could not open file `{}` (mode='{}') - is it VCF/BCF format?".format((filename, mode))) - self.header = makeVariantHeader(bcf_hdr_read(self.htsfile)) + with nogil: + hdr = bcf_hdr_read(self.htsfile) + self.header = makeVariantHeader(hdr) if not self.header: raise ValueError("file `{}` does not have valid header (mode='{}') - is it BCF format?".format((filename, mode))) # check for index and open if present if self.htsfile.format.format == bcf: - self.index = makeBCFIndex(self.header, bcf_index_load(filename)) + cfilename = filename + with nogil: + idx = bcf_index_load(cfilename) + self.index = makeBCFIndex(self.header, idx) else: - self.index = makeTabixIndex(tbx_index_load(filename + '.tbi')) + tabix_filename = filename + '.tbi' + cfilename = tabix_filename + with nogil: + tidx = tbx_index_load(cfilename) + self.index = makeTabixIndex(tidx) if not self.is_stream: self.start_offset = self.tell() @@ -2327,10 +2603,15 @@ cdef class 
VariantFile(object): if self.is_stream: raise OSError('seek not available in streams') + cdef int ret if self.htsfile.format.compression != no_compression: - return bgzf_seek(hts_get_bgzfp(self.htsfile), offset, SEEK_SET) + with nogil: + ret = bgzf_seek(hts_get_bgzfp(self.htsfile), offset, SEEK_SET) else: - return hts_useek(self.htsfile, offset, SEEK_SET) + with nogil: + ret = hts_useek(self.htsfile, offset, SEEK_SET) + return ret + def tell(self): """return current file position, see :meth:`pysam.VariantFile.seek`.""" @@ -2339,10 +2620,14 @@ cdef class VariantFile(object): if self.is_stream: raise OSError('tell not available in streams') + cdef int ret if self.htsfile.format.compression != no_compression: - return bgzf_tell(hts_get_bgzfp(self.htsfile)) + with nogil: + ret = bgzf_tell(hts_get_bgzfp(self.htsfile)) else: - return hts_utell(self.htsfile) + with nogil: + ret = hts_utell(self.htsfile) + return ret def fetch(self, contig=None, start=None, stop=None, region=None, reopen=False): """fetch records in a :term:`region` using 0-based indexing. 
The @@ -2391,7 +2676,10 @@ cdef class VariantFile(object): if not self.is_open: return 0 - cdef int ret = bcf_write1(self.htsfile, self.header.ptr, record.ptr) + cdef int ret + + with nogil: + ret = bcf_write1(self.htsfile, self.header.ptr, record.ptr) if ret < 0: raise ValueError('write failed') diff --git a/pysam/cfaidx.pxd b/pysam/cfaidx.pxd index b7926df..34e825e 100644 --- a/pysam/cfaidx.pxd +++ b/pysam/cfaidx.pxd @@ -3,13 +3,39 @@ from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t from libc.stdlib cimport malloc, calloc, realloc, free from libc.string cimport memcpy, memcmp, strncpy, strlen, strdup from libc.stdio cimport FILE, printf +cimport cython -from chtslib cimport faidx_t, kseq_t, gzFile - -cdef extern from "htslib/kstring.h" nogil: - ctypedef struct kstring_t: - size_t l, m - char *s +from cpython cimport array +from pysam.chtslib cimport faidx_t, gzFile, kstring_t + +# These functions are put here and not in chtslib.pxd in order +# to avoid warnings for unused functions. +cdef extern from "pysam_stream.h" nogil: + + ctypedef struct kstream_t: + pass + + ctypedef struct kseq_t: + kstring_t name + kstring_t comment + kstring_t seq + kstring_t qual + + gzFile gzopen(char *, char *) + kseq_t *kseq_init(gzFile) + int kseq_read(kseq_t *) + void kseq_destroy(kseq_t *) + int gzclose(gzFile) + + kstream_t *ks_init(gzFile) + void ks_destroy(kstream_t *) + + # Retrieve characters from stream until delimiter + # is reached placing results in str. + int ks_getuntil(kstream_t *, + int delimiter, + kstring_t * str, + int * dret) cdef class FastaFile: cdef object _filename, _references, _lengths, reference2length @@ -20,23 +46,35 @@ cdef class FastaFile: cdef class FastqProxy: cdef kseq_t * _delegate + cdef cython.str tostring(self) + cpdef array.array get_quality_array(self, int offset=*) + + +cdef class PersistentFastqProxy: + """ + Python container for pysam.cfaidx.FastqProxy with persistence. 
+ """ + cdef public str comment, quality, sequence, name + cdef cython.str tostring(self) + cpdef array.array get_quality_array(self, int offset=*) cdef class FastxFile: cdef object _filename cdef gzFile fastqfile cdef kseq_t * entry + cdef bint persist cdef kseq_t * getCurrent(self) cdef int cnext(self) + # Compatibility Layer for pysam 0.8.1 cdef class FastqFile(FastxFile): pass + # Compatibility Layer for pysam < 0.8 cdef class Fastafile(FastaFile): pass -cdef class Fastqfile(FastxFile): - pass diff --git a/pysam/cfaidx.pyx b/pysam/cfaidx.pyx index 5338299..a1dc488 100644 --- a/pysam/cfaidx.pyx +++ b/pysam/cfaidx.pyx @@ -1,16 +1,53 @@ -# cython: embedsignature=True + # cython: embedsignature=True # cython: profile=True -# adds doc-strings for sphinx +############################################################################### +############################################################################### +# Cython wrapper for FASTA/FASTQ files based on htslib +############################################################################### +# The principal classes defined in this module are: +# +# class FastaFile random read/write access to faidx indexed files +# class FastxFile streamed read/write access to fasta/fastq files +# +# Additionally this module defines several additional classes that are part +# of the internal API.
These are: +# +# class FastqProxy +# class PersistentFastqProxy +# +# For backwards compatibility, the following classes are also defined: +# +# class Fastafile equivalent to FastaFile +# class FastqFile equivalent to FastxFile +# +############################################################################### +# +# The MIT License +# +# Copyright (c) 2015 Andreas Heger +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+# +############################################################################### import sys import os - -cdef class FastqProxy -cdef makeFastqProxy(kseq_t * src): - '''enter src into AlignedRead.''' - cdef FastqProxy dest = FastqProxy.__new__(FastqProxy) - dest._delegate = src - return dest - +from cpython cimport array from cpython cimport PyErr_SetString, \ PyBytes_Check, \ @@ -19,68 +56,58 @@ from cpython cimport PyErr_SetString, \ from cpython.version cimport PY_MAJOR_VERSION -from chtslib cimport \ +from pysam.chtslib cimport \ faidx_nseq, fai_load, fai_destroy, fai_fetch, \ - faidx_fetch_seq, gzopen, gzclose, \ - kseq_init, kseq_destroy, kseq_read - + faidx_seq_len, \ + faidx_fetch_seq, gzopen, gzclose -######################################################################## -######################################################################## -######################################################################## -## Python 3 compatibility functions -######################################################################## -IS_PYTHON3 = PY_MAJOR_VERSION >= 3 +from pysam.cutils cimport force_bytes, force_str, charptr_to_str +from pysam.cutils cimport encode_filename, from_string_and_size +from pysam.cutils cimport qualitystring_to_array, parse_region -# filename encoding (copied from lxml.etree.pyx) -cdef str _FILENAME_ENCODING -_FILENAME_ENCODING = sys.getfilesystemencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = sys.getdefaultencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = 'ascii' - -#cdef char* _C_FILENAME_ENCODING -#_C_FILENAME_ENCODING = _FILENAME_ENCODING - -cdef bytes _encodeFilename(object filename): - """Make sure a filename is 8-bit encoded (or None).""" - if filename is None: - return None - elif PyBytes_Check(filename): - return filename - elif PyUnicode_Check(filename): - return filename.encode(_FILENAME_ENCODING) - else: - raise TypeError, u"Argument must be string or unicode." 
- - - -##################################################################### -# hard-coded constants -cdef int max_pos = 2 << 29 +cdef class FastqProxy +cdef makeFastqProxy(kseq_t * src): + '''enter src into AlignedRead.''' + cdef FastqProxy dest = FastqProxy.__new__(FastqProxy) + dest._delegate = src + return dest ## TODO: ## add automatic indexing. ## add function to get sequence names. cdef class FastaFile: - '''*(filename)* + """Random access to fasta formatted files that + have been indexed by :term:`faidx`. + + The file is automatically opened. The index file of file + ```` is expected to be called ``.fai``. + + Parameters + ---------- + + filename : string + Filename of fasta file to be opened. - A *FASTA* file. The file is automatically opened. + Raises + ------ + + ValueError + if index file is missing - This class expects an indexed fasta file and permits - random access to fasta sequences. - ''' + IOError + if file could not be opened - def __cinit__(self, *args, **kwargs ): + """ + + def __cinit__(self, *args, **kwargs): self.fastafile = NULL self._filename = None self._references = None self._lengths = None self.reference2length = None - self._open( *args, **kwargs ) + self._open(*args, **kwargs) - def _isOpen( self ): + def is_open(self): '''return true if samfile has been opened.''' return self.fastafile != NULL @@ -97,15 +124,18 @@ cdef class FastaFile: ''' # close a previously opened file - if self.fastafile != NULL: self.close() - self._filename = _encodeFilename(filename) - self.fastafile = fai_load(self._filename) + if self.fastafile != NULL: + self.close() + self._filename = encode_filename(filename) + cdef char *cfilename = self._filename + with nogil: + self.fastafile = fai_load(cfilename) if self.fastafile == NULL: raise IOError("could not open file `%s`" % filename) # read index - if not os.path.exists( self._filename + b".fai" ): + if not os.path.exists(self._filename + b".fai"): raise ValueError("could not locate index file") with open( 
self._filename + b".fai" ) as inf: @@ -114,16 +144,32 @@ cdef class FastaFile: self._lengths = tuple(int(x[1]) for x in data) self.reference2length = dict(zip(self._references, self._lengths)) - def close( self ): + def close(self): + """close the file.""" if self.fastafile != NULL: - fai_destroy( self.fastafile ) + fai_destroy(self.fastafile) self.fastafile = NULL def __dealloc__(self): self.close() + # context manager interface + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + return False + + property closed: + """"bool indicating the current state of the file object. + This is a read-only attribute; the close() method changes the value. + """ + def __get__(self): + return not self.is_open() + property filename: - '''filename associated with this object.''' + """filename associated with this object. This is a read-only attribute.""" def __get__(self): return self._filename @@ -133,12 +179,13 @@ cdef class FastaFile: return self._references property nreferences: - '''number of :term:`reference` sequences in the file.''' + """"int with the number of :term:`reference` sequences in the file. + This is a read-only attribute.""" def __get__(self): return len(self._references) if self.references else None property lengths: - '''tuple with the lengths of :term:`reference` sequences.''' + """tuple with the lengths of :term:`reference` sequences.""" def __get__(self): return self._lengths @@ -147,84 +194,88 @@ cdef class FastaFile: start=None, end=None, region=None): + """fetch sequences in a :term:`region`. - '''*(reference = None, start = None, end = None, region = None)* + A region can + either be specified by :term:`reference`, `start` and + `end`. `start` and `end` denote 0-based, half-open + intervals. - fetch sequences in a :term:`region` using 0-based indexing. + Alternatively, a samtools :term:`region` string can be + supplied. 
+ + If any of the coordinates are missing they will be replaced by the + minimum (`start`) or maximum (`end`) coordinate. - The region is specified by :term:`reference`, *start* and *end*. + Note that region strings are 1-based, while `start` and `end` denote + an interval in python coordinates. + The region is specified by :term:`reference`, `start` and `end`. + + Returns + ------- - fetch returns an empty string if the region is out of range or - addresses an unknown *reference*. + string : a string with the sequence specified by the region. - If *reference* is given and *start* is None, the sequence from the - first base is returned. Similarly, if *end* is None, the sequence - until the last base is returned. + Raises + ------ - Alternatively, a samtools :term:`region` string can be supplied. - ''' + IndexError + if the coordinates are out of range + + ValueError + if the region is invalid - if not self._isOpen(): - raise ValueError( "I/O operation on closed file" ) + """ + + if not self.is_open(): + raise ValueError("I/O operation on closed file" ) cdef int length - cdef char * seq - - if not region: - if reference is None: - raise ValueError('no sequence/region supplied.') - if start is None: - start = 0 - if end is None: - end = max_pos - 1 - - if start > end: - raise ValueError( - 'invalid region: start (%i) > end (%i)' % (start, end)) - if start == end: - return b"" - # valid ranges are from 0 to 2^29-1 - if not 0 <= start < max_pos: - raise IndexError('start out of range (%i)' % start) - if not 0 <= end < max_pos: - raise IndexError('end out of range (%i)' % end) - # note: faidx_fetch_seq has a bug such that out-of-range access - # always returns the last residue. 
Hence do not use faidx_fetch_seq, - # but use fai_fetch instead - # seq = faidx_fetch_seq(self.fastafile, - # reference, - # start, - # end-1, - # &length) - region = "%s:%i-%i" % (reference, start+1, end) - if PY_MAJOR_VERSION >= 3: - region = region.encode('ascii') - seq = fai_fetch( self.fastafile, - region, - &length ) - else: - # samtools adds a '\0' at the end - seq = fai_fetch( self.fastafile, region, &length ) + cdef char *seq + cdef char *ref + cdef int rstart, rend + + reference, rstart, rend = parse_region(reference, start, end, region) + + if reference is None: + raise ValueError("no sequence/region supplied.") + + if rstart == rend: + return "" + + ref = reference + length = faidx_seq_len(self.fastafile, ref) + if length == -1: + raise KeyError("sequence '%s' not present" % reference) + if rstart >= length: + return "" + + # fai_fetch adds a '\0' at the end + with nogil: + seq = faidx_fetch_seq(self.fastafile, + ref, + rstart, + rend-1, + &length) - # copy to python if seq == NULL: - return b"" - else: - try: - py_seq = seq[:length] - finally: - free(seq) + raise ValueError( + "failure when retrieving sequence on '%s'" % reference) - return py_seq + try: + return charptr_to_str(seq) + finally: + free(seq) - cdef char * _fetch( self, char * reference, int start, int end, int * length ): + cdef char * _fetch(self, char * reference, int start, int end, int * length): '''fetch sequence for reference, start and end''' - return faidx_fetch_seq(self.fastafile, - reference, - start, - end-1, - length ) + with nogil: + return faidx_fetch_seq(self.fastafile, + reference, + start, + end-1, + length) def get_reference_length(self, reference): '''return the length of reference.''' @@ -243,80 +294,176 @@ cdef class FastqProxy: property name: def __get__(self): - return self._delegate.name.s + return charptr_to_str(self._delegate.name.s) property sequence: def __get__(self): - return self._delegate.seq.s + return charptr_to_str(self._delegate.seq.s) property comment: 
def __get__(self): if self._delegate.comment.l: - return self._delegate.comment.s - else: return None + return charptr_to_str(self._delegate.comment.s) + else: + return None property quality: def __get__(self): if self._delegate.qual.l: - return self._delegate.qual.s - else: return None + return charptr_to_str(self._delegate.qual.s) + else: + return None + + cdef cython.str tostring(self): + if self.comment is None: + comment = "" + else: + comment = " %s" % self.comment + + if self.quality is None: + return ">%s%s\n%s" % (self.name, comment, self.sequence) + else: + return "@%s%s\n%s\n+\n%s" % (self.name, comment, + self.sequence, self.quality) + + def __str__(self): + return self.tostring() + + cpdef array.array get_quality_array(self, int offset=33): + '''return quality values as array after subtracting offset.''' + if self.quality is None: + return None + return qualitystring_to_array(force_bytes(self.quality), + offset=offset) + +cdef class PersistentFastqProxy: + """ + Python container for pysam.cfaidx.FastqProxy with persistence. + Needed to compare multiple fastq records from the same file. 
+ """ + def __init__(self, FastqProxy FastqRead): + self.comment = FastqRead.comment + self.quality = FastqRead.quality + self.sequence = FastqRead.sequence + self.name = FastqRead.name + + cdef cython.str tostring(self): + if self.comment is None: + comment = "" + else: + comment = " %s" % self.comment + + if self.quality is None: + return ">%s%s\n%s" % (self.name, comment, self.sequence) + else: + return "@%s%s\n%s\n+\n%s" % (self.name, comment, + self.sequence, self.quality) + + def __str__(self): + return self.tostring() + + cpdef array.array get_quality_array(self, int offset=33): + '''return quality values as array after subtracting offset.''' + if self.quality is None: + return None + return qualitystring_to_array(force_bytes(self.quality), + offset=offset) cdef class FastxFile: - '''*(filename)* + """Stream access to :term:`fasta` or :term:`fastq` formatted files. + + The file is automatically opened. - A :term:`fastq` or :term:`fasta` formatted file. The file - is automatically opened. + Entries in the file can be both fastq or fasta formatted or even a + mixture of the two. - Entries in the file can be both fastq or fasta formatted - or even a mixture of the two. + This file object permits iterating over all entries in the + file. Random access is not implemented. The iteration returns + objects of type :class:`FastqProxy` - This file object permits iterating over all entries in - the file. Random access is not implemented. The iteration - returns objects of type :class:`FastqProxy` + Parameters + ---------- - ''' + filename : string + Filename of fasta/fastq file to be opened. + + persist : bool + + If True (default) make a copy of the entry in the file during + iteration. If set to False, no copy will be made. This will + permit faster iteration, but an entry will not persist when + the iteration continues. 
+ + Raises + ------ + + IOError + if file could not be opened + + """ def __cinit__(self, *args, **kwargs): # self.fastqfile = NULL self._filename = None self.entry = NULL self._open(*args, **kwargs) - def _isOpen( self ): + def is_open(self): '''return true if samfile has been opened.''' return self.entry != NULL - def _open(self, filename): - '''open a fastq/fasta file. + def _open(self, filename, persist=True): + '''open a fastq/fasta file in *filename* + + Paramentes + ---------- + + persist : bool + + if True return a copy of the underlying data (default + True). The copy will persist even if the iteration + on the file continues. + ''' self.close() if not os.path.exists(filename): raise IOError("no such file or directory: %s" % filename) - filename = _encodeFilename(filename) - self.fastqfile = gzopen(filename, "r") - self.entry = kseq_init(self.fastqfile) + self.persist = persist + + filename = encode_filename(filename) + cdef char *cfilename = filename + with nogil: + self.fastqfile = gzopen(cfilename, "r") + self.entry = kseq_init(self.fastqfile) self._filename = filename - def close( self ): - '''close file.''' + def close(self): + '''close the file.''' if self.entry != NULL: gzclose(self.fastqfile) if self.entry: kseq_destroy(self.entry) self.entry = NULL - + def __dealloc__(self): self.close() + property closed: + """"bool indicating the current state of the file object. + This is a read-only attribute; the close() method changes the value. 
+ """ + def __get__(self): + return not self.is_open() + property filename: - '''filename associated with this object.''' + """string with the filename associated with this object.""" def __get__(self): return self._filename def __iter__(self): - if not self._isOpen(): + if not self.is_open(): raise ValueError("I/O operation on closed file") return self @@ -326,15 +473,19 @@ cdef class FastxFile: cdef int cnext(self): '''C version of iterator ''' - return kseq_read(self.entry) + with nogil: + return kseq_read(self.entry) def __next__(self): """ python version of next(). """ cdef int l - l = kseq_read(self.entry) + with nogil: + l = kseq_read(self.entry) if (l > 0): + if self.persist: + return PersistentFastqProxy(makeFastqProxy(self.entry)) return makeFastqProxy(self.entry) else: raise StopIteration @@ -347,10 +498,9 @@ cdef class FastqFile(FastxFile): cdef class Fastafile(FastaFile): pass -cdef class Fastqfile(FastxFile): - pass - __all__ = ["FastaFile", "FastqFile", - "Fastafile", - "Fastqfile"] + "FastxFile", + "Fastafile"] + + diff --git a/pysam/chtslib.pxd b/pysam/chtslib.pxd index d714072..299e84a 100644 --- a/pysam/chtslib.pxd +++ b/pysam/chtslib.pxd @@ -6,7 +6,6 @@ from libc.stdio cimport FILE, printf from posix.types cimport off_t cdef extern from "Python.h": - long _Py_HashPointer(void*) FILE* PyFile_AsFile(object) @@ -23,6 +22,7 @@ cdef extern from "zlib.h" nogil: char * gzgets(gzFile file, char *buf, int len) int gzeof(gzFile file) + cdef extern from "htslib/kstring.h" nogil: ctypedef struct kstring_t: size_t l, m @@ -363,7 +363,7 @@ cdef extern from "htslib/hts.h" nogil: hFILE *hfile void *voidp - ctypedef enum htsFormatCategory: + cdef enum htsFormatCategory: unknown_category sequence_data # Sequence data -- SAM, BAM, CRAM, etc variant_data # Variant calling data -- VCF, BCF, etc @@ -371,14 +371,14 @@ cdef extern from "htslib/hts.h" nogil: region_list # Coordinate intervals or regions -- BED, etc category_maximum - ctypedef enum htsExactFormat: + cdef 
enum htsExactFormat: unknown_format binary_format text_format sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed format_maximum - ctypedef enum htsCompression: + cdef enum htsCompression: no_compression, gzip, bgzf, custom compression_maximum @@ -951,35 +951,7 @@ cdef extern from "htslib/sam.h" nogil: # ctypedef bam_pileup1_t * const_bam_pileup1_t_ptr "const bam_pileup1_t *" -cdef extern from "pysam_stream.h" nogil: - - ctypedef struct kstream_t: - pass - - ctypedef struct kseq_t: - kstring_t name - kstring_t comment - kstring_t seq - kstring_t qual - - gzFile gzopen(char *, char *) - kseq_t *kseq_init(gzFile) - int kseq_read(kseq_t *) - void kseq_destroy(kseq_t *) - int gzclose(gzFile) - - kstream_t *ks_init(gzFile) - void ks_destroy(kstream_t *) - - # Retrieve characters from stream until delimiter - # is reached placing results in str. - int ks_getuntil(kstream_t *, - int delimiter, - kstring_t * str, - int * dret) - - -cdef extern from "htslib/faidx.h": +cdef extern from "htslib/faidx.h" nogil: ctypedef struct faidx_t: pass @@ -1702,3 +1674,8 @@ cdef extern from "htslib/vcf.h" nogil: int bcf_itr_next(htsFile *fp, hts_itr_t *iter, void *r) hts_idx_t *bcf_index_load(const char *fn) const char **bcf_index_seqnames(const hts_idx_t *idx, const bcf_hdr_t *hdr, int *nptr) + +cdef extern from "htslib_util.h": + + int hts_set_verbosity(int verbosity) + int hts_get_verbosity() diff --git a/pysam/chtslib.pyx b/pysam/chtslib.pyx index 2f91396..eab229f 100644 --- a/pysam/chtslib.pyx +++ b/pysam/chtslib.pyx @@ -1,94 +1,19 @@ # cython: embedsignature=True # cython: profile=True # adds doc-strings for sphinx -import tempfile -import os -import sys -import types -import itertools -import struct -import ctypes -import collections -import re -import platform -import warnings -from cpython cimport PyErr_SetString, \ - PyBytes_Check, \ - PyUnicode_Check, \ - PyBytes_FromStringAndSize +from pysam.chtslib cimport * -from cpython.version cimport PY_MAJOR_VERSION - 
-######################################################################## -######################################################################## -######################################################################## -## Python 3 compatibility functions -######################################################################## -IS_PYTHON3 = PY_MAJOR_VERSION >= 3 - -cdef from_string_and_size(char* s, size_t length): - if PY_MAJOR_VERSION < 3: - return s[:length] - else: - return s[:length].decode("ascii") - -# filename encoding (copied from lxml.etree.pyx) -cdef str _FILENAME_ENCODING -_FILENAME_ENCODING = sys.getfilesystemencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = sys.getdefaultencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = 'ascii' - -#cdef char* _C_FILENAME_ENCODING -#_C_FILENAME_ENCODING = _FILENAME_ENCODING - -cdef bytes _encodeFilename(object filename): - """Make sure a filename is 8-bit encoded (or None).""" - if filename is None: - return None - elif PyBytes_Check(filename): - return filename - elif PyUnicode_Check(filename): - return filename.encode(_FILENAME_ENCODING) - else: - raise TypeError, u"Argument must be string or unicode." - -cdef _forceStr(object s): - """Return s converted to str type of current Python - (bytes in Py2, unicode in Py3)""" - if s is None: - return None - if PY_MAJOR_VERSION < 3: - return s - elif PyBytes_Check(s): - return s.decode('ascii') - else: - # assume unicode - return s - -cdef bytes _forceBytes(object s): - u"""convert string or unicode object to bytes, assuming ascii encoding. +cpdef set_verbosity(int verbosity): + u"""Set htslib's hts_verbose global variable to the specified value. """ - if PY_MAJOR_VERSION < 3: - return s - elif s is None: - return None - elif PyBytes_Check(s): - return s - elif PyUnicode_Check(s): - return s.encode('ascii') - else: - raise TypeError, u"Argument must be string, bytes or unicode." 
+ return hts_set_verbosity(verbosity) -cdef inline bytes _forceCmdlineBytes(object s): - return _forceBytes(s) - -cdef _charptr_to_str(char* s): - if PY_MAJOR_VERSION < 3: - return s - else: - return s.decode("ascii") +cpdef get_verbosity(): + u"""Return the value of htslib's hts_verbose global variable. + """ + return hts_get_verbosity() -__all__ = [] +__all__ = [ + "get_verbosity", + "set_verbosity"] diff --git a/pysam/csamfile.pxd b/pysam/csamfile.pxd index f027b29..a76a599 100644 --- a/pysam/csamfile.pxd +++ b/pysam/csamfile.pxd @@ -1,10 +1,10 @@ -from calignmentfile cimport AlignedSegment, AlignmentFile +from pysam.calignmentfile cimport AlignedSegment, AlignmentFile ################################################# # Compatibility Layer for pysam < 0.8 # import all declarations from htslib -from chtslib cimport * +from pysam.chtslib cimport * cdef class AlignedRead(AlignedSegment): pass diff --git a/pysam/csamfile.pyx b/pysam/csamfile.pyx index b76ce89..ed9d79b 100644 --- a/pysam/csamfile.pyx +++ b/pysam/csamfile.pyx @@ -19,7 +19,7 @@ from cpython cimport PyErr_SetString, \ from cpython.version cimport PY_MAJOR_VERSION -from calignmentfile cimport AlignmentFile, AlignedSegment +from pysam.calignmentfile cimport AlignmentFile, AlignedSegment cdef class Samfile(AlignmentFile): diff --git a/pysam/csamtools.pxd b/pysam/csamtools.pxd index 10c89ba..53e04ea 100644 --- a/pysam/csamtools.pxd +++ b/pysam/csamtools.pxd @@ -1,19 +1,8 @@ from libc.stdlib cimport calloc, free cdef extern from "pysam_util.h": + int pysam_dispatch(int argc, char *argv[]) void pysam_set_stderr(int fd) void pysam_unset_stderr() - -cdef extern from "sam.h": - - ctypedef struct bam1_t - - # functions not actually declared in sam.h, but available - # as extern - # - # implemented in samtools/bam_md.c - int bam_prob_realn(bam1_t *b, char *ref) - int bam_cap_mapQ(bam1_t *b, char *ref, int thres) - diff --git a/pysam/csamtools.pyx b/pysam/csamtools.pyx index 7a3dd1f..c49f668 100644 --- 
a/pysam/csamtools.pyx +++ b/pysam/csamtools.pyx @@ -4,35 +4,8 @@ import tempfile import os import sys -import platform -from cpython cimport PyBytes_Check, PyUnicode_Check -from cpython.version cimport PY_MAJOR_VERSION - -######################################################################## -######################################################################## -######################################################################## -## Python 3 compatibility functions -######################################################################## -IS_PYTHON3 = PY_MAJOR_VERSION >= 3 - -cdef bytes _forceBytes(object s): - u"""convert string or unicode object to bytes, assuming ascii encoding. - """ - if PY_MAJOR_VERSION < 3: - return s - elif s is None: - return None - elif PyBytes_Check(s): - return s - elif PyUnicode_Check(s): - return s.encode('ascii') - else: - raise TypeError, u"Argument must be string, bytes or unicode." - - -cdef inline bytes _forceCmdlineBytes(object s): - return _forceBytes(s) +from pysam.cutils cimport force_bytes, force_cmdline_bytes class Outs: '''http://mail.python.org/pipermail/python-list/2000-June/038406.html''' @@ -71,8 +44,8 @@ class Outs: def _samtools_dispatch(method, - args = (), - catch_stdout = True): + args=(), + catch_stdout=True): '''call ``method`` in samtools providing arguments in args. .. 
note:: @@ -105,12 +78,12 @@ def _samtools_dispatch(method, # redirect stderr and stdout to file stderr_h, stderr_f = tempfile.mkstemp() pysam_set_stderr(stderr_h) - + if catch_stdout: stdout_h, stdout_f = tempfile.mkstemp() try: - stdout_save = Outs( sys.stdout.fileno() ) - stdout_save.setfd( stdout_h ) + stdout_save = Outs(sys.stdout.fileno()) + stdout_save.setfd(stdout_h) except AttributeError: # stdout has already been redirected catch_stdout = False @@ -121,53 +94,53 @@ def _samtools_dispatch(method, if method == "view": if "-o" in args: raise ValueError("option -o is forbidden in samtools view") - args = ( "-o", stdout_f ) + args + args = ("-o", stdout_f) + args # do the function call to samtools cdef char ** cargs cdef int i, n, retval n = len(args) - method = _forceCmdlineBytes(method) - args = [ _forceCmdlineBytes(a) for a in args ] + method = force_cmdline_bytes(method) + args = [force_cmdline_bytes(a) for a in args ] # allocate two more for first (dummy) argument (contains command) - cargs = calloc( n+2, sizeof( char *) ) + cargs = calloc(n + 2, sizeof(char *)) cargs[0] = "samtools" cargs[1] = method - for i from 0 <= i < n: cargs[i+2] = args[i] + for i from 0 <= i < n: + cargs[i + 2] = args[i] retval = pysam_dispatch(n+2, cargs) - free( cargs ) + free(cargs) # restore stdout/stderr. 
This will also flush, so # needs to be before reading back the file contents if catch_stdout: stdout_save.restore() try: - with open( stdout_f, "r") as inf: + with open(stdout_f, "r") as inf: out_stdout = inf.readlines() except UnicodeDecodeError: with open( stdout_f, "rb") as inf: # read binary output out_stdout = inf.read() - os.remove( stdout_f ) + os.remove(stdout_f) else: out_stdout = [] # get error messages pysam_unset_stderr() + out_stderr = [] try: - with open( stderr_f, "r") as inf: + with open(stderr_f, "r") as inf: out_stderr = inf.readlines() except UnicodeDecodeError: with open( stderr_f, "rb") as inf: # read binary output out_stderr = inf.read() - else: - out_stderr = [] finally: - os.remove( stderr_f ) + os.remove(stderr_f) return retval, out_stderr, out_stdout diff --git a/pysam/ctabix.pxd b/pysam/ctabix.pxd index 31bf7c1..2d7c546 100644 --- a/pysam/ctabix.pxd +++ b/pysam/ctabix.pxd @@ -13,8 +13,38 @@ cdef extern from "unistd.h" nogil: ssize_t read(int fd, void *buf, size_t count) int close(int fd) -from chtslib cimport hts_idx_t, hts_itr_t, htsFile, \ - kstream_t, kstring_t, gzFile, tbx_t +from pysam.chtslib cimport hts_idx_t, hts_itr_t, htsFile, \ + gzFile, tbx_t, kstring_t + +# These functions are put here and not in chtslib.pxd in order +# to avoid warnings for unused functions. +cdef extern from "pysam_stream.h" nogil: + + ctypedef struct kstream_t: + pass + + ctypedef struct kseq_t: + kstring_t name + kstring_t comment + kstring_t seq + kstring_t qual + + gzFile gzopen(char *, char *) + kseq_t *kseq_init(gzFile) + int kseq_read(kseq_t *) + void kseq_destroy(kseq_t *) + int gzclose(gzFile) + + kstream_t *ks_init(gzFile) + void ks_destroy(kstream_t *) + + # Retrieve characters from stream until delimiter + # is reached placing results in str. 
+ int ks_getuntil(kstream_t *, + int delimiter, + kstring_t * str, + int * dret) + cdef class tabix_file_iterator: cdef gzFile fh @@ -45,11 +75,6 @@ cdef class TabixFile: cdef encoding -########################################### -# used by cvcf.pyx -cdef _force_str(object s, encoding=?) - -########################################### cdef class Parser: cdef encoding diff --git a/pysam/ctabix.pyx b/pysam/ctabix.pyx index 056c21e..58d0ffb 100644 --- a/pysam/ctabix.pyx +++ b/pysam/ctabix.pyx @@ -1,5 +1,57 @@ # cython: embedsignature=True -# adds doc-strings for sphinx +# cython: profile=True +############################################################################### +############################################################################### +# Cython wrapper for access to tabix indexed files in bgzf format +############################################################################### +# The principal classes and functions defined in this module are: +# +# class TabixFile class wrapping tabix indexed files in bgzf format +# +# class asTuple Parser class for tuples +# class asGT Parser class for GTF formatted rows +# class asBed Parser class for Bed formatted rows +# class asVCF Parser class for VCF formatted rows +# +# class tabix_generic_iterator Streamed iterator of bgzf formatted files +# +# Additionally this module defines several additional classes that are part +# of the internal API. 
These are: +# +# class Parser base class for parsers of tab-separated rows +# class tabix_file_iterator +# class TabixIterator iterator class over rows in bgzf file +# class EmptyIterator +# +# For backwards compatibility, the following classes are also defined: +# +# class Tabixfile equivalent to TabixFile +# +############################################################################### +# +# The MIT License +# +# Copyright (c) 2015 Andreas Heger +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+# +############################################################################### import os import sys @@ -14,73 +66,16 @@ from cpython cimport PyErr_SetString, PyBytes_Check, \ from cpython.version cimport PY_MAJOR_VERSION -cimport TabProxies +cimport pysam.ctabixproxies as ctabixproxies -from chtslib cimport htsFile, hts_open, hts_close, HTS_IDX_START,\ - BGZF, bgzf_open, bgzf_close, bgzf_write, \ - ks_init, ks_destroy, gzFile, ks_getuntil, kstring_t, \ +from pysam.chtslib cimport htsFile, hts_open, hts_close, HTS_IDX_START,\ + BGZF, bgzf_open, bgzf_close, bgzf_write, gzFile, \ tbx_index_build, tbx_index_load, tbx_itr_queryi, tbx_itr_querys, \ tbx_conf_t, tbx_seqnames, tbx_itr_next, tbx_itr_destroy, \ tbx_destroy, gzopen, gzclose, gzerror, gzdopen -PYTHON3 = PY_MAJOR_VERSION >= 3 - -# filename encoding (copied from lxml.etree.pyx) -cdef str _FILENAME_ENCODING -_FILENAME_ENCODING = sys.getfilesystemencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = sys.getdefaultencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = 'ascii' - -#cdef char* _C_FILENAME_ENCODING -#_C_FILENAME_ENCODING = _FILENAME_ENCODING - -cdef inline bytes _encodeFilename(object filename): - u"""Make sure a filename is 8-bit encoded (or None). - """ - if filename is None: - return None - elif PyBytes_Check(filename): - return filename - elif PyUnicode_Check(filename): - return filename.encode(_FILENAME_ENCODING) - else: - raise TypeError, u"Argument must be string or unicode." - -cdef inline bytes _force_bytes(object s, encoding="ascii"): - u"""convert string or unicode object to bytes, assuming ascii encoding. - """ - if PY_MAJOR_VERSION < 3: - return s - elif s is None: - return None - elif PyBytes_Check(s): - return s - elif PyUnicode_Check(s): - return s.encode(encoding) - else: - raise TypeError, u"Argument must be string, bytes or unicode." 
- -cdef inline _charptr_to_str(char* s, encoding="ascii"): - if PY_MAJOR_VERSION < 3: - return s - else: - return s.decode(encoding) - -cdef _force_str(object s, encoding="ascii"): - """Return s converted to str type of current Python - (bytes in Py2, unicode in Py3)""" - if s is None: - return None - if PY_MAJOR_VERSION < 3: - return s - elif PyBytes_Check(s): - return s.decode(encoding) - else: - # assume unicode - return s - +from pysam.cutils cimport force_bytes, force_str, charptr_to_str +from pysam.cutils cimport encode_filename, from_string_and_size cdef class Parser: @@ -107,8 +102,8 @@ cdef class asTuple(Parser): A field in a row is accessed by numeric index. ''' cdef parse(self, char * buffer, int len): - cdef TabProxies.TupleProxy r - r = TabProxies.TupleProxy(self.encoding) + cdef ctabixproxies.TupleProxy r + r = ctabixproxies.TupleProxy(self.encoding) # need to copy - there were some # persistence issues with "present" r.copy(buffer, len) @@ -156,8 +151,8 @@ cdef class asGTF(Parser): ''' cdef parse(self, char * buffer, int len): - cdef TabProxies.GTFProxy r - r = TabProxies.GTFProxy(self.encoding) + cdef ctabixproxies.GTFProxy r + r = ctabixproxies.GTFProxy(self.encoding) r.copy(buffer, len) return r @@ -199,13 +194,13 @@ cdef class asBed(Parser): +-----------+-----------+------------------------------------------+ Only the first three fields are required. Additional - fields are optional, but if one is defined, all the preceeding + fields are optional, but if one is defined, all the preceding need to be defined as well. 
''' cdef parse(self, char * buffer, int len): - cdef TabProxies.BedProxy r - r = TabProxies.BedProxy(self.encoding) + cdef ctabixproxies.BedProxy r + r = ctabixproxies.BedProxy(self.encoding) r.copy(buffer, len) return r @@ -245,26 +240,54 @@ cdef class asVCF(Parser): ''' cdef parse(self, char * buffer, int len): - cdef TabProxies.VCFProxy r - r = TabProxies.VCFProxy(self.encoding) + cdef ctabixproxies.VCFProxy r + r = ctabixproxies.VCFProxy(self.encoding) r.copy(buffer, len) return r cdef class TabixFile: - '''*(filename, mode='r', parser = None)* + """Random access to bgzf formatted files that + have been indexed by :term:`tabix`. - opens a :term:`tabix file` for reading. A missing - index (*filename* + ".tbi") will raise an exception. *index* - specifies an alternative name of the index. + The file is automatically opened. The index file of file + ```` is expected to be called ``.tbi`` + by default (see parameter `index`). + + Parameters + ---------- + + filename : string + Filename of bgzf file to be opened. - *parser* sets the default parser for this tabix file. If *parser* - is None, the results are returned as an unparsed string. - Otherwise, *parser* is assumed to be a functor that will return - parsed data (see for example :class:`~pysam.asTuple` and - :class:`~pysam.asGTF`). + index : string + The filename of the index. If not set, the default is to + assume that the index is called ``filename.tbi` - ''' + mode : char + The file opening mode. Currently, only ``r`` is permitted. + + parser : :class:`pysam.Parser` + + sets the default parser for this tabix file. If `parser` + is None, the results are returned as an unparsed string. + Otherwise, `parser` is assumed to be a functor that will return + parsed data (see for example :class:`~pysam.asTuple` and + :class:`~pysam.asGTF`). + + encoding : string + + The encoding passed to the parser + + Raises + ------ + + ValueError + if index file is missing. 
+ + IOError + if file could not be opened + """ def __cinit__(self, filename, mode = 'r', @@ -307,15 +330,21 @@ cdef class TabixFile: self._filename_index = filename_index # encode all the strings to pass to tabix - _encoded_filename = _encodeFilename(filename) - _encoded_index = _encodeFilename(filename_index) + _encoded_filename = encode_filename(filename) + _encoded_index = encode_filename(filename_index) # open file - self.tabixfile = hts_open(_encoded_filename, 'r') + cdef char *cfilename = _encoded_filename + with nogil: + self.tabixfile = hts_open(cfilename, 'r') + if self.tabixfile == NULL: raise IOError("could not open file `%s`" % filename) - self.index = tbx_index_load(_encoded_index) + cfilename = _encoded_index + with nogil: + self.index = tbx_index_load(cfilename) + if self.index == NULL: raise IOError("could not open index for `%s`" % filename) @@ -330,7 +359,7 @@ cdef class TabixFile: index=self._filename_index, encoding=self.encoding) - def _isOpen(self): + def is_open(self): '''return true if samfile has been opened.''' return self.tabixfile != NULL @@ -362,7 +391,7 @@ cdef class TabixFile: some overhead, so beware. 
''' - if not self._isOpen(): + if not self.is_open(): raise ValueError("I/O operation on closed file") # convert coordinates to region string @@ -381,6 +410,7 @@ cdef class TabixFile: # get iterator cdef hts_itr_t * iter + cdef char *cstr cdef TabixFile fileobj # reopen the same file if necessary @@ -391,13 +421,16 @@ cdef class TabixFile: if region is None: # without region or reference - iterate from start - iter = tbx_itr_queryi(fileobj.index, - HTS_IDX_START, - 0, - 0) + with nogil: + iter = tbx_itr_queryi(fileobj.index, + HTS_IDX_START, + 0, + 0) else: - s = _force_bytes(region, encoding=fileobj.encoding) - iter = tbx_itr_querys(fileobj.index, s) + s = force_bytes(region, encoding=fileobj.encoding) + cstr = s + with nogil: + iter = tbx_itr_querys(fileobj.index, cstr) if iter == NULL: if region is None: @@ -425,27 +458,53 @@ cdef class TabixFile: return a + # context manager interface + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + return False + ############################################################### ############################################################### ############################################################### ## properties ############################################################### + property closed: + """"bool indicating the current state of the file object. + This is a read-only attribute; the close() method changes the value. + """ + def __get__(self): + return not self.is_open() + property filename: '''filename associated with this object.''' def __get__(self): - if not self._isOpen(): + if not self.is_open(): raise ValueError("I/O operation on closed file") return self._filename property header: '''the file header. - + + The file header consists of the lines at the beginning of a + file that are prefixed by the comment character ``#``. + + .. note:: + The header is returned as an iterator presenting lines + without the newline character. + .. 
note:: - The header is returned as an iterator presenting lines without the - newline character. + The header is only available for local files. For remote + files an Attribute Error is raised. + ''' def __get__(self): + if self.isremote: + raise AttributeError( + "the header is not available for remote files") return GZIteratorHead(self.filename) property contigs: @@ -514,12 +573,13 @@ cdef class TabixIterator: cdef int retval while 1: - - retval = tbx_itr_next( - self.tabixfile.tabixfile, - self.tabixfile.index, - self.iterator, - &self.buffer) + with nogil: + retval = tbx_itr_next( + self.tabixfile.tabixfile, + self.tabixfile.index, + self.iterator, + &self.buffer) + if retval < 0: break @@ -540,7 +600,7 @@ cdef class TabixIterator: elif retval < 0: raise StopIteration - return _charptr_to_str(self.buffer.s, self.encoding) + return charptr_to_str(self.buffer.s, self.encoding) def next(self): return self.__next__() @@ -603,8 +663,10 @@ cdef class GZIterator: if not os.path.exists(filename): raise IOError("No such file or directory: %s" % filename) - filename = _encodeFilename(filename) - self.gzipfile = gzopen(filename, "r") + filename = encode_filename(filename) + cdef char *cfilename = filename + with nogil: + self.gzipfile = gzopen(cfilename, "r") self._filename = filename self.kstream = ks_init(self.gzipfile) self.encoding = encoding @@ -629,7 +691,8 @@ cdef class GZIterator: cdef int dret = 0 cdef int retval = 0 while 1: - retval = ks_getuntil(self.kstream, '\n', &self.buffer, &dret) + with nogil: + retval = ks_getuntil(self.kstream, '\n', &self.buffer, &dret) if retval < 0: break @@ -643,7 +706,7 @@ cdef class GZIterator: cdef int retval = self.__cnext__() if retval < 0: raise StopIteration - return _force_str(self.buffer.s, self.encoding) + return force_str(self.buffer.s, self.encoding) cdef class GZIteratorHead(GZIterator): @@ -706,12 +769,14 @@ def tabix_compress(filename_in, WINDOW_SIZE = 64 * 1024 - fn = _encodeFilename(filename_out) - fp = bgzf_open( 
fn, "w") + fn = encode_filename(filename_out) + cdef char *cfn = fn + with nogil: + fp = bgzf_open(cfn, "w") if fp == NULL: raise IOError("could not open '%s' for writing" % (filename_out, )) - fn = _encodeFilename(filename_in) + fn = encode_filename(filename_in) fd_src = open(fn, O_RDONLY) if fd_src == 0: raise IOError("could not open '%s' for reading" % (filename_in, )) @@ -720,8 +785,9 @@ def tabix_compress(filename_in, c = 1 while c > 0: - c = read(fd_src, buffer, WINDOW_SIZE) - r = bgzf_write(fp, buffer, c) + with nogil: + c = read(fd_src, buffer, WINDOW_SIZE) + r = bgzf_write(fp, buffer, c) if r < 0: free(buffer) raise OSError("writing failed") @@ -743,7 +809,7 @@ def tabix_index( filename, preset = None, meta_char = "#", zerobased = False, - min_shift = -1, + int min_shift = -1, ): '''index tab-separated *filename* using tabix. @@ -839,8 +905,10 @@ def tabix_index( filename, conf.preset, conf.sc, conf.bc, conf.ec, conf.meta_char, conf.line_skip = conf_data - fn = _encodeFilename(filename) - tbx_index_build(fn, min_shift, &conf) + fn = encode_filename(filename) + cdef char *cfn = fn + with nogil: + tbx_index_build(cfn, min_shift, &conf) return filename @@ -967,8 +1035,8 @@ cdef class tabix_file_iterator: cdef int dret = 0 cdef int retval = 0 while 1: - - retval = ks_getuntil(self.kstream, '\n', &self.buffer, &dret) + with nogil: + retval = ks_getuntil(self.kstream, '\n', &self.buffer, &dret) if retval < 0: break @@ -1038,7 +1106,7 @@ class tabix_generic_iterator: if not line: break - s = _force_bytes(line, encoding) + s = force_bytes(line, encoding) b = s nbytes = len(line) assert b[nbytes] == '\0' @@ -1076,14 +1144,14 @@ def tabix_iterator(infile, parser): :class:`~pysam.asGTF`). 
""" - if PYTHON3: + if PY_MAJOR_VERSION >= 3: return tabix_generic_iterator(infile, parser) else: return tabix_file_iterator(infile, parser) # file objects can use C stdio # used to be: isinstance( infile, file): - # if PYTHON3: + # if PY_MAJOR_VERSION >= 3: # if isinstance( infile, io.IOBase ): # return tabix_copy_iterator( infile, parser ) # else: diff --git a/pysam/ctabixproxies.pxd b/pysam/ctabixproxies.pxd new file mode 100644 index 0000000..5317b81 --- /dev/null +++ b/pysam/ctabixproxies.pxd @@ -0,0 +1,59 @@ +#cdef extern from "Python.h": +# ctypedef struct FILE + +from libc.stdint cimport uint8_t, int32_t, uint32_t, int64_t, uint64_t + +cdef class TupleProxy: + + cdef: + char * data + char ** fields + int nfields + int index + int nbytes + int offset + bint is_modified + + cdef encoding + + cpdef int getMaxFields(self) + cpdef int getMinFields(self) +# cdef char * _getindex(self, int idx) + + cdef take(self, char * buffer, size_t nbytes) + cdef present(self, char * buffer, size_t nbytes) + cdef copy(self, char * buffer, size_t nbytes, bint reset=*) + cdef update(self, char * buffer, size_t nbytes) + +cdef class GTFProxy(TupleProxy) : + + cdef: + char * _attributes + cdef bint hasOwnAttributes + + cpdef int getMaxFields(self) + cpdef int getMinFields(self) + cdef char * getAttributes(self) + +cdef class NamedTupleProxy(TupleProxy): + pass + +cdef class BedProxy(NamedTupleProxy): + + cdef: + char * contig + uint32_t start + uint32_t end + int bedfields + + cpdef int getMaxFields(self) + cpdef int getMinFields(self) + cdef update(self, char * buffer, size_t nbytes) + +cdef class VCFProxy(NamedTupleProxy) : + + cdef: + char * contig + uint32_t pos + + cdef update(self, char * buffer, size_t nbytes) diff --git a/pysam/TabProxies.pyx b/pysam/ctabixproxies.pyx similarity index 87% rename from pysam/TabProxies.pyx rename to pysam/ctabixproxies.pyx index 0add831..d72f082 100644 --- a/pysam/TabProxies.pyx +++ b/pysam/ctabixproxies.pyx @@ -1,72 +1,12 @@ -import types 
-import sys -import string +from cpython cimport PyBytes_FromStringAndSize -from cpython.version cimport PY_MAJOR_VERSION -from cpython cimport PyErr_SetString, PyBytes_Check -from cpython cimport PyUnicode_Check, PyBytes_FromStringAndSize +from libc.stdio cimport printf, feof, fgets +from libc.string cimport strcpy, strlen, memcmp, memcpy, memchr, strstr, strchr +from libc.stdlib cimport free, malloc, calloc, realloc +from libc.stdlib cimport atoi, atol, atof -from libc.stdio cimport printf - - -cdef from_string_and_size(char* s, size_t length): - if PY_MAJOR_VERSION < 3: - return s[:length] - else: - return s[:length].decode("ascii") - -# filename encoding (copied from lxml.etree.pyx) -cdef str _FILENAME_ENCODING -_FILENAME_ENCODING = sys.getfilesystemencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = sys.getdefaultencoding() -if _FILENAME_ENCODING is None: - _FILENAME_ENCODING = 'ascii' - -cdef bytes _force_bytes(object s, encoding="ascii"): - u"""convert string or unicode object to bytes, assuming ascii encoding. - """ - if PY_MAJOR_VERSION < 3: - return s - elif s is None: - return None - elif PyBytes_Check(s): - return s - elif PyUnicode_Check(s): - return s.encode(encoding) - else: - raise TypeError, u"Argument must be string, bytes or unicode." 
- -cdef inline bytes _force_cmdline_bytes(object s): - return _force_bytes(s) - -cdef _charptr_to_str(char* s, encoding="ascii"): - if PY_MAJOR_VERSION < 3: - return s - else: - return s.decode(encoding) - -cdef inline _force_str(object s, encoding="ascii"): - """Return s converted to str type of current Python " - "(bytes in Py2, unicode in Py3)""" - if s is None: - return None - if PY_MAJOR_VERSION < 3: - return s - elif PyBytes_Check(s): - return s.decode(encoding) - else: - # assume unicode - return s - -cdef char * nextItem(char * buffer): - cdef char * pos - pos = strchr(buffer, '\t') - if pos == NULL: - raise ValueError("malformatted entry at %s" % buffer) - pos[0] = '\0' - pos += 1 - return pos +from pysam.cutils cimport force_bytes, force_str, charptr_to_str +from pysam.cutils cimport encode_filename, from_string_and_size cdef char *StrOrEmpty(char * buffer): if buffer == NULL: @@ -74,9 +14,13 @@ cdef char *StrOrEmpty(char * buffer): else: return buffer cdef int isNew(char * p, char * buffer, size_t nbytes): - if p == NULL: - return 0 - return not (buffer <= p < buffer + nbytes) + """return True if `p` is located within `buffer` of size + `nbytes` + """ + if p == NULL: + return 0 + return not (buffer <= p < buffer + nbytes) + cdef class TupleProxy: '''Proxy class for access to parsed row as a tuple. 
@@ -113,6 +57,39 @@ cdef class TupleProxy: if self.fields != NULL: free(self.fields) + def __copy__(self): + if self.is_modified: + raise NotImplementedError( + "copying modified tuples is not implemented") + cdef TupleProxy n = type(self)() + n.copy(self.data, self.nbytes, reset=True) + return n + + def compare(self, TupleProxy other): + '''return -1,0,1, if contents in this are binary + <,=,> to *other* + + ''' + if self.is_modified or other.is_modified: + raise NotImplementedError( + 'comparison of modified TupleProxies is not implemented') + if self.data == other.data: + return 0 + + if self.nbytes < other.nbytes: + return -1 + elif self.nbytes > other.nbytes: + return 1 + return memcmp(self.data, other.data, self.nbytes) + + def __richcmp__(self, TupleProxy other, int op): + if op == 2: # == operator + return self.compare(other) == 0 + elif op == 3: # != operator + return self.compare(other) != 0 + else: + return NotImplemented + cdef take(self, char * buffer, size_t nbytes): '''start presenting buffer. @@ -129,30 +106,34 @@ cdef class TupleProxy: ''' self.update(buffer, nbytes) - cdef copy(self, char * buffer, size_t nbytes): + cdef copy(self, char * buffer, size_t nbytes, bint reset=False): '''start presenting buffer of size *nbytes*. Buffer is a '\0'-terminated string without the '\n'. Take a copy of buffer. ''' - cdef int s # +1 for '\0' - s = sizeof(char) * (nbytes + 1) + cdef int s = sizeof(char) * (nbytes + 1) self.data = malloc(s) if self.data == NULL: raise ValueError("out of memory in TupleProxy.copy()") - self.nbytes = nbytes memcpy(self.data, buffer, s) + + if reset: + for x from 0 <= x < nbytes: + if self.data[x] == '\0': + self.data[x] = '\t' + self.update(self.data, nbytes) - cdef int getMinFields(self): + cpdef int getMinFields(self): '''return minimum number of fields.''' # 1 is not a valid tabix entry, but TupleProxy # could be more generic. 
return 1 - cdef int getMaxFields(self): + cpdef int getMaxFields(self): '''return maximum number of fields. Return 0 for unknown length.''' return 0 @@ -180,7 +161,7 @@ cdef class TupleProxy: assert strlen(buffer) == nbytes, \ "length of buffer (%i) != number of bytes (%i)" % ( - strlen(buffer), nbytes) + strlen(buffer), nbytes) if buffer[nbytes] != 0: raise ValueError("incomplete line at %s" % buffer) @@ -263,7 +244,7 @@ cdef class TupleProxy: raise IndexError( "list index out of range %i >= %i" % (i, self.nfields)) - return _force_str(self.fields[i], self.encoding) + return force_str(self.fields[i], self.encoding) def __getitem__(self, key): if type(key) == int: @@ -293,7 +274,7 @@ cdef class TupleProxy: return # conversion with error checking - value = _force_bytes(value) + value = force_bytes(value) cdef char * tmp = value self.fields[idx] = malloc((strlen( tmp ) + 1) * sizeof(char)) if self.fields[idx] == NULL: @@ -326,11 +307,12 @@ cdef class TupleProxy: if retval == NULL: return None else: - return _force_str(retval, self.encoding) + return force_str(retval, self.encoding) def __str__(self): '''return original data''' # copy and replace \0 bytes with \t characters + cdef char * cpy if self.is_modified: # todo: treat NULL values result = [] @@ -359,7 +341,7 @@ def toDot(v): def quote(v): '''return a quoted attribute.''' - if type(v) in types.StringTypes: + if isinstance(v, str): return '"%s"' % v else: return str(v) @@ -388,11 +370,11 @@ cdef class GTFProxy(TupleProxy): if self.hasOwnAttributes: free(self._attributes) - cdef int getMinFields(self): + cpdef int getMinFields(self): '''return minimum number of fields.''' return 9 - cdef int getMaxFields(self): + cpdef int getMaxFields(self): '''return max number of fields.''' return 9 @@ -495,7 +477,9 @@ cdef class GTFProxy(TupleProxy): # ...; transcript_name "TXNRD2;-001"; .... 
# The current heuristic is to split on a semicolon followed by a # space, see also http://mblab.wustl.edu/GTF22.html - fields = [x.strip() for x in attributes.split("; ")] + + # Remove white space to prevent a last empty field. + fields = [x.strip() for x in attributes.strip().split("; ")] result = {} @@ -507,7 +491,7 @@ cdef class GTFProxy(TupleProxy): # split at most once in order to avoid separating # multi-word values - d = [x.strip() for x in string.split(f, " ", maxsplit=1)] + d = [x.strip() for x in f.split(" ", 1)] n,v = d[0], d[1] if len(d) > 2: @@ -540,7 +524,7 @@ cdef class GTFProxy(TupleProxy): aa = [] for k,v in d.items(): - if type(v) in types.StringTypes: + if isinstance(v, str): aa.append( '%s "%s"' % (k,v) ) else: aa.append( '%s %s' % (k,str(v)) ) @@ -624,7 +608,7 @@ cdef class GTFProxy(TupleProxy): # add space in order to make sure # to not pick up a field that is a prefix of another field - r = _force_bytes(item + " ") + r = force_bytes(item + " ") query = r start = strstr(attributes, query) @@ -642,11 +626,11 @@ cdef class GTFProxy(TupleProxy): while end[0] != '\0' and end[0] != '"': end += 1 l = end - start - result = _force_str(PyBytes_FromStringAndSize(start, l), + result = force_str(PyBytes_FromStringAndSize(start, l), self.encoding) return result else: - return _force_str(start, self.encoding) + return force_str(start, self.encoding) def setAttribute(self, name, value): '''convenience method to set an attribute.''' @@ -673,7 +657,7 @@ cdef class NamedTupleProxy(TupleProxy): if self.nfields < idx: raise KeyError("field %s not set" % key) if f == str: - return _force_str(self.fields[idx], + return force_str(self.fields[idx], self.encoding) return f(self.fields[idx]) @@ -697,11 +681,11 @@ cdef class BedProxy(NamedTupleProxy): 'blockSizes': (10, str), 'blockStarts': (11, str), } - cdef int getMinFields(self): + cpdef int getMinFields(self): '''return minimum number of fields.''' return 3 - cdef int getMaxFields(self): + cpdef int 
getMaxFields(self): '''return max number of fields.''' return 12 @@ -736,14 +720,16 @@ cdef class BedProxy(NamedTupleProxy): cdef int save_fields = self.nfields # ensure fields to use correct format self.nfields = self.bedfields - retval = TupleProxy.__str__( self ) + retval = TupleProxy.__str__(self) self.nfields = save_fields return retval def __setattr__(self, key, value ): '''set attribute.''' - if key == "start": self.start = value - elif key == "end": self.end = value + if key == "start": + self.start = value + elif key == "end": + self.end = value cdef int idx idx, f = self.map_key2field[key] diff --git a/pysam/cutils.pxd b/pysam/cutils.pxd new file mode 100644 index 0000000..c2a7c5f --- /dev/null +++ b/pysam/cutils.pxd @@ -0,0 +1,27 @@ +######################################################################### +# Utility functions used across pysam +######################################################################### +cimport cython +from cpython cimport array as c_array + +cpdef parse_region(reference=*, start=*, end=*, region=*) + +######################################################################### +# Utility functions for quality string conversions + +cpdef c_array.array qualitystring_to_array(bytes input_str, int offset=*) +cpdef array_to_qualitystring(c_array.array arr, int offset=*) +cpdef qualities_to_qualitystring(qualities, int offset=*) + +######################################################################## +######################################################################## +######################################################################## +## Python 3 compatibility functions +######################################################################## +cdef charptr_to_str(char *s, encoding=*) +cdef force_str(object s, encoding=*) +cdef bytes force_bytes(object s, encoding=*) +cdef bytes force_cmdline_bytes(object s, encoding=*) +cdef bytes encode_filename(object filename) +cdef from_string_and_size(char *s, size_t length) + 
diff --git a/pysam/cutils.pyx b/pysam/cutils.pyx new file mode 100644 index 0000000..afbd97d --- /dev/null +++ b/pysam/cutils.pyx @@ -0,0 +1,214 @@ +import types +import sys +import string +import re + +from cpython.version cimport PY_MAJOR_VERSION +from cpython cimport PyBytes_Check, PyUnicode_Check + +from cpython cimport array as c_array +cimport cython + +##################################################################### +# hard-coded constants +cdef int MAX_POS = 2 << 29 + +################################################################# +# Utility functions for quality string conversions +cpdef c_array.array qualitystring_to_array(bytes input_str, int offset=33): + """convert a qualitystring to an array of quality values.""" + if input_str is None: + return None + cdef char i + return c_array.array('B', [i - offset for i in input_str]) + + +cpdef array_to_qualitystring(c_array.array qualities, int offset=33): + """convert an array of quality values to a string.""" + if qualities is None: + return None + cdef int x + + cdef c_array.array result + result = c_array.clone(qualities, len(qualities), zero=False) + + for x from 0 <= x < len(qualities): + result[x] = qualities[x] + offset + return result.tostring() + + +cpdef qualities_to_qualitystring(qualities, int offset=33): + """convert a list or array of quality scores to the string + representation used in the SAM format. + + Parameters + ---------- + offset : int + offset to be added to the quality scores to arrive at + the characters of the quality string (default=33). 
+ + Returns + ------- + string + a quality string + + """ + cdef char x + if qualities is None: + return None + elif isinstance(qualities, c_array.array): + return array_to_qualitystring(qualities, offset=offset) + else: + # tuples and lists + return "".join([chr(x + offset) for x in qualities]) + + +######################################################################## +######################################################################## +######################################################################## +## Python 3 compatibility functions +######################################################################## +IS_PYTHON3 = PY_MAJOR_VERSION >= 3 + +cdef from_string_and_size(char* s, size_t length): + if PY_MAJOR_VERSION < 3: + return s[:length] + else: + return s[:length].decode("ascii") + +# filename encoding (copied from lxml.etree.pyx) +cdef str _FILENAME_ENCODING +_FILENAME_ENCODING = sys.getfilesystemencoding() +if _FILENAME_ENCODING is None: + _FILENAME_ENCODING = sys.getdefaultencoding() +if _FILENAME_ENCODING is None: + _FILENAME_ENCODING = 'ascii' + +#cdef char* _C_FILENAME_ENCODING +#_C_FILENAME_ENCODING = _FILENAME_ENCODING + +cdef bytes encode_filename(object filename): + """Make sure a filename is 8-bit encoded (or None).""" + if filename is None: + return None + elif PyBytes_Check(filename): + return filename + elif PyUnicode_Check(filename): + return filename.encode(_FILENAME_ENCODING) + else: + raise TypeError(u"Argument must be string or unicode.") + +cdef bytes force_bytes(object s, encoding="ascii"): + u"""convert string or unicode object to bytes, assuming + ascii encoding. 
+ """ + if PY_MAJOR_VERSION < 3: + return s + elif s is None: + return None + elif PyBytes_Check(s): + return s + elif PyUnicode_Check(s): + return s.encode(encoding) + else: + raise TypeError(u"Argument must be string, bytes or unicode.") + +cdef bytes force_cmdline_bytes(object s, encoding="ascii"): + return force_bytes(s) + +cdef charptr_to_str(char* s, encoding="ascii"): + if s == NULL: + return None + if PY_MAJOR_VERSION < 3: + return s + else: + return s.decode(encoding) + +cdef force_str(object s, encoding="ascii"): + """Return s converted to str type of current Python + (bytes in Py2, unicode in Py3)""" + if s is None: + return None + if PY_MAJOR_VERSION < 3: + return s + elif PyBytes_Check(s): + return s.decode(encoding) + else: + # assume unicode + return s + +cpdef parse_region(reference=None, + start=None, + end=None, + region=None): + """parse alternative ways to specify a genomic region. A region can + either be specified by :term:`reference`, `start` and + `end`. `start` and `end` denote 0-based, half-open + intervals. + + Alternatively, a samtools :term:`region` string can be + supplied. + + If any of the coordinates are missing they will be replaced by the + minimum (`start`) or maximum (`end`) coordinate. + + Note that region strings are 1-based, while `start` and `end` denote + an interval in python coordinates. + + Returns + ------- + + tuple : a tuple of `reference`, `start` and `end`. + + Raises + ------ + + ValueError + for invalid or out of bounds regions. 
+ + """ + cdef int rtid + cdef long long rstart + cdef long long rend + + rtid = -1 + rstart = 0 + rend = MAX_POS + if start != None: + try: + rstart = start + except OverflowError: + raise ValueError('start out of range (%i)' % start) + + if end != None: + try: + rend = end + except OverflowError: + raise ValueError('end out of range (%i)' % end) + + if region: + region = force_str(region) + parts = re.split("[:-]", region) + reference = parts[0] + if len(parts) >= 2: + rstart = int(parts[1]) - 1 + if len(parts) >= 3: + rend = int(parts[2]) + + if not reference: + return None, 0, 0 + + if not 0 <= rstart < MAX_POS: + raise ValueError('start out of range (%i)' % rstart) + if not 0 <= rend <= MAX_POS: + raise ValueError('end out of range (%i)' % rend) + if rstart > rend: + raise ValueError( + 'invalid region: start (%i) > end (%i)' % (rstart, rend)) + + return force_bytes(reference), rstart, rend + + +__all__ = ["qualitystring_to_array", + "array_to_qualitystring", + "qualities_to_qualitystring"] diff --git a/pysam/cvcf.pxd b/pysam/cvcf.pxd index a583d99..139597f 100644 --- a/pysam/cvcf.pxd +++ b/pysam/cvcf.pxd @@ -1,42 +1,2 @@ -cdef extern from "stdlib.h": - void free(void *) - void *malloc(size_t) - void *calloc(size_t,size_t) - void *realloc(void *,size_t) - int c_abs "abs" (int) - int c_abs "abs" (int) - int atoi( char *nptr) - long atol( char *nptr) - double atof( char *nptr) - -cdef extern from "Python.h": - ctypedef struct FILE - FILE* PyFile_AsFile(object) - char *fgets(char *str, int size, FILE *ifile) - int feof(FILE *stream) - size_t strlen(char *s) - size_t getline(char **lineptr, size_t *n, FILE *stream) - char *strstr(char *, char *) - char *strchr(char *string, int c) - int fileno(FILE *stream) - -cdef extern from "string.h": - int strcmp(char *s1, char *s2) - int strncmp(char *s1,char *s2,size_t len) - char *strcpy(char *dest,char *src) - char *strncpy(char *dest,char *src, size_t len) - char *strdup(char *) - char *strcat(char *,char *) - size_t 
strlen(char *s) - int memcmp( void * s1, void *s2, size_t len ) - void *memcpy(void *dest, void *src, size_t n) - void *memchr(void *s, int c, size_t n) - -cdef extern from "stdint.h": - ctypedef int int64_t - ctypedef int int32_t - ctypedef int uint32_t - ctypedef int uint8_t - ctypedef int uint64_t diff --git a/pysam/cvcf.pyx b/pysam/cvcf.pyx index 5feb2a6..e9fe3d0 100644 --- a/pysam/cvcf.pyx +++ b/pysam/cvcf.pyx @@ -1,3 +1,4 @@ +# cython: embedsignature=True # # Code to read, write and edit VCF files # @@ -39,6 +40,7 @@ # # NOTE: the position that is returned to Python is 0-based, NOT # 1-based as in the VCF file. +# NOTE: There is also preliminary VCF functionality in the VariantFile class. # # TODO: # only v4.0 writing is complete; alleles are not converted to v3.3 format @@ -48,8 +50,14 @@ from collections import namedtuple, defaultdict from operator import itemgetter import sys, re, copy, bisect -cimport ctabix -cimport TabProxies +from libc.stdlib cimport atoi +from libc.stdint cimport int8_t, int16_t, int32_t, int64_t +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t + +cimport pysam.ctabix as ctabix +cimport pysam.ctabixproxies as ctabixproxies + +from pysam.cutils cimport force_str import pysam @@ -83,43 +91,45 @@ def parse_regions( string ): raise ValueError("Don't understand region string '%s'" % r) result.append( (chrom,start,end) ) return result - + FORMAT = namedtuple('FORMAT','id numbertype number type description missingvalue') ########################################################################################################### -# +# # New class -# +# ########################################################################################################### -cdef class VCFRecord( TabProxies.TupleProxy): +cdef class VCFRecord( ctabixproxies.TupleProxy): '''vcf record. 
- initialized from data and vcf meta + initialized from data and vcf meta ''' - + cdef vcf cdef char * contig cdef uint32_t pos def __init__(self, vcf): self.vcf = vcf + self.encoding = vcf.encoding # if len(data) != len(self.vcf._samples): # self.vcf.error(str(data), - # self.BAD_NUMBER_OF_COLUMNS, + # self.BAD_NUMBER_OF_COLUMNS, # "expected %s for %s samples (%s), got %s" % \ - # (len(self.vcf._samples), - # len(self.vcf._samples), - # self.vcf._samples, + # (len(self.vcf._samples), + # len(self.vcf._samples), + # self.vcf._samples, # len(data))) - - def __cinit__(self, vcf): + + def __cinit__(self, vcf): # start indexed access at genotypes self.offset = 9 - + self.vcf = vcf - + self.encoding = vcf.encoding + def error(self, line, error, opt=None): '''raise error.''' # pass to vcf file for error handling @@ -127,10 +137,10 @@ cdef class VCFRecord( TabProxies.TupleProxy): cdef update(self, char * buffer, size_t nbytes): '''update internal data. - + nbytes does not include the terminal '\0'. 
''' - TabProxies.TupleProxy.update(self, buffer, nbytes) + ctabixproxies.TupleProxy.update(self, buffer, nbytes) self.contig = self.fields[0] # vcf counts from 1 - correct here @@ -149,13 +159,13 @@ cdef class VCFRecord( TabProxies.TupleProxy): def __get__(self): return self.fields[2] property ref: - def __get__(self): + def __get__(self): return self.fields[3] property alt: def __get__(self): # convert v3.3 to v4.0 alleles below - alt = self.fields[4] + alt = self.fields[4] if alt == ".": alt = [] else: alt = alt.upper().split(',') return alt @@ -164,7 +174,7 @@ cdef class VCFRecord( TabProxies.TupleProxy): def __get__(self): qual = self.fields[5] if qual == b".": qual = -1 - else: + else: try: qual = float(qual) except: self.vcf.error(str(self),self.QUAL_NOT_NUMERICAL) return qual @@ -187,7 +197,7 @@ cdef class VCFRecord( TabProxies.TupleProxy): if len(elts) == 1: v = None elif len(elts) == 2: v = elts[1] else: self.vcf.error(str(self),self.ERROR_INFO_STRING) - info[elts[0]] = self.vcf.parse_formatdata(elts[0], v, self.vcf._info, str(self)) + info[elts[0]] = self.vcf.parse_formatdata(elts[0], v, self.vcf._info, str(self.vcf)) return info property format: @@ -199,7 +209,7 @@ cdef class VCFRecord( TabProxies.TupleProxy): return self.vcf._samples def __getitem__(self, key): - + # parse sample columns values = self.fields[self.vcf._sample2column[key]].split(':') alt = self.alt @@ -225,9 +235,9 @@ cdef class VCFRecord( TabProxies.TupleProxy): result[format[idx]] = result[format[idx]][:expected] return result - -cdef class asVCFRecord(ctabix.Parser): + +cdef class asVCFRecord(ctabix.Parser): '''converts a :term:`tabix row` into a VCF record.''' cdef vcffile def __init__(self, vcffile): @@ -351,10 +361,10 @@ class VCF(object): def parse_format(self,line,format,filter=False): if self._version == 40: - if not format.startswith('<'): + if not format.startswith('<'): self.error(line,self.V40_MISSING_ANGLE_BRACKETS) format = "<"+format - if not format.endswith('>'): + if not 
format.endswith('>'): self.error(line,self.V40_MISSING_ANGLE_BRACKETS) format += ">" format = format[1:-1] @@ -372,9 +382,9 @@ class VCF(object): elif first.startswith('Type='): data['type'] = first.split('=')[1] elif first.startswith('Description='): elts = format.split('"') - if len(elts)<3: + if len(elts)<3: self.error(line,self.FORMAT_MISSING_QUOTES) - elts = first.split('=') + [rest] + elts = first.split('=') + [rest] data['descr'] = elts[1] rest = '"'.join(elts[2:]) if rest.startswith(','): rest = rest[1:] @@ -384,7 +394,7 @@ class VCF(object): idx += 1 if filter and idx==1: idx=3 # skip number and type fields for FILTER format strings if not data['id']: self.error(line,self.BADLY_FORMATTED_FORMAT_STRING) - if 'descr' not in data: + if 'descr' not in data: # missing description self.error(line,self.BADLY_FORMATTED_FORMAT_STRING) data['descr'] = "" @@ -420,7 +430,7 @@ class VCF(object): data['type'] = 'Flag' return FORMAT(data['id'],t,n,data['type'],data['descr'],data['missing']) - + def format_format( self, fmt, filter=False ): values = [('ID',fmt.id)] if fmt.number != None and not filter: @@ -481,7 +491,7 @@ class VCF(object): for idx,v in enumerate(data[k]): if v == format[k].missingvalue: data[k][idx] = "." 
# make sure GT comes first; and ensure fixed ordering; also convert GT data back to string - for k in data: + for k in data: if k != 'GT': sdata.append( (k,data[k]) ) sdata.sort() if 'GT' in data: @@ -507,12 +517,12 @@ class VCF(object): for f in [FORMAT('GT',self.NT_NUMBER,1,'String','Genotype','.'), FORMAT('DP',self.NT_NUMBER,1,'Integer','Read depth at this position for this sample',-1), FORMAT('FT',self.NT_NUMBER,1,'String','Sample Genotype Filter','.'), - FORMAT('GL',self.NT_UNKNOWN,-1,'Float','Genotype likelihoods','.'), - FORMAT('GLE',self.NT_UNKNOWN,-1,'Float','Genotype likelihoods','.'), + FORMAT('GL',self.NT_UNKNOWN,-1,'Float','Genotype likelihoods','.'), + FORMAT('GLE',self.NT_UNKNOWN,-1,'Float','Genotype likelihoods','.'), FORMAT('GQ',self.NT_NUMBER,1,'Integer','Genotype Quality',-1), FORMAT('PL',self.NT_GENOTYPES,-1,'Integer','Phred-scaled genotype likelihoods', '.'), - FORMAT('GP',self.NT_GENOTYPES,-1,'Float','Genotype posterior probabilities','.'), - FORMAT('GQ',self.NT_GENOTYPES,-1,'Integer','Conditional genotype quality','.'), + FORMAT('GP',self.NT_GENOTYPES,-1,'Float','Genotype posterior probabilities','.'), + FORMAT('GQ',self.NT_GENOTYPES,-1,'Integer','Conditional genotype quality','.'), FORMAT('HQ',self.NT_UNKNOWN,-1,'Integer','Haplotype Quality',-1), # unknown number, since may be haploid FORMAT('PS',self.NT_UNKNOWN,-1,'Integer','Phase set','.'), FORMAT('PQ',self.NT_NUMBER,1,'Integer','Phasing quality',-1), @@ -557,7 +567,7 @@ class VCF(object): for key,value in self._header: stream.write("##%s=%s\n" % (key,value)) for var,label in [(self._info,"INFO"),(self._filter,"FILTER"),(self._format,"FORMAT")]: for f in var.itervalues(): stream.write("##%s=%s\n" % (label,self.format_format(f,filter=(label=="FILTER")))) - + def parse_heading( self, line ): assert line.startswith('#') @@ -572,7 +582,7 @@ class VCF(object): if len(headings)<=i or headings[i] != s: - if len(headings) <= i: + if len(headings) <= i: err = "(%sth entry not found)" % (i+1) else: 
err = "(found %s, expected %s)" % (headings[i],s) @@ -586,7 +596,7 @@ class VCF(object): self._samples = headings[9:] self._sample2column = dict( [(y,x+9) for x,y in enumerate( self._samples ) ] ) - + def write_heading( self, stream ): stream.write("#" + "\t".join(self._required + self._samples) + "\n") @@ -619,12 +629,12 @@ class VCF(object): if f.type in ["Float","Integer"] and len(values)>0 and values[-1].find(';') > -1: self.error(line,self.ERROR_TRAILING_DATA,values[-1]) values[-1] = values[-1].split(';')[0] - if f.type == "Integer": + if f.type == "Integer": for idx,v in enumerate(values): try: if v == ".": values[idx] = f.missingvalue else: values[idx] = int(v) - except: + except: self.error(line,self.ERROR_FORMAT_NOT_INTEGER,"%s=%s" % (key, str(values))) return [0] * len(values) return values @@ -633,7 +643,7 @@ class VCF(object): if f.id == "GT": values = list(map( self.convertGT, values )) return values elif f.type == "Character": - for v in values: + for v in values: if len(v) != 1: self.error(line,self.ERROR_FORMAT_NOT_CHAR) return values elif f.type == "Float": @@ -662,7 +672,7 @@ class VCF(object): cols.append("") else: self.error(line, - self.BAD_NUMBER_OF_COLUMNS, + self.BAD_NUMBER_OF_COLUMNS, "expected %s for %s samples (%s), got %s" % (len(self._samples)+9, len(self._samples), self._samples, len(cols))) chrom = cols[0] @@ -677,7 +687,7 @@ class VCF(object): # end of first-pass parse for sortedVCF if lineparse: return chrom, pos, line - + id = cols[2] ref = cols[3].upper() @@ -689,7 +699,7 @@ class VCF(object): for c in ref: if c not in "ACGTN": self.error(line,self.UNKNOWN_CHAR_IN_REF) if "N" in ref: ref = get_sequence(chrom,pos,pos+len(ref),self._reference) - + # make sure reference is sane if self._reference: left = max(0,pos-100) @@ -703,7 +713,7 @@ class VCF(object): else: alt = cols[4].upper().split(',') if cols[5] == ".": qual = -1 - else: + else: try: qual = float(cols[5]) except: self.error(line,self.QUAL_NOT_NUMERICAL) @@ -719,9 +729,9 @@ 
class VCF(object): if len(elts) == 1: v = None elif len(elts) == 2: v = elts[1] else: self.error(line,self.ERROR_INFO_STRING) - info[elts[0]] = self.parse_formatdata(elts[0], - v, - self._info, + info[elts[0]] = self.parse_formatdata(elts[0], + v, + self._info, line) # Gracefully deal with absent FORMAT column @@ -731,7 +741,7 @@ class VCF(object): # check: all filters are defined for f in filter: if f not in self._filter: self.error(line,self.FILTER_NOT_DEFINED, f) - + # check: format fields are defined if self._format: for f in format: @@ -794,7 +804,7 @@ class VCF(object): self.error(line,self.MISSING_INDEL_ALLELE_REF_BASE) # trim trailing bases in alleles - # AH: not certain why trimming this needs to be added + # AH: not certain why trimming this needs to be added # disabled now for unit testing # if alt: # for i in range(1,min(len(ref),min(map(len,alt)))): @@ -837,10 +847,10 @@ class VCF(object): else: if expected == -1: value = "." else: value = ",".join(["."]*expected) - - dict[format[idx]] = self.parse_formatdata(format[idx], - value, - self._format, + + dict[format[idx]] = self.parse_formatdata(format[idx], + value, + self._format, line) if expected != -1 and len(dict[format[idx]]) != expected: self.error(line,self.BAD_NUMBER_OF_PARAMETERS, @@ -861,7 +871,7 @@ class VCF(object): 'format':format} for key,value in zip(self._samples,samples): d[key] = value - + return d @@ -872,14 +882,14 @@ class VCF(object): if data['alt'] == []: alt = "." else: alt = ",".join(data['alt']) if data['filter'] == None: filter = "." - elif data['filter'] == []: + elif data['filter'] == []: if self._version == 33: filter = "0" else: filter = "PASS" else: filter = ';'.join(data['filter']) if data['qual'] == -1: qual = "." 
else: qual = str(data['qual']) - output = [data['chrom'], + output = [data['chrom'], str(data['pos']+1), # change to 1-based position data['id'], data['ref'], @@ -890,17 +900,17 @@ class VCF(object): data['info'], self._info, separator=";"), self.format_formatdata( data['format'], self._format, value=False)] - + for s in self._samples: output.append(self.format_formatdata( data[s], self._format, key=False)) - + stream.write( "\t".join(output) + "\n" ) def _parse_header(self, stream): self._lineno = 0 for line in stream: - line = ctabix._force_str(line, self.encoding) + line = force_str(line, self.encoding) self._lineno += 1 if line.startswith('##'): self.parse_header(line.strip()) @@ -1033,13 +1043,13 @@ class VCF(object): self.encoding=encoding self.tabixfile = pysam.Tabixfile(filename, encoding=encoding) self._parse_header(self.tabixfile.header) - + def fetch(self, reference=None, - start=None, - end=None, + start=None, + end=None, region=None ): - """ Parse a stream of VCF-formatted lines. + """ Parse a stream of VCF-formatted lines. Initializes class instance and return generator """ return self.tabixfile.fetch( reference, @@ -1053,7 +1063,7 @@ class VCF(object): returns a validated record. 
''' - + raise NotImplementedError("needs to be checked") chrom, pos = record.chrom, record.pos @@ -1079,11 +1089,11 @@ class VCF(object): faref = faref_leftflank[pos-left:] if faref != ref: self.error(str(record),self.WRONG_REF,"(reference is %s, VCF says %s)" % (faref,ref)) ref = faref - + # check: format fields are defined for f in record.format: if f not in self._format: self.error(str(record),self.FORMAT_NOT_DEFINED, f) - + # check: all filters are defined for f in record.filter: if f not in self._filter: self.error(str(record),self.FILTER_NOT_DEFINED, f) @@ -1136,7 +1146,7 @@ class VCF(object): for allele in alt: if not alleleRegEx.match(allele): self.error(str(record),self.V40_BAD_ALLELE,allele) - + # check for leading nucleotide in indel calls for allele in alt: @@ -1146,7 +1156,7 @@ class VCF(object): self.error(str(record),self.MISSING_INDEL_ALLELE_REF_BASE) # trim trailing bases in alleles - # AH: not certain why trimming this needs to be added + # AH: not certain why trimming this needs to be added # disabled now for unit testing # for i in range(1,min(len(ref),min(map(len,alt)))): # if len(set(allele[-1].upper() for allele in alt)) > 1 or ref[-1].upper() != alt[0][-1].upper(): @@ -1177,5 +1187,3 @@ class VCF(object): __all__ = [ "VCF", "VCFRecord", ] - - diff --git a/pysam/htslib_util.h b/pysam/htslib_util.h index 1f9d491..46e44bc 100644 --- a/pysam/htslib_util.h +++ b/pysam/htslib_util.h @@ -36,9 +36,6 @@ typedef khash_t(s2i) s2i_t; ////////////////////////////////////////////////////////////////// // various helper functions // -// fill pileup buffer for next position. - -int pysam_dispatch(int argc, char *argv[] ); /*! 
@abstract Update the variable length data within a bam1_t entry diff --git a/pysam/pysam_util.c b/pysam/pysam_util.c index 9560ed0..f8ccae7 100644 --- a/pysam/pysam_util.c +++ b/pysam/pysam_util.c @@ -87,7 +87,7 @@ int pysam_dispatch(int argc, char *argv[] ) if (argc < 2) return 1; int retval = 0; - + if (strcmp(argv[1], "view") == 0) retval = main_samview(argc-1, argv+1); else if (strcmp(argv[1], "import") == 0) retval = main_import(argc-1, argv+1); else if (strcmp(argv[1], "mpileup") == 0) retval = bam_mpileup(argc-1, argv+1); diff --git a/pysam/tabix_util.c b/pysam/tabix_util.c index 89ffc23..f94b09d 100644 --- a/pysam/tabix_util.c +++ b/pysam/tabix_util.c @@ -1,6 +1,7 @@ // Definition of pysamerr -#include "stdio.h" -#include "unistd.h" +#include +#include +#include FILE * pysamerr = NULL; #if !(_POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700) diff --git a/pysam/version.py b/pysam/version.py index 02c7d45..9047c04 100644 --- a/pysam/version.py +++ b/pysam/version.py @@ -1,6 +1,6 @@ # pysam versioning information -__version__ = "0.8.3" +__version__ = "0.8.4" __samtools_version__ = "1.2" diff --git a/requires.txt b/requirements.txt similarity index 100% rename from requires.txt rename to requirements.txt diff --git a/save/pysam_test2.6.py b/save/pysam_test2.6.py index a59968c..eb4848a 100755 --- a/save/pysam_test2.6.py +++ b/save/pysam_test2.6.py @@ -355,7 +355,7 @@ class IOTest(unittest.TestCase): If *use_template* is set, the header is copied from infile using the template mechanism, otherwise target names and lengths are passed - explicitely. + explicitly. ''' diff --git a/setup.py b/setup.py index 8c0a132..8009437 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,21 @@ -#!/usr/bin/python -''' +#! /usr/bin/python + +'''The SAM/BAM/CRAM format is a way to store efficiently large numbers +of alignments, such as those routinely are created by next-generation +sequencing methods. 
+ +This module provides a low-level wrapper around the htslib C-API as +using cython and a high-level API for convenient access to the data in +SAM/BAM formatted files. Also included is an interface to the samtools +command line utilities and the tabix C-API for reading compressed and +indexed tabular data. -pysam -***** +The current version wraps htslib-1.2.1 and samtools-1.2. + +See: +http://www.htslib.org +https://github.com/pysam-developers/pysam +http://pysam.readthedocs.org/en/stable ''' @@ -239,50 +252,28 @@ if len(sys.argv) >= 2 and sys.argv[1] == "refresh": from setuptools import Extension, setup ####################################################### -####################################################### +parts = ["samtools", "htslib", "tabix", + "faidx", "samfile", "utils", + "alignmentfile", "tabixproxies", + "vcf", "bcf"] + try: from Cython.Distutils import build_ext except ImportError: # no Cython available - use existing C code cmdclass = {} - csamtools_sources = ["pysam/csamtools.c"] - chtslib_sources = ["pysam/chtslib.c"] - tabix_sources = ["pysam/ctabix.c"] - faidx_sources = ["pysam/cfaidx.c"] - csamfile_sources = ["pysam/csamfile.c"] - calignmentfile_sources = ["pysam/calignmentfile.c"] - tabproxies_sources = ["pysam/TabProxies.c"] - cvcf_sources = ["pysam/cvcf.c"] - cbcf_sources = ["pysam/cbcf.c"] + source_pattern = "pysam/c%s.c" else: # remove existing files to recompute # necessary to be both compatible for python 2.7 and 3.3 if IS_PYTHON3: - for f in ("pysam/csamtools.c", - "pysam/chtslib.c", - "pysam/ctabix.c", - "pysam/cfaidx.c", - "pysam/csamfile.c", - "pysam/TabProxies.c", - "pysam/cvcf.c", - "pysam/bvcf.c", - ): + for part in parts: try: - os.unlink(f) + os.unlink("pysam/c%s.c" % part) except: pass - + source_pattern = "pysam/c%s.pyx" cmdclass = {'build_ext': build_ext} - csamtools_sources = ["pysam/csamtools.pyx"] - chtslib_sources = ["pysam/chtslib.pyx"] - csamfile_sources = ["pysam/csamfile.pyx"] - calignmentfile_sources = 
["pysam/calignmentfile.pyx"] - tabix_sources = ["pysam/ctabix.pyx"] - faidx_sources = ["pysam/cfaidx.pyx"] - tabproxies_sources = ["pysam/TabProxies.pyx"] - cvcf_sources = ["pysam/cvcf.pyx"] - cbcf_sources = ["pysam/cbcf.pyx"] - ####################################################### classifiers = """ @@ -308,153 +299,176 @@ else: os_c_files = [] ####################################################### -samtools = Extension( +extra_compile_args = ["-Wno-error=declaration-after-statement", + "-DSAMTOOLS=1"] +define_macros = [('_FILE_OFFSET_BITS', '64'), + ('_USE_KNETFILE', '')] + +csamtools = Extension( "pysam.csamtools", - csamtools_sources + - ["pysam/%s" % x for x in ( - "pysam_util.c", )] + + [source_pattern % "samtools", + "pysam/pysam_util.c"] + glob.glob(os.path.join("samtools", "*.pysam.c")) + - os_c_files + glob.glob(os.path.join("samtools", "*", "*.pysam.c")) + + os_c_files + htslib_sources, - library_dirs=[], + library_dirs=htslib_library_dirs, include_dirs=["samtools", "pysam"] + include_os + htslib_include_dirs, libraries=["z"] + htslib_libraries, language="c", - extra_compile_args=["-Wno-error=declaration-after-statement", - "-DSAMTOOLS=1"], - define_macros=[('_FILE_OFFSET_BITS', '64'), - ('_USE_KNETFILE', '')] + extra_compile_args=extra_compile_args, + define_macros=define_macros ) -htslib = Extension( +chtslib = Extension( "pysam.libchtslib", - chtslib_sources + - ["pysam/%s" % x for x in ( - "htslib_util.c", )] + + [source_pattern % "htslib", + "pysam/htslib_util.c"] + shared_htslib_sources + os_c_files, library_dirs=htslib_library_dirs, include_dirs=["pysam"] + include_os + htslib_include_dirs, libraries=["z"] + htslib_libraries, language="c", - extra_compile_args=["-Wno-error=declaration-after-statement", - "-DSAMTOOLS=1"], - define_macros=[('_FILE_OFFSET_BITS', '64'), - ('_USE_KNETFILE', '')] + extra_compile_args=extra_compile_args, + define_macros=define_macros ) # samfile requires functions defined in bam_md.c # for __advance_samtools method. 
# Selected ones have been copied into samfile_utils.c # Needs to be devolved somehow. -samfile = Extension( +csamfile = Extension( "pysam.csamfile", - csamfile_sources + - ["pysam/%s" % x for x in ( - "htslib_util.c", "samfile_util.c",)] + - ["samtools/kprobaln.c"] + + [source_pattern % "samfile", + "pysam/htslib_util.c", + "pysam/samfile_util.c", + "samtools/kprobaln.c"] + htslib_sources + os_c_files, library_dirs=htslib_library_dirs, include_dirs=["pysam", "samtools"] + include_os + htslib_include_dirs, libraries=["z"] + htslib_libraries, language="c", - extra_compile_args=[ - "-Wno-error=declaration-after-statement", - "-DSAMTOOLS=1"], - define_macros=[('_FILE_OFFSET_BITS', '64'), - ('_USE_KNETFILE', '')] + extra_compile_args=extra_compile_args, + define_macros=define_macros ) # alignmentfile requires functions defined in bam_md.c # for __advance_samtools method. # Selected ones have been copied into samfile_utils.c # Needs to be devolved somehow. -alignmentfile = Extension( +calignmentfile = Extension( "pysam.calignmentfile", - calignmentfile_sources + - ["pysam/%s" % x for x in ( - "htslib_util.c", "samfile_util.c",)] + - ["samtools/kprobaln.c"] + + [source_pattern % "alignmentfile", + "pysam/htslib_util.c", + "pysam/samfile_util.c", + "samtools/kprobaln.c"] + htslib_sources + os_c_files, library_dirs=htslib_library_dirs, include_dirs=["pysam", "samtools"] + include_os + htslib_include_dirs, libraries=["z"] + htslib_libraries, language="c", - extra_compile_args=[ - "-Wno-error=declaration-after-statement", - "-DSAMTOOLS=1"], - define_macros=[('_FILE_OFFSET_BITS', '64'), - ('_USE_KNETFILE', '')] + extra_compile_args=extra_compile_args, + define_macros=define_macros +) + +# alignmentfile requires functions defined in bam_md.c +# for __advance_samtools method. +# Selected ones have been copied into samfile_utils.c +# Needs to be devolved somehow. 
+calignedsegment = Extension( + "pysam.calignedsegment", + [source_pattern % "alignedsegment", + "pysam/htslib_util.c", + "pysam/samfile_util.c", + "samtools/kprobaln.c"] + + htslib_sources + + os_c_files, + library_dirs=htslib_library_dirs, + include_dirs=["pysam", "samtools"] + include_os + htslib_include_dirs, + libraries=["z"] + htslib_libraries, + language="c", + extra_compile_args=extra_compile_args, + define_macros=define_macros ) -tabix = Extension( +ctabix = Extension( "pysam.ctabix", - tabix_sources + - ["pysam/%s" % x for x in ("tabix_util.c", )] + + [source_pattern % "tabix", + "pysam/tabix_util.c"] + htslib_sources + os_c_files, library_dirs=["pysam"] + htslib_library_dirs, include_dirs=["pysam"] + include_os + htslib_include_dirs, libraries=["z"] + htslib_libraries, language="c", - extra_compile_args=["-Wno-error=declaration-after-statement", - "-DSAMTOOLS=1"], - define_macros=[('_FILE_OFFSET_BITS', '64'), - ('_USE_KNETFILE', '')], + extra_compile_args=extra_compile_args, + define_macros=define_macros ) -faidx = Extension( +cutils = Extension( + "pysam.cutils", + [source_pattern % "utils"] + + htslib_sources + + os_c_files, + library_dirs=["pysam"] + htslib_library_dirs, + include_dirs=["pysam"] + include_os + htslib_include_dirs, + libraries=["z"] + htslib_libraries, + language="c", + extra_compile_args=extra_compile_args, + define_macros=define_macros +) + +cfaidx = Extension( "pysam.cfaidx", - faidx_sources + + [source_pattern % "faidx"] + htslib_sources + os_c_files, - library_dirs=["pysam"], + library_dirs=["pysam"] + htslib_library_dirs, include_dirs=["pysam"] + include_os + htslib_include_dirs, libraries=["z"] + htslib_libraries, language="c", - extra_compile_args=["-Wno-error=declaration-after-statement", - "-DSAMTOOLS=1"], - define_macros=[('_FILE_OFFSET_BITS', '64'), - ('_USE_KNETFILE', '')], + extra_compile_args=extra_compile_args, + define_macros=define_macros ) -tabproxies = Extension( - "pysam.TabProxies", - tabproxies_sources + 
os_c_files, +ctabixproxies = Extension( + "pysam.ctabixproxies", + [source_pattern % "tabixproxies"] + + os_c_files, library_dirs=[], include_dirs=include_os, libraries=["z"], language="c", - extra_compile_args=["-Wno-error=declaration-after-statement"], + extra_compile_args=extra_compile_args, + define_macros=define_macros ) cvcf = Extension( "pysam.cvcf", - cvcf_sources + os_c_files, + [source_pattern % "vcf"] + + os_c_files, library_dirs=[], include_dirs=["htslib"] + include_os + htslib_include_dirs, libraries=["z"], language="c", - extra_compile_args=["-Wno-error=declaration-after-statement"], + extra_compile_args=extra_compile_args, + define_macros=define_macros ) cbcf = Extension( "pysam.cbcf", - cbcf_sources + + [source_pattern % "bcf"] + htslib_sources + os_c_files, library_dirs=htslib_library_dirs, include_dirs=["htslib"] + include_os + htslib_include_dirs, libraries=["z"] + htslib_libraries, language="c", - extra_compile_args=[ - "-Wno-error=declaration-after-statement", - "-DSAMTOOLS=1"], - define_macros=[('_FILE_OFFSET_BITS', '64'), - ('_USE_KNETFILE', '')] + extra_compile_args=extra_compile_args, + define_macros=define_macros ) metadata = { @@ -475,15 +489,17 @@ metadata = { # 'pysam.include.samtools.bcftools', 'pysam.include.samtools.win32'], 'requires': ['cython (>=0.21)'], - 'ext_modules': [samtools, - htslib, - samfile, - alignmentfile, - tabix, - tabproxies, + 'ext_modules': [csamtools, + chtslib, + csamfile, + calignmentfile, + calignedsegment, + ctabix, + ctabixproxies, cvcf, cbcf, - faidx], + cfaidx, + cutils], 'cmdclass': cmdclass, 'package_dir': {'pysam': 'pysam', 'pysam.include.htslib': 'htslib', diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py index 2f096c2..a42a6cb 100644 --- a/tests/AlignedSegment_test.py +++ b/tests/AlignedSegment_test.py @@ -2,6 +2,7 @@ import os import pysam import unittest from TestUtils import checkFieldEqual +import copy SAMTOOLS = "samtools" WORKDIR = "pysam_test_work" @@ -24,7 +25,7 @@ 
class ReadTest(unittest.TestCase): a.next_reference_id = 0 a.next_reference_start = 200 a.template_length = 167 - a.query_qualities = pysam.fromQualityString("1234") * 10 + a.query_qualities = pysam.qualitystring_to_array("1234") * 10 # todo: create tags return a @@ -39,7 +40,7 @@ class TestAlignedSegment(ReadTest): a = pysam.AlignedSegment() self.assertEqual(a.query_name, None) self.assertEqual(a.query_sequence, None) - self.assertEqual(pysam.toQualityString(a.query_qualities), None) + self.assertEqual(pysam.qualities_to_qualitystring(a.query_qualities), None) self.assertEqual(a.flag, 0) self.assertEqual(a.reference_id, 0) self.assertEqual(a.mapping_quality, 0) @@ -148,14 +149,14 @@ class TestAlignedSegment(ReadTest): ''' a = self.buildRead() a.query_sequence = a.query_sequence[5:10] - self.assertEqual(pysam.toQualityString(a.query_qualities), None) + self.assertEqual(pysam.qualities_to_qualitystring(a.query_qualities), None) a = self.buildRead() - s = pysam.toQualityString(a.query_qualities) + s = pysam.qualities_to_qualitystring(a.query_qualities) a.query_sequence = a.query_sequence[5:10] - a.query_qualities = pysam.fromQualityString(s[5:10]) + a.query_qualities = pysam.qualitystring_to_array(s[5:10]) - self.assertEqual(pysam.toQualityString(a.query_qualities), s[5:10]) + self.assertEqual(pysam.qualities_to_qualitystring(a.query_qualities), s[5:10]) def testLargeRead(self): '''build an example read.''' @@ -171,7 +172,7 @@ class TestAlignedSegment(ReadTest): a.next_reference_id = 0 a.next_reference_start = 200 a.template_length = 167 - a.query_qualities = pysam.fromQualityString("1234") * 200 + a.query_qualities = pysam.qualitystring_to_array("1234") * 200 return a @@ -227,15 +228,8 @@ class TestAlignedSegment(ReadTest): [(20, 30), (31, 40), (40, 60)]) def test_get_aligned_pairs_soft_clipping(self): - a = pysam.AlignedSegment() - a.query_name = "read_12345" - a.query_sequence = "ACGT" * 10 - a.flag = 0 - a.reference_id = 0 - a.reference_start = 20 - 
a.mapping_quality = 20 + a = self.buildRead() a.cigartuples = ((4, 2), (0, 35), (4, 3)) - a.query_qualities = pysam.fromQualityString("1234") * 10 self.assertEqual(a.get_aligned_pairs(), [(0, None), (1, None)] + [(qpos, refpos) for (qpos, refpos) in zip( @@ -250,15 +244,8 @@ class TestAlignedSegment(ReadTest): ) def test_get_aligned_pairs_hard_clipping(self): - a = pysam.AlignedSegment() - a.query_name = "read_12345" - a.query_sequence = "ACGT" * 10 - a.flag = 0 - a.reference_id = 0 - a.reference_start = 20 - a.mapping_quality = 20 + a = self.buildRead() a.cigartuples = ((5, 2), (0, 35), (5, 3)) - a.query_qualities = pysam.fromQualityString("1234") * 10 self.assertEqual(a.get_aligned_pairs(), # No seq, no seq pos [(qpos, refpos) for (qpos, refpos) in zip( @@ -268,15 +255,8 @@ class TestAlignedSegment(ReadTest): range(0, 0 + 35), range(20, 20 + 35))]) def test_get_aligned_pairs_skip(self): - a = pysam.AlignedSegment() - a.query_name = "read_12345" - a.query_sequence = "ACGT" * 10 - a.flag = 0 - a.reference_id = 0 - a.reference_start = 20 - a.mapping_quality = 20 - a.cigartuples = ((0, 2), (3, 100), (0, 38)) - a.query_qualities = pysam.fromQualityString("1234") * 10 + a = self.buildRead() + a.cigarstring = "2M100D38M" self.assertEqual(a.get_aligned_pairs(), [(0, 20), (1, 21)] + [(None, refpos) for refpos in range(22, 22 + 100)] + @@ -291,15 +271,8 @@ class TestAlignedSegment(ReadTest): range(20 + 2 + 100, 20 + 2 + 100 + 38))]) def test_get_aligned_pairs_match_mismatch(self): - a = pysam.AlignedSegment() - a.query_name = "read_12345" - a.query_sequence = "ACGT" * 10 - a.flag = 0 - a.reference_id = 0 - a.reference_start = 20 - a.mapping_quality = 20 + a = self.buildRead() a.cigartuples = ((7, 20), (8, 20)) - a.query_qualities = pysam.fromQualityString("1234") * 10 self.assertEqual(a.get_aligned_pairs(), [(qpos, refpos) for (qpos, refpos) in zip( range(0, 0 + 40), range(20, 20 + 40))]) @@ -308,21 +281,68 @@ class TestAlignedSegment(ReadTest): range(0, 0 + 40), range(20, 
20 + 40))]) def test_get_aligned_pairs_padding(self): - a = pysam.AlignedSegment() - a.query_name = "read_12345" - a.query_sequence = "ACGT" * 10 - a.flag = 0 - a.reference_id = 0 - a.reference_start = 20 - a.mapping_quality = 20 + a = self.buildRead() a.cigartuples = ((7, 20), (6, 1), (8, 19)) - a.query_qualities = pysam.fromQualityString("1234") * 10 def inner(): a.get_aligned_pairs() # padding is not bein handled right now self.assertRaises(NotImplementedError, inner) + def test_get_aligned_pairs(self): + a = self.buildRead() + a.query_sequence = "A" * 9 + a.cigarstring = "9M" + a.set_tag("MD", "9") + self.assertEqual( + a.get_aligned_pairs(with_seq=True), + [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'), + (3, 23, 'A'), (4, 24, 'A'), (5, 25, 'A'), + (6, 26, 'A'), (7, 27, 'A'), (8, 28, 'A')]) + + a.set_tag("MD", "4C4") + self.assertEqual( + a.get_aligned_pairs(with_seq=True), + [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'), + (3, 23, 'A'), (4, 24, 'c'), (5, 25, 'A'), + (6, 26, 'A'), (7, 27, 'A'), (8, 28, 'A')]) + + a.cigarstring = "5M2D4M" + a.set_tag("MD", "4C^TT4") + self.assertEqual( + a.get_aligned_pairs(with_seq=True), + [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'), + (3, 23, 'A'), (4, 24, 'c'), + (None, 25, 'T'), (None, 26, 'T'), + (5, 27, 'A'), (6, 28, 'A'), (7, 29, 'A'), (8, 30, 'A')] + ) + + a.cigarstring = "5M2D2I2M" + a.set_tag("MD", "4C^TT2") + self.assertEqual( + a.get_aligned_pairs(with_seq=True), + [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'), + (3, 23, 'A'), (4, 24, 'c'), + (None, 25, 'T'), (None, 26, 'T'), + (5, None, None), (6, None, None), + (7, 27, 'A'), (8, 28, 'A')] + ) + + def testNoSequence(self): + '''issue 176: retrieving length without query sequence + with soft-clipping. 
+ ''' + a = self.buildRead() + a.query_sequence = None + a.cigarstring = "20M" + self.assertEqual(a.query_alignment_length, 20) + a.cigarstring = "20M1S" + self.assertEqual(a.query_alignment_length, 20) + a.cigarstring = "1S20M" + self.assertEqual(a.query_alignment_length, 20) + a.cigarstring = "1S20M1S" + self.assertEqual(a.query_alignment_length, 20) + class TestTags(ReadTest): @@ -443,5 +463,125 @@ class TestTags(ReadTest): after = entry.get_tags() self.assertEqual(after, before) + def testMDTag(self): + a = self.buildRead() + + # Substitutions only + a.cigarstring = "21M" + a.query_sequence = "A" * 21 + a.set_tag('MD', "5C0T0G05C0G0T5") + self.assertEqual( + "AAAAActgAAAAAcgtAAAAA", + a.get_reference_sequence()) + + a.cigarstring = "21M" + a.query_sequence = "A" * 21 + a.set_tag('MD', "5CTG5CGT5") + self.assertEqual( + "AAAAActgAAAAAcgtAAAAA", + a.get_reference_sequence()) + + a.cigarstring = "11M" + a.query_sequence = "A" * 11 + a.set_tag('MD', "CTG5CGT") + self.assertEqual( + "ctgAAAAAcgt", + a.get_reference_sequence()) + + # insertions are silent + a.cigarstring = "5M1I5M" + a.query_sequence = "A" * 5 + "C" + "A" * 5 + a.set_tag('MD', "11") + self.assertEqual( + a.query_sequence, + a.get_reference_sequence()) + + a.cigarstring = "1I10M" + self.assertEqual( + a.query_sequence, + a.get_reference_sequence()) + + a.cigarstring = "10M1I" + self.assertEqual( + a.query_sequence, + a.get_reference_sequence()) + + a.cigarstring = "5M1D5M" + a.query_sequence = "A" * 10 + a.set_tag('MD', "5^C5") + self.assertEqual( + "A" * 5 + "C" + "A" * 5, + a.get_reference_sequence()) + + a.cigarstring = "5M1D5M" + a.query_sequence = "A" * 10 + a.set_tag('MD', "5^CCC5") + self.assertEqual( + "A" * 5 + "C" * 3 + "A" * 5, + a.get_reference_sequence()) + + # softclipping + a.cigarstring = "5S5M1D5M5S" + a.query_sequence = "G" * 5 + "A" * 10 + "G" * 5 + a.set_tag('MD', "10") + self.assertEqual( + "A" * 10, + a.get_reference_sequence()) + + # all together + a.cigarstring = 
"5S5M1D5M1I5M5S" + a.query_sequence = "G" * 5 + "A" * 16 + "G" * 5 + a.set_tag('MD', "2C2^T10") + self.assertEqual( + "AAcAATAAAAAAAAAA", + a.get_reference_sequence()) + + # all together + a.cigarstring = "5S5M1D2I5M5S" + a.query_sequence = "G" * 5 + "A" * 11 + "G" * 5 + a.set_tag('MD', "2C2^TC5") + self.assertEqual( + "AAcAATCAAAAA", + a.get_reference_sequence()) + + +class TestCopy(ReadTest): + + def testCopy(self): + a = self.buildRead() + b = copy.copy(a) + # check if a and be are the same + self.assertEqual(a, b) + + # check if they map to different objects + a.query_name = 'ReadA' + b.query_name = 'ReadB' + self.assertEqual(a.query_name, 'ReadA') + self.assertEqual(b.query_name, 'ReadB') + + def testDeepCopy(self): + a = self.buildRead() + b = copy.deepcopy(a) + # check if a and be are the same + self.assertEqual(a, b) + + # check if they map to different objects + a.query_name = 'ReadA' + b.query_name = 'ReadB' + self.assertEqual(a.query_name, 'ReadA') + self.assertEqual(b.query_name, 'ReadB') + + +class TestAsString(unittest.TestCase): + + def testAsString(self): + with open(os.path.join(DATADIR, "ex2.sam")) as samf: + reference = [x for x in samf if not x.startswith("@")] + + with pysam.AlignmentFile( + os.path.join(DATADIR, "ex2.bam"), "r") as pysamf: + for s, p in zip(reference, pysamf): + self.assertEqual(s, p.tostring(pysamf)) + if __name__ == "__main__": unittest.main() diff --git a/tests/AlignmentFile_test.py b/tests/AlignmentFile_test.py index 751fad0..30fed5b 100644 --- a/tests/AlignmentFile_test.py +++ b/tests/AlignmentFile_test.py @@ -166,12 +166,12 @@ class BasicTestBAMFromFetch(unittest.TestCase): self.reads[3].query_sequence, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG")) def testARqual(self): - self.assertEqual(pysam.toQualityString(self.reads[0].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", - "quality string mismatch in read 1: %s != %s" % (pysam.toQualityString(self.reads[0].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<")) - 
self.assertEqual(pysam.toQualityString(self.reads[1].query_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % ( - pysam.toQualityString(self.reads[1].query_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<")) - self.assertEqual(pysam.toQualityString(self.reads[3].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", - "quality string mismatch in read 3: %s != %s" % (pysam.toQualityString(self.reads[3].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<")) + self.assertEqual(pysam.qualities_to_qualitystring(self.reads[0].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", + "quality string mismatch in read 1: %s != %s" % (pysam.qualities_to_qualitystring(self.reads[0].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<")) + self.assertEqual(pysam.qualities_to_qualitystring(self.reads[1].query_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % ( + pysam.qualities_to_qualitystring(self.reads[1].query_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<")) + self.assertEqual(pysam.qualities_to_qualitystring(self.reads[3].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", + "quality string mismatch in read 3: %s != %s" % (pysam.qualities_to_qualitystring(self.reads[3].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<")) def testARquery(self): self.assertEqual( @@ -195,22 +195,22 @@ class BasicTestBAMFromFetch(unittest.TestCase): def testARqqual(self): self.assertEqual( - pysam.toQualityString(self.reads[0].query_alignment_qualities), + pysam.qualities_to_qualitystring(self.reads[0].query_alignment_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "qquality string mismatch in read 1: %s != %s" % - (pysam.toQualityString(self.reads[0].query_alignment_qualities), + (pysam.qualities_to_qualitystring(self.reads[0].query_alignment_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<")) self.assertEqual( - 
pysam.toQualityString(self.reads[1].query_alignment_qualities), + pysam.qualities_to_qualitystring(self.reads[1].query_alignment_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "qquality string mismatch in read 2: %s != %s" % - (pysam.toQualityString(self.reads[1].query_alignment_qualities), + (pysam.qualities_to_qualitystring(self.reads[1].query_alignment_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<")) self.assertEqual( - pysam.toQualityString(self.reads[3].query_alignment_qualities), + pysam.qualities_to_qualitystring(self.reads[3].query_alignment_qualities), "<<<<<<<<<<<<<<<<<:<9/,&,22", "qquality string mismatch in read 3: %s != %s" % - (pysam.toQualityString(self.reads[3].query_alignment_qualities), + (pysam.qualities_to_qualitystring(self.reads[3].query_alignment_qualities), "<<<<<<<<<<<<<<<<<:<9/,&,22")) def testPresentOptionalFields(self): @@ -312,7 +312,7 @@ class BasicTestCRAMFromFetch(BasicTestBAMFromFetch): (self.reads[1].opt('MF'), 18)) -class BasicTestSAMFromFile(BasicTestBAMFromFetch): +class BasicTestSAMFromFilename(BasicTestBAMFromFetch): def setUp(self): self.samfile = pysam.AlignmentFile( @@ -321,7 +321,7 @@ class BasicTestSAMFromFile(BasicTestBAMFromFetch): self.reads = [r for r in self.samfile] -class BasicTestCRAMFromFile(BasicTestCRAMFromFetch): +class BasicTestCRAMFromFilename(BasicTestCRAMFromFetch): def setUp(self): self.samfile = pysam.AlignmentFile( @@ -330,7 +330,7 @@ class BasicTestCRAMFromFile(BasicTestCRAMFromFetch): self.reads = [r for r in self.samfile] -class BasicTestBAMFromFile(BasicTestBAMFromFetch): +class BasicTestBAMFromFilename(BasicTestBAMFromFetch): def setUp(self): self.samfile = pysam.AlignmentFile( @@ -339,6 +339,33 @@ class BasicTestBAMFromFile(BasicTestBAMFromFetch): self.reads = [r for r in self.samfile] +class BasicTestBAMFromFile(BasicTestBAMFromFetch): + + def setUp(self): + f = open(os.path.join(DATADIR, "ex3.bam")) + self.samfile = pysam.AlignmentFile( + f, "rb") + self.reads = [r for r in self.samfile] 
+ + +class BasicTestSAMFromFile(BasicTestBAMFromFetch): + + def setUp(self): + f = open(os.path.join(DATADIR, "ex3.sam")) + self.samfile = pysam.AlignmentFile( + f, "r") + self.reads = [r for r in self.samfile] + + +class BasicTestCRAMFromFile(BasicTestCRAMFromFetch): + + def setUp(self): + f = open(os.path.join(DATADIR, "ex3.cram")) + self.samfile = pysam.AlignmentFile( + f, "rc") + self.reads = [r for r in self.samfile] + + ################################################## # # Test of basic File I/O @@ -370,7 +397,7 @@ class TestIO(unittest.TestCase): If *use_template* is set, the header is copied from infile using the template mechanism, otherwise target names and - lengths are passed explicitely. + lengths are passed explicitly. The *checkf* is used to determine if the files are equal. @@ -619,6 +646,25 @@ class TestIO(unittest.TestCase): samfile.close() self.assertRaises(ValueError, samfile.fetch, 'chr1', 100, 120) + def testFetchFromClosedFileObject(self): + + f = open(os.path.join(DATADIR, "ex1.bam")) + samfile = pysam.AlignmentFile(f, "rb") + f.close() + self.assertTrue(f.closed) + # access to Samfile should still work + self.checkEcho("ex1.bam", + "ex1.bam", + "tmp_ex1.bam", + "rb", "wb") + + f = open(os.path.join(DATADIR, "ex1.bam")) + samfile = pysam.AlignmentFile(f, "rb") + self.assertFalse(f.closed) + samfile.close() + # python file needs to be closed separately + self.assertFalse(f.closed) + def testClosedFile(self): '''test that access to a closed samfile raises ValueError.''' @@ -763,10 +809,10 @@ class TestIteratorRowBAM(unittest.TestCase): str(a), str(d))) qual = d[10] self.assertEqual( - pysam.toQualityString(a.query_qualities), + pysam.qualities_to_qualitystring(a.query_qualities), qual, "line %i: quality mismatch: %s != %s, \n%s\n%s\n" % - (line, pysam.toQualityString(a.query_qualities), qual, + (line, pysam.qualities_to_qualitystring(a.query_qualities), qual, str(a), str(d))) def testIteratePerContig(self): @@ -1013,7 +1059,7 @@ class 
TestTagParsing(unittest.TestCase): a.next_reference_id = 0 a.next_reference_start = 200 a.template_length = 0 - a.query_qualities = pysam.fromQualityString("1234") * 3 + a.query_qualities = pysam.qualitystring_to_array("1234") * 3 # todo: create tags return a @@ -1074,7 +1120,7 @@ class TestTagParsing(unittest.TestCase): def c(r, l): r.tags = [('ZM', l)] - self.assertEqual(r.opt("ZM"), list(l)) + self.assertEqual(list(r.opt("ZM")), list(l)) # signed integers c(r, (-1, 1)) @@ -1113,10 +1159,10 @@ class TestClipping(unittest.TestCase): if read.query_name == "r001": self.assertEqual(read.query_sequence, 'AAAAGATAAGGATA') self.assertEqual(read.query_alignment_sequence, 'AGATAAGGATA') - self.assertEqual(pysam.toQualityString(read.query_qualities), + self.assertEqual(pysam.qualities_to_qualitystring(read.query_qualities), None) self.assertEqual( - pysam.toQualityString(read.query_alignment_qualities), + pysam.qualities_to_qualitystring(read.query_alignment_qualities), None) elif read.query_name == "r002": @@ -1124,10 +1170,10 @@ class TestClipping(unittest.TestCase): self.assertEqual(read.query_sequence, 'GCCTAAGCTAA') self.assertEqual(read.query_alignment_sequence, 'AGCTAA') self.assertEqual( - pysam.toQualityString(read.query_qualities), + pysam.qualities_to_qualitystring(read.query_qualities), '01234567890') self.assertEqual( - pysam.toQualityString(read.query_alignment_qualities), + pysam.qualities_to_qualitystring(read.query_alignment_qualities), '567890') elif read.query_name == "r003": @@ -1135,10 +1181,10 @@ class TestClipping(unittest.TestCase): self.assertEqual(read.query_sequence, 'GCCTAAGCTAA') self.assertEqual(read.query_alignment_sequence, 'GCCTAA') self.assertEqual( - pysam.toQualityString(read.query_qualities), + pysam.qualities_to_qualitystring(read.query_qualities), '01234567890') self.assertEqual( - pysam.toQualityString(read.query_alignment_qualities), + pysam.qualities_to_qualitystring(read.query_alignment_qualities), '012345') elif read.query_name 
== "r004": @@ -1146,10 +1192,10 @@ class TestClipping(unittest.TestCase): self.assertEqual(read.query_sequence, 'TAGGC') self.assertEqual(read.query_alignment_sequence, 'TAGGC') self.assertEqual( - pysam.toQualityString(read.query_qualities), + pysam.qualities_to_qualitystring(read.query_qualities), '01234') self.assertEqual( - pysam.toQualityString(read.query_alignment_qualities), + pysam.qualities_to_qualitystring(read.query_alignment_qualities), '01234') @@ -1347,7 +1393,7 @@ class TestContextManager(unittest.TestCase): with pysam.AlignmentFile(os.path.join(DATADIR, 'ex1.bam'), 'rb') as samfile: samfile.fetch() - self.assertEqual(samfile._isOpen(), False) + self.assertEqual(samfile.closed, True) class TestExceptions(unittest.TestCase): @@ -1482,7 +1528,7 @@ class TestDeNovoConstruction(unittest.TestCase): a.next_reference_id = 0 a.next_reference_start = 199 a.template_length = 167 - a.query_qualities = pysam.fromQualityString( + a.query_qualities = pysam.qualitystring_to_array( "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") a.tags = (("NM", 1), ("RG", "L1")) @@ -1498,7 +1544,7 @@ class TestDeNovoConstruction(unittest.TestCase): b.next_reference_id = 1 b.next_reference_start = 499 b.template_length = 412 - b.query_qualities = pysam.fromQualityString( + b.query_qualities = pysam.qualitystring_to_array( "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") b.tags = (("MF", 18), ("RG", "L2")) @@ -1625,12 +1671,35 @@ class TestTruncatedBAM(unittest.TestCase): iterall = lambda x: len([a for a in x]) self.assertRaises(IOError, iterall, s.fetch()) +COMPARE_BTAG = [100, 1, 91, 0, 7, 101, 0, 201, 96, 204, + 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, + 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, + 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, + 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, + 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, + 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, + 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, + 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, + 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10, 
+ 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, + 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2, + 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0, + 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93, + 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0, + 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109, + 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112, + 110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129, + 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100, + 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0, + 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0, + 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0, + 223, 111, 103, 0, 5, 225, 0, 95] class TestBTagSam(unittest.TestCase): '''see issue 81.''' - compare = [[100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10, 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2, 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0, 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93, 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0, 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109, 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112, 110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129, 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100, 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0, 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0, 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0, 223, 111, 103, 0, 5, 225, 0, 95], + compare = [COMPARE_BTAG, [-100, 200, -300, -400], [-100, 12], [12, 15], @@ -1638,26 +1707,47 @@ class TestBTagSam(unittest.TestCase): filename = os.path.join(DATADIR, 'example_btag.sam') - def testRead(self): + read0 = [('RG', 'QW85I'), + ('PG', 'tmap'), + ('MD', '140'), + ('NM', 0), + ('AS', 140), + ('FZ', 
array.array('H', COMPARE_BTAG)), + ('XA', 'map2-1'), + ('XS', 53), + ('XT', 38), + ('XF', 1), + ('XE', 0)] + + def testReadTags(self): s = pysam.AlignmentFile(self.filename) for x, read in enumerate(s): + tags = read.tags if x == 0: - self.assertEqual(read.tags, [('RG', 'QW85I'), ('PG', 'tmap'), ('MD', '140'), ('NM', 0), ('AS', 140), ('FZ', [100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10, 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2, 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0, 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93, 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0, 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109, 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112, 110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129, 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100, 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0, 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0, 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0, 223, 111, 103, 0, 5, 225, 0, 95]), ('XA', 'map2-1'), ('XS', 53), ('XT', 38), ('XF', 1), ('XE', 0)] - ) - - fz = dict(read.tags)["FZ"] + self.assertEqual(tags, self.read0) + + fz = list(dict(tags)["FZ"]) self.assertEqual(fz, self.compare[x]) - self.assertEqual(read.opt("FZ"), self.compare[x]) - - def testWrite(self): + self.assertEqual(list(read.opt("FZ")), self.compare[x]) + self.assertEqual(tags, read.get_tags()) + for tag, value in tags: + self.assertEqual(value, read.get_tag(tag)) + + def testReadWriteTags(self): s = pysam.AlignmentFile(self.filename) for read in s: before = read.tags - read.tags = read.tags - after = read.tags - self.assertEqual(after, before) + read.tags = 
before + self.assertEqual(read.tags, before) + + read.set_tags(before) + self.assertEqual(read.tags, before) + + for tag, value in before: + read.set_tag(tag, value) + self.assertEqual(value, read.get_tag(tag)) class TestBTagBam(TestBTagSam): @@ -1861,18 +1951,50 @@ class TestPileup(unittest.TestCase): fastafile=self.fastafile) self.checkEqual(refs, iterator) - def count_coverage_python(self, bam, chr, start, stop, read_callback, quality_threshold=15): + +class TestCountCoverage(unittest.TestCase): + + samfilename = "pysam_data/ex1.bam" + fastafilename = "pysam_data/ex1.fa" + + def setUp(self): + + self.samfile = pysam.AlignmentFile(self.samfilename) + self.fastafile = pysam.Fastafile(self.fastafilename) + + samfile = pysam.AlignmentFile( + "test_count_coverage_read_all.bam", 'wb', + template=self.samfile) + for ii, read in enumerate(self.samfile.fetch()): + # if ii % 2 == 0: # setting BFUNMAP makes no sense... + #read.flag = read.flag | 0x4 + if ii % 3 == 0: + read.flag = read.flag | 0x100 + if ii % 5 == 0: + read.flag = read.flag | 0x200 + if ii % 7 == 0: + read.flag = read.flag | 0x400 + samfile.write(read) + samfile.close() + pysam.index("test_count_coverage_read_all.bam") + + def count_coverage_python(self, bam, chrom, start, stop, + read_callback, + quality_threshold=15): l = stop - start count_a = array.array('L', [0] * l) count_c = array.array('L', [0] * l) count_g = array.array('L', [0] * l) count_t = array.array('L', [0] * l) - for p in bam.pileup(chr, start, stop, truncate=True, stepper='nofilter'): + for p in bam.pileup(chrom, start, stop, truncate=True, + stepper='nofilter'): rpos = p.reference_pos - start for read in p.pileups: - if not read.is_del and not read.is_refskip and read_callback(read.alignment): + if not read.is_del and not read.is_refskip and \ + read_callback(read.alignment): try: - if read.alignment.query_qualities[read.query_position] > quality_threshold: + if read.alignment.query_qualities[read.query_position] \ + >= quality_threshold: 
letter = read.alignment.query[read.query_position] if letter == 'A': count_a[rpos] += 1 @@ -1887,51 +2009,59 @@ class TestPileup(unittest.TestCase): return count_a, count_c, count_g, count_t def test_count_coverage(self): - chr = 'chr1' + chrom = 'chr1' start = 0 stop = 2000 - manual_counts = self.count_coverage_python(self.samfile, chr, start, stop, - lambda read: True, - quality_threshold=0) - fast_counts = self.samfile.count_coverage(chr, start, stop, - read_callback=lambda read: True, - quality_threshold=0) - self.assertEqual(fast_counts[0], manual_counts[0]) - self.assertEqual(fast_counts[1], manual_counts[1]) - self.assertEqual(fast_counts[2], manual_counts[2]) - self.assertEqual(fast_counts[3], manual_counts[3]) + manual_counts = self.count_coverage_python( + self.samfile, chrom, start, stop, + lambda read: True, + quality_threshold=0) + fast_counts = self.samfile.count_coverage( + chrom, start, stop, + read_callback=lambda read: True, + quality_threshold=0) + + self.assertEqual(list(fast_counts[0]), list(manual_counts[0])) + self.assertEqual(list(fast_counts[1]), list(manual_counts[1])) + self.assertEqual(list(fast_counts[2]), list(manual_counts[2])) + self.assertEqual(list(fast_counts[3]), list(manual_counts[3])) def test_count_coverage_quality_filter(self): - chr = 'chr1' + chrom = 'chr1' start = 0 stop = 2000 - manual_counts = self.count_coverage_python(self.samfile, chr, start, stop, - lambda read: True, - quality_threshold=0) - fast_counts = self.samfile.count_coverage(chr, start, stop, - read_callback=lambda read: True, - quality_threshold=15) + manual_counts = self.count_coverage_python( + self.samfile, chrom, start, stop, + lambda read: True, + quality_threshold=0) + fast_counts = self.samfile.count_coverage( + chrom, start, stop, + read_callback=lambda read: True, + quality_threshold=15) # we filtered harder, should be less for i in range(4): for r in range(start, stop): self.assertTrue(fast_counts[i][r] <= manual_counts[i][r]) def 
test_count_coverage_read_callback(self): - chr = 'chr1' + chrom = 'chr1' start = 0 stop = 2000 - manual_counts = self.count_coverage_python(self.samfile, chr, start, stop, - lambda read: read.flag & 0x10, - quality_threshold=0) - fast_counts = self.samfile.count_coverage(chr, start, stop, - read_callback=lambda read: True, - quality_threshold=0) + manual_counts = self.count_coverage_python( + self.samfile, chrom, start, stop, + lambda read: read.flag & 0x10, + quality_threshold=0) + fast_counts = self.samfile.count_coverage( + chrom, start, stop, + read_callback=lambda read: True, + quality_threshold=0) for i in range(4): for r in range(start, stop): self.assertTrue(fast_counts[i][r] >= manual_counts[i][r]) - fast_counts = self.samfile.count_coverage(chr, start, stop, - read_callback=lambda read: read.flag & 0x10, - quality_threshold=0) + fast_counts = self.samfile.count_coverage( + chrom, start, stop, + read_callback=lambda read: read.flag & 0x10, + quality_threshold=0) self.assertEqual(fast_counts[0], manual_counts[0]) self.assertEqual(fast_counts[1], manual_counts[1]) @@ -1939,35 +2069,23 @@ class TestPileup(unittest.TestCase): self.assertEqual(fast_counts[3], manual_counts[3]) def test_count_coverage_read_all(self): - samfile = pysam.AlignmentFile( - "test_count_coverage_read_all.bam", 'wb', template=self.samfile) - for ii, read in enumerate(self.samfile.fetch()): - # if ii % 2 == 0: # setting BFUNMAP makes no sense... 
- #read.flag = read.flag | 0x4 - if ii % 3 == 0: - read.flag = read.flag | 0x100 - if ii % 5 == 0: - read.flag = read.flag | 0x200 - if ii % 7 == 0: - read.flag = read.flag | 0x400 - samfile.write(read) - samfile.close() - pysam.index("test_count_coverage_read_all.bam") samfile = pysam.AlignmentFile("test_count_coverage_read_all.bam") - chr = 'chr1' + chrom = 'chr1' start = 0 stop = 2000 def filter(read): return not (read.flag & (0x4 | 0x100 | 0x200 | 0x400)) - fast_counts = samfile.count_coverage(chr, start, stop, - read_callback='all', - #read_callback = lambda read: ~(read.flag & (0x4 | 0x100 | 0x200 | 0x400)), - quality_threshold=0) - manual_counts = samfile.count_coverage(chr, start, stop, - read_callback=lambda read: not( - read.flag & (0x4 | 0x100 | 0x200 | 0x400)), - quality_threshold=0) + fast_counts = samfile.count_coverage( + chrom, start, stop, + read_callback='all', + #read_callback = lambda read: ~(read.flag & (0x4 | 0x100 | 0x200 | 0x400)), + quality_threshold=0) + manual_counts = samfile.count_coverage( + chrom, start, stop, + read_callback=lambda read: not( + read.flag & (0x4 | 0x100 | 0x200 | 0x400)), + quality_threshold=0) os.unlink("test_count_coverage_read_all.bam") os.unlink("test_count_coverage_read_all.bam.bai") diff --git a/tests/SamFile_test.py b/tests/SamFile_test.py index 889ff96..79b11a5 100644 --- a/tests/SamFile_test.py +++ b/tests/SamFile_test.py @@ -13,6 +13,7 @@ import sys import collections import subprocess import logging +import array from TestUtils import checkBinaryEqual, checkURL IS_PYTHON3 = sys.version_info[0] >= 3 @@ -355,7 +356,7 @@ class TestIO(unittest.TestCase): If *use_template* is set, the header is copied from infile using the template mechanism, otherwise target names and - lengths are passed explicitely. + lengths are passed explicitly. 
''' @@ -1080,7 +1081,7 @@ class TestContextManager(unittest.TestCase): with pysam.Samfile(os.path.join(DATADIR, 'ex1.bam'), 'rb') as samfile: samfile.fetch() - self.assertEqual(samfile._isOpen(), False) + self.assertEqual(samfile.closed, True) class TestExceptions(unittest.TestCase): @@ -1523,12 +1524,36 @@ class TestEmptyHeader(unittest.TestCase): s = pysam.Samfile(os.path.join(DATADIR, 'example_empty_header.bam')) self.assertEqual(s.header, {'SQ': [{'LN': 1000, 'SN': 'chr1'}]}) +COMPARE_BTAG = [100, 1, 91, 0, 7, 101, 0, 201, 96, 204, + 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, + 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, + 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, + 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, + 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, + 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, + 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, + 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, + 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10, + 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, + 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2, + 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0, + 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93, + 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0, + 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109, + 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112, + 110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129, + 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100, + 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0, + 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0, + 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0, + 223, 111, 103, 0, 5, 225, 0, 95] + class TestBTagSam(unittest.TestCase): '''see issue 81.''' - compare = [[100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, 79, 0, 
0, 101, 7, 109, 90, 265, 1, 27, 10, 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2, 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0, 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93, 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0, 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109, 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112, 110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129, 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100, 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0, 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0, 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0, 223, 111, 103, 0, 5, 225, 0, 95], + compare = [COMPARE_BTAG, [-100, 200, -300, -400], [-100, 12], [12, 15], @@ -1536,26 +1561,47 @@ class TestBTagSam(unittest.TestCase): filename = os.path.join(DATADIR, 'example_btag.sam') - def testRead(self): + read0 = [('RG', 'QW85I'), + ('PG', 'tmap'), + ('MD', '140'), + ('NM', 0), + ('AS', 140), + ('FZ', array.array('H', COMPARE_BTAG)), + ('XA', 'map2-1'), + ('XS', 53), + ('XT', 38), + ('XF', 1), + ('XE', 0)] + + def testReadTags(self): s = pysam.Samfile(self.filename) for x, read in enumerate(s): + tags = read.tags if x == 0: - self.assertEqual(read.tags, [('RG', 'QW85I'), ('PG', 'tmap'), ('MD', '140'), ('NM', 0), ('AS', 140), ('FZ', [100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10, 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2, 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0, 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93, 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0, 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109, 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112, 110, 6, 
4, 200, 28, 0, 196, 0, 203, 1, 129, 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100, 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0, 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0, 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0, 223, 111, 103, 0, 5, 225, 0, 95]), ('XA', 'map2-1'), ('XS', 53), ('XT', 38), ('XF', 1), ('XE', 0)] - ) - - fz = dict(read.tags)["FZ"] + self.assertEqual(tags, self.read0) + + fz = list(dict(tags)["FZ"]) self.assertEqual(fz, self.compare[x]) - self.assertEqual(read.opt("FZ"), self.compare[x]) - - def testWrite(self): + self.assertEqual(list(read.opt("FZ")), self.compare[x]) + self.assertEqual(tags, read.get_tags()) + for tag, value in tags: + self.assertEqual(value, read.get_tag(tag)) + + def testReadWriteTags(self): s = pysam.Samfile(self.filename) for read in s: before = read.tags - read.tags = read.tags - after = read.tags - self.assertEqual(after, before) + read.tags = before + self.assertEqual(read.tags, before) + + read.set_tags(before) + self.assertEqual(read.tags, before) + + for tag, value in before: + read.set_tag(tag, value) + self.assertEqual(value, read.get_tag(tag)) class TestBTagBam(TestBTagSam): diff --git a/tests/TestUtils.py b/tests/TestUtils.py index 3533f00..5cc048a 100644 --- a/tests/TestUtils.py +++ b/tests/TestUtils.py @@ -68,9 +68,9 @@ def checkSamtoolsViewEqual(filename1, filename2, l1 = sorted(l1[:-1].split("\t")) l2 = sorted(l2[:-1].split("\t")) if l1 != l2: - print "mismatch in line %i" % n - print l1 - print l2 + print ("mismatch in line %i" % n) + print (l1) + print (l2) return False else: return False diff --git a/tests/faidx_test.py b/tests/faidx_test.py index c454e83..ee448c3 100644 --- a/tests/faidx_test.py +++ b/tests/faidx_test.py @@ -9,9 +9,9 @@ class TestFastaFile(unittest.TestCase): sequences = { 'chr1': - 
b"CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCTGTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTCAGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACACATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATACCATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTTTCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGAACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTGTGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGGAGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCGTCTTCTTA", + 
"CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCTGTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTCAGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACACATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATACCATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTTTCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGAACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTGTGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGGAGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCGTCTTCTTA", 'chr2': - 
b"TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAGCTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTTCAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCATCAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTAAGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATAATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATAAAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACCTCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATAGATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCAAATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGTAAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATATAACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAATACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGATGATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTGCGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATAGCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAATTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGCCAGAAAAAAATATTTACAGTAACT", + 
"TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAGCTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTTCAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCATCAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTAAGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATAATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATAAAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACCTCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATAGATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCAAATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGTAAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATATAACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAATACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGATGATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTGCGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATAGCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAATTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGCCAGAAAAAAATATTTACAGTAACT", } def setUp(self): @@ -27,25 +27,20 @@ class TestFastaFile(unittest.TestCase): # test 0:x self.assertEqual(seq[:x], self.file.fetch(id, None, x)) - # unknown sequence returns "" - # change: should be an IndexError - self.assertEqual(b"", self.file.fetch("chr12")) + # unknown sequence raises IndexError + self.assertRaises(KeyError, self.file.fetch, "chr12") def testOutOfRangeAccess(self): '''test out of range 
access.''' # out of range access returns an empty string for contig, s in self.sequences.items(): - self.assertEqual(self.file.fetch(contig, len(s), len(s) + 1), b"") - - self.assertEqual(self.file.fetch("chr3", 0, 100), b"") + self.assertEqual(self.file.fetch(contig, len(s), len(s) + 1), "") def testFetchErrors(self): self.assertRaises(ValueError, self.file.fetch) - self.assertRaises(IndexError, self.file.fetch, "chr1", -1, 10) + self.assertRaises(ValueError, self.file.fetch, "chr1", -1, 10) self.assertRaises(ValueError, self.file.fetch, "chr1", 20, 10) - - # does not work yet - # self.assertRaises( KeyError, self.file.fetch, "chrX" ) + self.assertRaises(KeyError, self.file.fetch, "chr3", 0, 100) def testLength(self): self.assertEqual(len(self.file), 2) @@ -58,30 +53,82 @@ class TestFastaFile(unittest.TestCase): self.file.close() -class TestFastqFile(unittest.TestCase): +class TestFastxFileFastq(unittest.TestCase): + + filetype = pysam.FastxFile + filename = "faidx_ex1.fq" + persist = True def setUp(self): - self.file = pysam.FastqFile(os.path.join(DATADIR, "ex1.fq")) + self.file = self.filetype(os.path.join(DATADIR, self.filename), + persist=self.persist) + self.has_quality = self.filename.endswith('.fq') + + def checkFirst(self, s): + # test first entry + self.assertEqual(s.sequence, "GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC") + self.assertEqual(s.name, "B7_589:1:101:825:28") + if self.has_quality: + self.assertEqual(s.quality, "<<86<<;<78<<<)<;4<67<;<;<74-7;,;8,;") + self.assertEqual(list(s.get_quality_array()), + [ord(x) - 33 for x in s.quality]) + self.assertEqual(str(s), + "@B7_589:1:101:825:28\n" + "GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC\n" + "+\n" + "<<86<<;<78<<<)<;4<67<;<;<74-7;,;8,;") + + else: + self.assertEqual(s.quality, None) + self.assertEqual(s.get_quality_array(), None) + self.assertEqual(str(s), + ">B7_589:1:101:825:28\n" + "GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC") + + def checkLast(self, s): + self.assertEqual(s.sequence, 
"TAATTGAAAAATTCATTTAAGAAATTACAAAATAT") + self.assertEqual(s.name, "EAS56_65:8:64:507:478") + if self.has_quality: + self.assertEqual(s.quality, "<<<<<;<<<<<<<<<<<<<<<;;;<<<;<<8;<;<") + self.assertEqual(list(s.get_quality_array()), + [ord(x) - 33 for x in s.quality]) + else: + self.assertEqual(s.quality, None) + self.assertEqual(s.get_quality_array(), None) def testCounts(self): self.assertEqual(len([x for x in self.file]), 3270) def testMissingFile(self): - self.assertRaises(IOError, pysam.FastqFile, "nothere.fq") + self.assertRaises(IOError, self.filetype, "nothere.fq") def testSequence(self): - s = self.file.__next__() - # test first entry - self.assertEqual(s.sequence, b"GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC") - self.assertEqual(s.quality, b"<<86<<;<78<<<)<;4<67<;<;<74-7;,;8,;") - self.assertEqual(s.name, b"B7_589:1:101:825:28") - - for s in self.file: + first = self.file.__next__() + self.checkFirst(first) + for last in self.file: pass - # test last entry - self.assertEqual(s.sequence, b"TAATTGAAAAATTCATTTAAGAAATTACAAAATAT") - self.assertEqual(s.quality, b"<<<<<;<<<<<<<<<<<<<<<;;;<<<;<<8;<;<") - self.assertEqual(s.name, b"EAS56_65:8:64:507:478") + self.checkLast(last) + + # test for persistence + if self.persist: + self.checkFirst(first) + else: + self.checkLast(first) + + +# Test for backwards compatibility +class TestFastqFileFastq(TestFastxFileFastq): + filetype = pysam.FastqFile + + +# Test for backwards compatibility +class TestFastxFileFasta(TestFastxFileFastq): + filetype = pysam.FastqFile + filename = "faidx_ex1.fa" + + +class TestFastxFileFastqStream(TestFastxFileFastq): + persist = False if __name__ == "__main__": unittest.main() diff --git a/tests/pysam_data/Makefile b/tests/pysam_data/Makefile index 8b0964a..6166fd2 100644 --- a/tests/pysam_data/Makefile +++ b/tests/pysam_data/Makefile @@ -48,7 +48,7 @@ ex1.bam:ex1.sam.gz ex1.fa.fai samtools index $< ex1.pileup.gz:ex1.bam ex1.fa - samtools pileup -cf ex1.fa ex1.bam | gzip > ex1.pileup.gz + samtools 
mpileup -f ex1.fa ex1.bam | gzip > ex1.pileup.gz ex2_truncated.bam: ex2.bam head -c 124000 ex2.bam > ex2_truncated.bam diff --git a/tests/pysam_data/faidx_ex1.fa b/tests/pysam_data/faidx_ex1.fa new file mode 100644 index 0000000..d16e255 --- /dev/null +++ b/tests/pysam_data/faidx_ex1.fa @@ -0,0 +1,6540 @@ +>B7_589:1:101:825:28 +GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC +>B7_589:1:101:825:28 +TGTGTCCATGTACACACGCTGTCCTATGTACTTAT +>B7_589:1:110:543:934 +AAGAATTGTAAAAGTCAAAATTAAAGTTCAATACT +>B7_589:1:110:543:934 +ACAAGCCAGAAGAGATTGGATCTAATTTTTGGACT +>B7_589:1:122:337:968 +ACAGGGGGCTGCGCGGTTTCCCATCATGAAGCACT +>B7_589:1:122:337:968 +GCTTTACTGTCTAAACTATGAAGAGACTATTGCCA +>B7_589:1:122:77:789 +ACTATATTTATGCTATTCAGTTCTAAATATAGAAA +>B7_589:1:122:77:789 +GGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAA +>B7_589:1:168:69:249 +ATATGCTGTTTACAAGAAACTCATTAATAAAGACA +>B7_589:1:168:69:249 +TTCAGCAAGAAGATATAACCATCCTACTAAATACA +>B7_589:1:29:529:379 +CAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAAT +>B7_589:1:29:529:379 +GACTCTATCCCAAATTCCCAATTACGTCCTATCTT +>B7_589:2:30:644:942 +TACCTAATTGGTACAATGTACAATATTCTGATGAT +>B7_589:2:30:644:942 +TATATCAGATAAAGCACACTTTAAATCAACAACAG +>B7_589:2:73:730:487 +AGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTT +>B7_589:2:73:730:487 +TAGCCATTTCTTTTGGCATTTGCCTTCAGACCCTA +>B7_589:2:9:49:661 +TACACACAAAAGTACAAAACTCACAGGTTTTATAA +>B7_589:2:9:49:661 +TGCTAAACTAAGCATCATAAATGAAGCGGAAATAA +>B7_589:3:71:478:175 +ACAGAGCTTTCAAGAAGTATGAGATTATGTAAAGT +>B7_589:3:71:478:175 +TAGACATCTAAATGAAAGAGGCTCAAAGAATGCCA +>B7_589:3:82:13:897 +ATACAGTCATCTATAAAGGAAATCCCAGCAGAATA +>B7_589:3:82:13:897 +CATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCT +>B7_589:4:54:989:654 +ACTTATCATGACTCTATCCCAAATTCCCAATTACG +>B7_589:4:54:989:654 +TCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTT +>B7_589:5:147:405:738 +AGGGATTAAATTCCCCCACTTAAGAGATATAGATT +>B7_589:5:147:405:738 +ATCAGATAAAGCACACTTTAAATCAACAACAGTAA +>B7_589:5:198:564:731 +ACAAAGGAGGTCATCATACAATGATAAAAAGATCA +>B7_589:5:198:564:731 +ATAGATTGGCAGAACAGATTTAAAAACATGAACTA +>B7_589:5:50:950:562 
+CTATTTTTGTCTTGACACCCTACTAATATTTGTCT +>B7_589:5:50:950:562 +GCCCCATCTCTTGTAATCTCTCTCCTTTTTGCTGC +>B7_589:5:68:440:424 +ACACTTTAAATCAACAACAGTAAAATAAAACAAAG +>B7_589:5:68:440:424 +TGGTACAATGTACAATATTCTGATGATGGTTACAC +>B7_589:6:108:958:42 +AAAGTACAAAACTCACAGGTTTTATAAAACAATTA +>B7_589:6:108:958:42 +TATGCCCTGCTAAACTAAGCATCATAAATGAAGGG +>B7_589:6:114:714:317 +AACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGC +>B7_589:6:114:714:317 +TGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCA +>B7_589:6:120:14:944 +CAAAAGGTGATGTGTGTTCTCATCAACCTCATACA +>B7_589:6:120:14:944 +CAAAGATGAAACGCGTAACTGCGCTCTCATTCACT +>B7_589:6:33:356:636 +TTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTT +>B7_589:7:112:203:90 +CCTGTCACCCAATGGACCTGTGATATCTGGATTCT +>B7_589:7:112:203:90 +CTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGA +>B7_589:7:154:26:712 +ATTGAGACTACAGAGCAACTAGGTAAAAAATTAAC +>B7_589:7:154:26:712 +TTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAA +>B7_589:7:72:916:763 +CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGT +>B7_589:7:72:916:763 +GTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGT +>B7_589:7:76:306:561 +GGTGGAAAAAGATGTTCTACGCAAACAGAAACCAA +>B7_589:7:76:306:561 +TACAGAGCAACTAGGTAAAAAATTAACATTACAAC +>B7_589:7:93:634:323 +CTTAAAGAAAAAAAAACCTGTCAAACACGAATGTT +>B7_589:7:93:634:323 +TAAAGTTCAATACTCACCATCATAAATACACACAA +>B7_589:8:113:968:19 +GAAAACTATATTTATGCTATTCAGTTCTAAATATA +>B7_589:8:118:829:36 +AGTATTGGTAAAGATGTGGGGAAAAAAGTAAACTC +>B7_589:8:118:829:36 +TAATTGGTACAATGTACAATATTCTGATGATGGTT +>B7_589:8:139:727:808 +AAGTAAATAAAACACATAGCTAAAACTAAAAAAGC +>B7_589:8:139:727:808 +ACAAATACTACTAGACCTAAGAGGGATGAGAAATT +>B7_589:8:157:935:374 +CAGTTACCAAATGTGTTTATTACCAGAGGGATGGA +>B7_589:8:157:935:374 +TCTTCATCCTGGACCCTGAGAGATTCTGCAGCCCA +>B7_589:8:2:434:715 +AGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCT +>B7_589:8:2:434:715 +CTTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTT +>B7_589:8:74:674:124 +CACTGAACTTCCACGTCTCATCTAGGGGAACAGGG +>B7_589:8:74:674:124 +TTCTTATCTGCACATTACTACCCTGCAATTAATAT +>B7_591:1:191:462:705 +CAGATCCAGATTGCTTGTGGTCTGACAGGCTGCAAC +>B7_591:1:191:462:705 +CATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAG 
+>B7_591:1:60:837:923 +CATCAACCGCATACACTCACATGGTTTAGGGGTATA +>B7_591:1:60:837:923 +TTCACGCCAGCTCCCTGTCACCCAATGGACCTCTGA +>B7_591:2:123:924:645 +TATATCAGATAAAGCACACTTTAAATCAACAACAGT +>B7_591:2:123:924:645 +TGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGA +>B7_591:2:134:868:252 +AAGAACTTTGATGCCCTCTTCTTCCAAAGATGAAAC +>B7_591:2:134:868:252 +ATGAACAACAGGAAGAAAAGGTCTTTCAAAAGGTGA +>B7_591:2:13:100:876 +ACAGGGATTCCTGAGGAAAAAGAAAAAGTGAGAAGT +>B7_591:2:13:100:876 +AGAATATATAAAGTCAACATGAAGGAAAAAAATTCT +>B7_591:2:223:583:968 +AATATAGTTGAAAGCTCTAACAATAGACTAAACCAA +>B7_591:2:223:583:968 +TATGAGGCACAGGTATTCCTGAGGAAAAAGAAAAAG +>B7_591:2:240:603:890 +GCTCCCAAGAGGGAAAGCTTTCAACGCTTCTAGCCA +>B7_591:2:240:603:890 +TCACAATGAACAACAGGAAGAAAAGGTCTTTCAAAA +>B7_591:2:279:124:41 +GAATTAACCCAGTCAGACAAAAANNAAGAAAAAAGA +>B7_591:2:279:124:41 +GCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAA +>B7_591:2:27:280:592 +AATAACAATGGGCTTCTCAGCGGAAACCTTACAAGC +>B7_591:2:27:280:592 +AGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGA +>B7_591:2:309:798:997 +TTTTTTTTTTTTTTTTTTCTCTTTTTTTTTTTTTTT +>B7_591:2:323:639:311 +AAGCCGTTCTATTTGTAATGAAAACTATATTTAGGC +>B7_591:2:323:639:311 +TACCAAATGTGTTTATTACCAGAGGGATGGAGGGAA +>B7_591:2:46:220:58 +CAAATCTGCGCTTGTACTTCTAAATCTATAAAAAAA +>B7_591:2:46:220:58 +TTCCACTTTGGAAAACAATTTGGTAATTTCGTTTTT +>B7_591:3:168:69:605 +TACCCGAGGGATGGAGGGTAGAGGGACGCTGAAGTG +>B7_591:3:168:69:605 +TCTGACAGGCGGCAACTGTGAGCCATCACAATGAAC +>B7_591:3:179:496:161 +AAAAACATGAACTAACTATATGCTGTTTACAAGAAA +>B7_591:3:179:496:161 +AAGTACAAAACTCACAGGTTTTATAAAACAATTAAT +>B7_591:3:277:458:330 +AATGTCAGGGAAGGAGCCTTTTGTCAGTTACCAAAT +>B7_591:3:277:458:330 +TGATATCTGGATTCTGGGAAATTCTTCATCCTGGAC +>B7_591:3:291:404:199 +TATAAAACAATTAATTGAGACTACAGAGCAACTAGG +>B7_591:3:291:404:199 +TGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGC +>B7_591:3:305:565:952 +GTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGT +>B7_591:3:305:565:952 +TAATACTATGTTTCTTATCTGCACATTACTACCCTG +>B7_591:3:45:294:380 +ATAATTGTGTCCATGTACACACGATGTCATATGTAC +>B7_591:3:45:294:380 +CCTCGTCCACACTGGTTCGCTTGAAAGCTTGGGCTG 
+>B7_591:4:103:111:720 +CAGTTACCAAATGTGTTTATTACCAGAGGGATGGAG +>B7_591:4:103:111:720 +TCTTCATCCTGGACCCTGAGAGATTCTGCAGCCCAG +>B7_591:4:159:508:571 +CAAAACCTCATATATCAATATTAACTTTGAATAAAA +>B7_591:4:159:508:571 +TGGAAAAAGATGTTCTACGCAAACAGAAACCAAATG +>B7_591:4:216:650:516 +GAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAA +>B7_591:4:216:650:516 +TTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT +>B7_591:4:329:339:408 +CAATCCAGAAGAGATTGGATCTAATTTTTGGACTTC +>B7_591:4:329:339:408 +TAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC +>B7_591:4:92:411:955 +GGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAG +>B7_591:4:92:411:955 +TACTAAATACATATGCACCTAACACAAGACTACCCA +>B7_591:5:124:978:501 +AATTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTC +>B7_591:5:124:978:501 +ATGTAACAAATCTGCGCTTGTACTTCTAAATCTATA +>B7_591:5:134:751:831 +AGCTCCCTGTCACCCAATGGACCTGTGATATCTGGA +>B7_591:5:134:751:831 +ATACACACACATGGTTTAGGGGTATAATACCTCTAC +>B7_591:5:243:557:560 +AAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCA +>B7_591:5:243:557:560 +CTAAGCAGAAACCTTACAAGCCAGAAGAGATTGGAT +>B7_591:5:254:542:848 +CCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAA +>B7_591:5:254:542:848 +CTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAAC +>B7_591:5:289:132:526 +CACCCGGTCCCTGCCCCATCTCTTGTAATCTCTCTC +>B7_591:5:289:132:526 +TCTATTTTTGTCTTGACACCCAACTAATATTTGTCT +>B7_591:5:42:540:501 +CTATATTTATGCTATTCAGTTCTAAATATAGAAATT +>B7_591:5:90:828:633 +CTGCAGCCCAGATCCAGATTGCTTGTGGTCTGACAG +>B7_591:5:90:828:633 +GGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATG +>B7_591:6:11:646:628 +GTACTTATCATGACTCTATCCCAAATTCCCAATTAC +>B7_591:6:11:646:628 +TTCTGCCCCCAGCATGGTTGTACTGGGCAATACATG +>B7_591:6:155:12:674 +CTATATTTATGCTATTCAGTTCTAAATATAGAAATT +>B7_591:6:181:191:418 +AAACTATATTTATGCTATTCAGTTCTAAATATAGAA +>B7_591:6:181:191:418 +AGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTCT +>B7_591:6:190:42:671 +TATTGCCAGATGAACCACACATTAATACTATGTTTC +>B7_591:6:190:42:671 +TGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCA +>B7_591:6:29:575:453 +TACTACCCTGCAATTAATATAATTGTGTCCATTTAC +>B7_591:6:29:575:453 +TCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCTTG +>B7_591:7:116:814:89 +ACTATGCTAAGTATTGGTAAAGATGTGGGGAAAAAA 
+>B7_591:7:116:814:89 +CCTAATTGGTACAATGTACAATATTCTGATGATGGT +>B7_591:7:129:956:115 +AGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATAC +>B7_591:7:129:956:115 +GCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCA +>B7_591:7:157:447:758 +AAAGATGTTCTACGCAAACAGAAACCAAATGAGAGA +>B7_591:7:157:447:758 +ACAAAACCTCATATATCAATATTAACTTTGAATAAA +>B7_591:7:200:192:373 +AGTGCCTTTGTTCACATAGACCCCCTTGCAACAACC +>B7_591:7:200:192:373 +CTCTCATTCACTCCAGCTCCCTGTCACCCAATGGAC +>B7_591:7:22:632:176 +AAAGGAGGTCATCATACAATGATAAAAAGATCAATT +>B7_591:7:22:632:176 +AGATATAGATTGGCAGAACAGATTTAAAAACATGAA +>B7_591:7:68:242:834 +AAATAAAAAAGCAAAAACAAAAACTATGCTAAGTAT +>B7_591:7:68:242:834 +TACTACTAGACCTAAGAGGGATGAGAAATTACCTAA +>B7_591:7:89:67:709 +TTTTTTTTTTTGTCTTCTCTTTTTTTTTTTTTTTTT +>B7_591:8:4:841:340 +TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAA +>B7_593:1:12:158:458 +CTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCT +>B7_593:1:12:158:458 +TAATAATGCTACATGGATGATTATGAAATCAATGTT +>B7_593:1:189:876:833 +CAAGACTACCCAGATTCATAAAACAAATACTACTAG +>B7_593:1:189:876:833 +TTAAAATTTAACAAAAGTAAATAAAACACATAGCTA +>B7_593:1:19:695:59 +AACAGGAACAAAACCTCATATATCAATATTAACTTT +>B7_593:1:19:695:59 +GTGGAAAAAGATGTTCTACGCAAACAGAAACCAAAT +>B7_593:1:200:559:765 +GGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTAT +>B7_593:1:200:559:765 +TGGACCCTGAGAGATTCTGCAGCCCAGATCCAGATT +>B7_593:1:215:861:605 +GAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTC +>B7_593:1:215:861:605 +NAAGACAAGTCTCTTATGAATTAACCCAGTCAGACA +>B7_593:1:36:485:632 +AAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCA +>B7_593:1:36:485:632 +GGCATTTGCCTTCAGACCCTACACGAATGCGTCTCT +>B7_593:1:85:361:418 +AGCCAGAAGAGATTGGATCTAATTTTTGGACTTCTT +>B7_593:1:85:361:418 +GAAGATACATTGCAAGACAGACTTCATCAAGATATG +>B7_593:2:104:744:280 +CATATGGAAAGGTTGTTGGGATTTTTTTAATGATTC +>B7_593:2:104:744:280 +TGGGCTGTAATGATGCCCCTTGTCCATCACCCGGTC +>B7_593:2:125:875:553 +AACAGTAAAATAAAACAAAGGAGGTCATCATACAAT +>B7_593:2:125:875:553 +TAATTGGTACAATGTACAATATTCTGATGATGGTTA +>B7_593:2:128:555:941 +AACCAAAAGAGAGAAGGAGTAGTTATACACATATCA +>B7_593:2:133:460:542 +CCTATAAGCCGTTCTATTTGTAATGAAAACTATATT 
+>B7_593:2:133:460:542 +TTACCAAATGTGTTTATTACCAGAGGGATGGAGGGA +>B7_593:2:259:467:737 +CTATGTACTTATCATGACTCTATCCCAAATTCCCAA +>B7_593:2:259:467:737 +TCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCA +>B7_593:2:270:430:269 +AAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCA +>B7_593:2:270:430:269 +CCAGAAGAGATTGGATCTAATTTTTGGACTTCTTAA +>B7_593:2:273:348:37 +AGAAATGCGCAAAAGAATTGTAAAAGTCAAAATTAA +>B7_593:2:273:348:37 +GAATAACAATGGGCTTCTCAGCAGAAACCTTACACG +>B7_593:2:313:531:169 +GAAAGAGGTTCAGAACTTGAAGACAAGTCTCTTATG +>B7_593:2:313:531:169 +GTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATT +>B7_593:2:43:239:977 +TGAACTTCTGTAATTGAAAAATTCATTTAAGAAATT +>B7_593:2:68:140:542 +AAAAACAAAAACTATGCTAAGTATTGGTAAAGATGT +>B7_593:2:68:140:542 +GGGATGAGAAATTACCTAATTGGTACAATGTACAAT +>B7_593:2:68:692:347 +TATCAATTTGGTGTTCTGTGTAAAGTCTCATGGAGC +>B7_593:2:68:692:347 +TGTCTTGATTTACTTGTTGTTGGTTTTCTGTTTCTT +>B7_593:2:81:435:410 +AGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGT +>B7_593:2:81:435:410 +ATTAATTGAGACTACAGAGCAACTAGGTAAAAAATT +>B7_593:3:102:856:670 +AAACCTCATATATCAATATTAACTTTGAATAAAAAG +>B7_593:3:102:856:670 +AGAGAAGGAGTAGCTATACTTATATCAGATAAAGCA +>B7_593:3:115:649:259 +ATTAATTGAGAATACAGAGCAACTAGGTAAAAAATT +>B7_593:3:115:649:259 +GGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATG +>B7_593:3:148:437:481 +CGCTTCTAGCCATTTCTTTTGGCATTTGCCTTCAGA +>B7_593:3:148:437:481 +GTACTGGGCAATACATGAGATTATTAGGAAATGCTT +>B7_593:3:180:89:582 +ATGCTAAGATAATTCATCATCACTAAACCAGTCCTA +>B7_593:3:180:89:582 +TAAAAAATTAACATTACAACAGGAACAAAACCTCAT +>B7_593:3:194:168:684 +AAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGT +>B7_593:3:194:168:684 +CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTT +>B7_593:3:196:11:27 +AAGACCCAGTTAGCCGTCCGTGTCCTCCCATCTGGC +>B7_593:3:196:11:27 +CTATGTTTCTTATCTGCNCATTACTACCCTGCAATT +>B7_593:3:303:131:673 +ACAAGAAACTCATTAATAAAGACATGAGTTCAGGTA +>B7_593:3:303:131:673 +ATAAAAAGATCAATTCAGCAAGAAGATATAACCATC +>B7_593:3:310:193:629 +CATACAATGATAAAAAGATCAATTCAGCAAGAAGAT +>B7_593:3:310:193:629 +TACACTAAAAGCCCATACTTTACTGCTACTCAATAT +>B7_593:4:104:153:698 +CCAGATACCATCCCTGTCTTACTTCCAGCTCCCCAG 
+>B7_593:4:104:153:698 +CTAATGCGCTCCACGCCCAAGCCCTTCTCACAGTTT +>B7_593:4:106:316:452 +CTATATTTATGCTATTCAGTTCTAAATATAGAAATT +>B7_593:4:142:63:937 +GAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATC +>B7_593:4:142:63:937 +TTCTTTTGGCATTTGCCTTCAGACCCTACACGAATG +>B7_593:4:28:781:723 +AATACCTCTACATGGCTGATTATGAAAACAATGTTC +>B7_593:4:28:781:723 +ACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAG +>B7_593:4:29:794:282 +CACATTAATACTATGTTTCTTATCTGCACATTACTA +>B7_593:4:29:794:282 +TAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATCTG +>B7_593:4:30:117:411 +TCTTGACACCCAACTAATATTTGTCTGAGCAAAACA +>B7_593:4:30:117:411 +TCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTC +>B7_593:4:30:812:345 +TATGTTTCTTATCTGCACATTACTACCCTGCAATTA +>B7_593:4:30:812:345 +TCATCTAGGGGAACAGGGAGGTGCACTAATGCGCTC +>B7_593:4:315:201:673 +AAGATATGTAGTCATCAGACTATCTAAAGTCAACAT +>B7_593:4:315:201:673 +TTGGACTTATTAAAGAAAAAAAAACCTGTCAAACAC +>B7_593:5:171:343:758 +ACTAATATTTGTCTGAGCAAAACAGTCTAGATGAGA +>B7_593:5:171:343:758 +GCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTG +>B7_593:5:267:71:603 +TTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAA +>B7_593:5:267:71:603 +TTTAATGATTCCTCAATGTTAAAATGTCTATTTTTG +>B7_593:5:299:743:762 +AAAATTAAAATTTAACAAAAGTAAATAAAACACATA +>B7_593:5:299:743:762 +CAAGACTACCCAGATTCATAAAACAAATACTACTAG +>B7_593:5:30:599:589 +CTACGCAAACAGAAACCAAATGAGAGAAGGAGCAGC +>B7_593:5:30:599:589 +TCATAAAACAAATACTACTAGACCTAAGAGGGATGA +>B7_593:6:118:121:760 +GAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAA +>B7_593:6:119:428:415 +GCTATACTTATATCAGATAAAGCACACTTTAAATCA +>B7_593:6:119:428:415 +TAAATTCCCCCACTTAAGAGATATAGATTGGCAGAA +>B7_593:6:185:96:948 +CTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATA +>B7_593:6:185:96:948 +TTTAAAAATGAACAGAGCTTTCAAGAAGTATGAGAT +>B7_593:6:38:332:54 +CCATCATAAATACACACAAAAGTACAAAACTCACAG +>B7_593:6:38:332:54 +TGGCAGAACAGATTTAAAAACATGAACTAACTATAT +>B7_593:6:61:628:681 +CAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCTT +>B7_593:6:61:628:681 +GCCATCACAATGAACAACAGGAAGAAAAGGTCTTTC +>B7_593:7:15:244:876 +AAAAGTAAACTCTCAAATATTGCTAGTGTGAGTATA +>B7_593:7:15:244:876 +GTACAATATTCTGATGATGGTTACACTAAAAGCCCA 
+>B7_593:7:189:530:40 +AAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATAC +>B7_593:7:189:530:40 +CCCCACTTAAGAGATATAGATTGGCAGAACAGATTT +>B7_593:7:256:354:173 +CATTAATAAAGACATGAGTTCAGGTAAAGGGGTGGA +>B7_593:7:256:354:173 +TCAATTCAGCAAGAAGATATAACCATCCTACTAAAT +>B7_593:7:283:186:707 +AATACATATGCACCTAACACAAGACTACCCAGATTC +>B7_593:7:283:186:707 +CGCTTGTACTTCTAAATCTATAACAAAATTAAAATT +>B7_593:7:307:481:625 +AAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAG +>B7_593:7:307:481:625 +TCAACATGAAGGAAAAAAATTCTAAAATCAGCAAGA +>B7_593:7:67:302:762 +GTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCC +>B7_593:7:67:302:762 +TTACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTA +>B7_593:7:6:585:132 +GCCCCTTGACCACCACCCAGTCCCTGCCCCATCTCT +>B7_593:7:6:585:132 +TGTACTTATCATGTTTCTTTCCTAATTTTTCAATTA +>B7_593:7:87:89:696 +TGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCCCC +>B7_593:7:87:89:696 +TTCCTATGGAAAGGTTGTTGGGAGATTTTTAATGAT +>B7_595:1:209:345:87 +AAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGT +>B7_595:1:209:345:87 +TTACTTGTTGTTGGTTTTCTGTTTCTTTTTTTGAT +>B7_595:1:209:653:400 +AGGAGCATTTTGTCAGTTACCAAATGTGTTTATTA +>B7_595:1:209:653:400 +CTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCT +>B7_595:1:252:19:955 +AGCCAGTTCTTTTGGCATTTGCCTTCAGACCCTCC +>B7_595:1:252:19:955 +TGAACAAAAGGAAGAAAAGGTCTTTCAAAAGGTGA +>B7_595:1:81:1000:375 +ACCCTGAGAGATTCTGCAGCCCAGCTCCAGATTGC +>B7_595:1:81:1000:375 +NATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAA +>B7_595:2:178:77:424 +CTACCCTGCAATTAATATAATTGTGTCCATGTACA +>B7_595:2:178:77:424 +TGCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTG +>B7_595:2:251:121:479 +GGCTGCAACTGTGAGCCATCACAATGAACAACAGG +>B7_595:2:251:121:479 +GGGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTG +>B7_595:2:29:729:70 +AAACAGAAACCAAATGAGAGAAGGAGTAGCTATAC +>B7_595:2:29:729:70 +ANTATTANCTTTGANNAAAAAGGGATTAAATTCCC +>B7_595:3:229:543:583 +ATAACCATCCTACTAAATACATATGCACCTAACAC +>B7_595:3:229:543:583 +TCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAA +>B7_595:3:297:637:86 +CAATGTTCCCCAGATACCATCCCTGTCTTACTTCC +>B7_595:3:297:637:86 +TCTCAGCTAGGGGAACAGGGAGGTGCACTAATGCG +>B7_595:3:57:735:151 +CTATCCCAAATTCCCAATTACGTCCTATCTTCTTC +>B7_595:3:57:735:151 
+TAAACTCTCACCTTATTGCTGCATCCCTGTCTTCC +>B7_595:3:85:964:950 +AACAGATTTAAAAACATGAACTAACTATATGCTGT +>B7_595:3:85:964:950 +GAGGTCATCATACAATGATAAAAAGATCAATTCAG +>B7_595:4:12:402:843 +AGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCC +>B7_595:4:12:402:843 +ATATAATTGTGTCCATGTACACACGCTGTCCTATG +>B7_595:4:319:250:718 +AAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTG +>B7_595:4:319:250:718 +AGTTGAAAGCTCTAACAATAGACTAAACCAAGCAG +>B7_595:4:58:703:72 +GTACACACGCTGTCCTATGTACTTATCATGACTCT +>B7_595:4:58:703:72 +TCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCC +>B7_595:4:84:802:737 +CATAGACCCCCTTGCAACAACCTTGAGAACCCCAG +>B7_595:4:84:802:737 +CTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAG +>B7_595:5:184:912:258 +ATTCTAAAATCAGCAAGAGAAAAGCATACAGTCAT +>B7_595:5:184:912:258 +GTTATGCCCTGCTAAACTAAGCATCATAAATGAAG +>B7_595:5:36:649:554 +AAGAGATTGGATCTAATTTTTGGACTTCTTAAAGA +>B7_595:5:36:649:554 +CAGGAAGATACATTGCAAGACAGACTTCATCAAGA +>B7_595:5:84:91:614 +GAACCACACATTAATACTATGTTTCTTATCTGCAC +>B7_595:5:84:91:614 +TTTCCCATCATGAAGCACTGATCTTCCACGTCTCA +>B7_595:6:119:730:190 +AGTCTCTTATGAATTAACCCAGTCAGACAAAAATA +>B7_595:6:119:730:190 +AGTCTTGCTAGAGATTTAGACATCTAAATGAAAGA +>B7_595:6:137:811:130 +AAAATTTAACAAAAGTAAATAAAACACATAGCTAA +>B7_595:6:137:811:130 +AGACTACCCAGATTCATAAAACAAATACTACTAGA +>B7_595:6:290:270:557 +ACATTACTACCCTGCAATTAATATAATTGTGTCCA +>B7_595:6:290:270:557 +GGAACAGGGAGGTGCACTAATGCGCTCCACGCCCA +>B7_595:6:47:720:789 +CCCTTGGCCATCACCCGGTCCCGGCCCCTTCTCTT +>B7_595:6:47:720:789 +TCCTCAATGTTAAAATGTCTATTTTTGTCTTGACA +>B7_595:6:52:751:360 +AAAAACTATTTGAGGAAGTAATTGGGGAAAACCTC +>B7_595:6:52:751:360 +AGAGAAAAGCATACAGTCATCTATAAAGGAAATCC +>B7_595:6:99:557:427 +AACAAAATTAAAATTTAACAAAAGTAAATAAAACA +>B7_595:6:99:557:427 +ATTCATAAAACAAATACTACTAGACCTAAGAGGGA +>B7_595:7:123:610:472 +GTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTC +>B7_595:7:123:610:472 +TGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGT +>B7_595:7:149:123:265 +AAGAGGGATGAGAAATTACCTAATTGGTACAATGT +>B7_595:7:149:123:265 +AGCAAAAACAAAAACTATGCTAAGTATTGGTAAAG +>B7_595:7:166:203:416 +AATTACGTCCTATCTTCTTCTTAGGGAAGAACAGC 
+>B7_595:7:166:203:416 +ATGAGATTATTAGGAAATGCTTTACTGTCATAACT +>B7_595:7:188:802:71 +ATGCTATTCAGTTCTAAATATAGAAATTGAAACAG +>B7_595:7:188:802:71 +TGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGC +>B7_595:7:190:481:295 +GAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTG +>B7_595:7:190:481:295 +TCATAACTATGAAGAGACTATTGCCAGATGAACCA +>B7_595:7:242:4:593 +ATATACACACGCTGTCCTATGTACTTATCATGACT +>B7_595:7:242:4:593 +TCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCC +>B7_595:8:26:242:35 +ATAAAACAAAGGAGGTCATCATACAATGATAAAAA +>B7_595:8:26:242:35 +ATATTTTGATGATGGTTACACTAAAAGCCCATACT +>B7_597:2:100:563:301 +GAACTTCTGTAATTGAAAAATTCATTTAAGAAATT +>B7_597:2:132:493:921 +ACGGGGTTGCCAGCACAGGGGCTTAACCTCTGGTG +>B7_597:2:132:493:921 +GTTCTCAAGGTTGTTGCAAGGGGGTCTATGTGAAC +>B7_597:2:165:431:857 +GAACTTTGATGCCCTCTTCTTCCAAAGATGAAACG +>B7_597:2:165:431:857 +TTGGGGTCTGACAGGCTGCAACTGTGAGCCATCAC +>B7_597:2:168:829:88 +ACAGACTTCATCAAGATATGTAGTCATCAGACTAT +>B7_597:2:168:829:88 +TAACTGAACCTATGAGTCACAGGTATTCCTGAGGA +>B7_597:2:42:28:552 +AAGAAGATATAACCATCCTACTAAATACATATGCA +>B7_597:2:42:28:552 +ACTCAATATATCCATGTAACAAATCTGCGCTTGTA +>B7_597:3:10:394:392 +TCATCAAGATATGTAGTCATCAGACTATCTAAATT +>B7_597:3:10:394:392 +TTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACA +>B7_597:3:115:646:430 +CACAGGTTTTATAAAACAATTAATTGAGACTACAG +>B7_597:3:115:646:430 +GTTATGCCCTGCTAAACTTAGCATCATAAATGAAG +>B7_597:3:133:707:886 +ACCTAATAAATACATATGCACCTAACACAAGACTA +>B7_597:3:133:707:886 +AGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAAC +>B7_597:3:157:361:309 +CCTGCTAAACTAAGCATCATAAATGAAGGGGAAAT +>B7_597:3:157:361:309 +TACACACAAAAGTACAAAACTCACAGGTTTTATAA +>B7_597:3:39:966:551 +ACATTAATACTATGTTTCTTATCTGCACATTACTA +>B7_597:3:39:966:551 +AGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTC +>B7_597:3:46:981:766 +TCTTTTGGCATTTGCCTTCAGACCCTACACGAATG +>B7_597:3:46:981:766 +TGCCCCCAGCATGGTTGTACTGGGCAATACATGAG +>B7_597:3:53:616:842 +CTTATCATGACTCTATCCCAAATTCCCACTTACGT +>B7_597:3:53:616:842 +TCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTC +>B7_597:3:67:620:344 +AGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATT +>B7_597:3:67:620:344 
+CCCCCGCCCAAGCCCTTCTCACAGTTTCTGCCCCC +>B7_597:3:73:273:488 +AAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGG +>B7_597:3:73:273:488 +CTCTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTG +>B7_597:4:138:211:582 +CATTAATAAAGACATGAGTTCAGGTAAAGGGGTGG +>B7_597:4:138:211:582 +TGAGACTACAGAGCAAATAGGTAAAAAATTAACAT +>B7_597:4:144:492:61 +AAATACTCACCATCATAAATACACACAAAAGTACA +>B7_597:4:144:492:61 +AACCTGTCAAACACGAATGTTATGCCCTGCTAAAC +>B7_597:4:146:961:63 +TACTTATATCAGATAAAGCACACTTTAAATCAACA +>B7_597:4:146:961:63 +TGAATAAAAAGGGCTTAAATTCCCCCACTTAAGGG +>B7_597:4:38:999:463 +GCGGAAACCTTACAAGCCAGAAGAGATTGGATCTA +>B7_597:4:38:999:463 +TAGACATCTAAATGAAAGNNGCNNNAAGAATGCCA +>B7_597:5:125:957:753 +TTTTTTTTTTTTCTCTCCTCTTTTTTTTTTTTTTT +>B7_597:5:160:434:853 +ATATAATTGTGTCCATGTACACACGCTGTCCTATG +>B7_597:5:160:434:853 +GCTTGGGCTGTAATGATGCCCCTTGGCCATCACCC +>B7_597:5:58:684:520 +AGACAGACTTCATCAAGATATGTAGTCATCAGACT +>B7_597:5:58:684:520 +ATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCA +>B7_597:5:6:882:784 +CATGGCTGATTATGAAAACAATGTTCCCCAGATAC +>B7_597:5:6:882:784 +CTGGATTCTGGGAAATTCTTCATCCTGGACCCTGA +>B7_597:5:98:995:929 +GTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGT +>B7_597:5:98:995:929 +TATAACAATATTAACTTTGAATAAAAAGGGATTAA +>B7_597:6:106:595:322 +GAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGC +>B7_597:6:106:595:322 +GCAAGAGAAAAGCATACAGTCATCTATAAAGGAAA +>B7_597:6:193:661:771 +AAACTCATTAATAAAGACATGAGTTCAGGTAAAGG +>B7_597:6:193:661:771 +GCAAGAAGATATAACCATCCTACTAAATACATATG +>B7_597:6:20:592:496 +CTCAAAGAATGCCAGGAAGATACATTGCAAGACAG +>B7_597:6:20:592:496 +TCTCAGCGGAAACCTTACAAGCCAGAAGAGATTGG +>B7_597:6:29:249:878 +ATTGTGTCCATGTACACACGCTGTCCTATGTACTT +>B7_597:6:29:249:878 +TCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGG +>B7_597:6:73:420:812 +CTAAGTATTGGTAAAGATGTGGGGAAAAAAGTAAA +>B7_597:6:73:420:812 +CTAATTGGTACAATGTACAATATTCTGATGATGGT +>B7_597:7:103:731:697 +AATTAACCCAGTCAGACAAAAATAAAGAAAAAAGA +>B7_597:7:103:731:697 +CTTGCTAGAGATTTAGACATCTAAATGAAAGAGGC +>B7_597:7:113:408:211 +AAACTCATTAATAAAGACATGAGTTCAGGTAAAGG +>B7_597:7:113:408:211 +GAGACTACAGAGCAACTAGGTAAAAAATTAACATT 
+>B7_597:7:31:948:254 +CCTCTACATGGCTGATTATGAAAACAATGTTCCCC +>B7_597:7:31:948:254 +TGAAGCACTGAACTTCCACGTCTCATCTAGGGGAA +>B7_597:7:41:34:211 +CACCTAACACAAGACTACCCAGATTCATAAAACAA +>B7_597:7:41:34:211 +GTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAG +>B7_597:7:5:753:806 +AAAATATAGTTGAAAGCTCTAACAATAGACTAAAC +>B7_597:7:5:753:806 +ATGTAAAGTAACTGAACCTATGAGTCACAGGTATT +>B7_597:7:94:273:165 +AGAAAGAAGATATAACCATCCTACTAAATACATAT +>B7_597:7:94:273:165 +TTACAAGAAACTCATTAATAAAGACATGAGTTCAG +>B7_597:8:147:360:141 +TGCGCTTGTACTTCTAAATCTATAACAAAATTAAA +>B7_597:8:147:360:141 +TTTGGTAATTTAGTTTTTTTTTTTTCTTTTCTCTT +>B7_597:8:186:850:838 +GGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTT +>B7_597:8:186:850:838 +GTTCTATTTGTAATGAAAACTATATTTATGCTATT +>B7_597:8:35:118:589 +TCATAAAACAAATACTACTAGACCTAAGAGGGATG +>B7_597:8:35:118:589 +TGTTCTACGCAAACAGAAACCAAATGAGAGAAGGA +>B7_597:8:48:805:860 +AAACCTGTCAAACACGAATGTTATGCCCTGCTAAA +>B7_597:8:48:805:860 +AAAGTACAAAACTCACAGGTTTTATAAAACAATTA +>B7_610:1:12:88:200 +ACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTT +>B7_610:1:12:88:200 +GAAGATATAACCATCCTACTAAATACATATGCACC +>B7_610:1:139:152:856 +AGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTT +>B7_610:1:139:152:856 +CTAAGCCGTTCTATTTGTAATGAAAACTATATTTA +>B7_610:1:37:652:403 +CCCCTCTAAGCCGTTCTATTTGTAATGAAAACTAT +>B7_610:1:37:652:403 +TTTTGTCAGTTACCAAATGTGTTTATTACCAGAGG +>B7_610:2:189:831:878 +AGAAAAAAAAACCTGTCAAACACGAATGTTATGCC +>B7_610:2:189:831:878 +AGGAAGATACATTGCAAGACAGACTTCATCAAGAT +>B7_610:2:194:688:289 +TCAGACCCTACACGAATGCGTCTCTACCACAGGGG +>B7_610:2:194:688:289 +TGTGTGTTCTCATCAACCTCATACACACACATGGT +>B7_610:2:6:529:366 +CATACAATGATAAAAAGATCAATTCAGCAAGAAGA +>B7_610:2:6:529:366 +GCTACTCAATATATCCATGTAACAAATCTGCGCTT +>B7_610:2:75:887:149 +TACAACAGGAACAAAACCTCATATATCAATATTAA +>B7_610:2:75:887:149 +TACGCAAACAGAAACCAAATGAGAGAAGGAGTAGC +>B7_610:3:102:825:507 +TGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACT +>B7_610:3:102:825:507 +TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAAC +>B7_610:3:120:63:653 +AACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATG +>B7_610:3:120:63:653 
+TCTTCTTCCAAAGATGAAACGCGTAACTGCGCTCT +>B7_610:3:137:895:681 +CTTCCTATGGAAAGGTTGTTGGGAGATTTTTAATG +>B7_610:3:137:895:681 +GCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGC +>B7_610:3:148:340:479 +TCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTC +>B7_610:3:148:340:479 +TTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGG +>B7_610:3:182:23:585 +AGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTA +>B7_610:3:182:23:585 +ATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGT +>B7_610:3:5:863:302 +ACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT +>B7_610:3:5:863:302 +TGAAAACAATGTTCCCCAGATACCATCCCTGTCTT +>B7_610:3:82:998:566 +ATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCT +>B7_610:3:82:998:566 +GCACATTACTACCCTGCAATTAATATAATTGTGTC +>B7_610:3:84:101:328 +AATGGACCTGTGATATCTGGATTCTGGGAAATTCT +>B7_610:3:84:101:328 +TATAATACCTCTACATGGCTGATTATGAAAACAAT +>B7_610:3:85:219:371 +GACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGT +>B7_610:3:85:219:371 +TAACATTACAACAGGAACAAAACCTCATATATCAA +>B7_610:4:139:989:144 +ACTAAAAAAGCAAAAACAAAAACTATGCTAAGTAT +>B7_610:4:139:989:144 +ACTACTAGACCTAAGAGGGATGAGAAATTACCTAA +>B7_610:4:15:805:420 +ATGAAGAGACTATTCACATGTGAACCACACATTTA +>B7_610:4:15:805:420 +GAACAGTTTAGGTATCAATTTGGTGTTCTTTGTAA +>B7_610:4:198:59:675 +AATCTGCGCTTGTACTTCTAAATCTATAACAAAAT +>B7_610:4:198:59:675 +ACTAAATACATATGCACCTAACACAAGACTATCCT +>B7_610:4:67:317:249 +CTACATGGCTGATTATGAAATCTATGTTCCCCATA +>B7_610:4:67:317:249 +TTCCCATCATGACGCACCGAACTTCCACGTCTCAT +>B7_610:5:102:915:87 +AACAATAGACTAAACCAAGCAGAAGAAAGAGGTTC +>B7_610:5:102:915:87 +CACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAA +>B7_610:5:120:596:847 +AACCTATGAGTCACAGGTATTCCTGAGGAAAAAGA +>B7_610:5:120:596:847 +TCAACATGAAGGAAAAAAATTCTAAAATCAGCAAG +>B7_610:5:136:260:254 +AAATTAACATTACAACAGGAACAAAACCTCATATA +>B7_610:5:136:260:254 +GTGGAAAAAGATGTTCTACGCAAACAGAAACCAAA +>B7_610:5:147:68:353 +AACAACCTTGAGAACCCCAGGGAATTTGTCAATGT +>B7_610:5:147:68:353 +CCTTTGATATCTGGATTCTGGGAAATTCTTCATCC +>B7_610:5:51:904:391 +ACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAA +>B7_610:5:51:904:391 +TATCTAAAGTCAACATGAAGGAAAAAAATTCTAAA +>B7_610:5:7:761:623 +CCGGCATGGTTGTACTGGGCAATACATGAGATTAT 
+>B7_610:5:7:761:623 +CTTTCAACGCTTCTAGCCATTTCTTTTGGCATTTG +>B7_610:6:107:252:533 +AGAGAAGGAGTAGCTATACTTATATCAGATAAAGC +>B7_610:6:107:252:533 +CAAGACTACCCAGATTCATAAAACAAATACTACTA +>B7_610:6:111:379:700 +ACACTAAAAGCCCATACTTTACTGCTACTCAATAT +>B7_610:6:111:379:700 +CGCACTGGCAATATTTGTGTGTTTACTTTTTTGCA +>B7_610:6:143:620:158 +ACTTTACTGCTACTCAATATATCCATGTAACAAAT +>B7_610:6:143:620:158 +CAATGATAAAAAGATCAATTCAGCAAGAAGATATA +>B7_610:6:148:776:486 +AACTGTGAGCCATCACAATGAACAACAGGAAGAAA +>B7_610:6:148:776:486 +AGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATT +>B7_610:7:116:157:612 +GTAAAAGTCAAAATTAAAGTTCAATACTCACCATC +>B7_610:7:116:157:612 +TTAAGAGATATAGATTGGCAGTACAGATTTAAAAA +>B7_610:7:117:857:942 +AGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCT +>B7_610:7:117:857:942 +GGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGAC +>B7_610:7:158:943:467 +AAAGCTCTAACAATAGACTAAACCAAGCAGAAGAA +>B7_610:7:158:943:467 +AGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTT +>B7_610:7:15:696:693 +TCAAACACGAATGTTAATCCCTGCTAAACTAATCA +>B7_610:7:15:696:693 +TCTAAAGTCAACATGAAGGAAAAAAATTCTAAAAT +>B7_610:7:177:469:800 +AAAATCAGCAAGAGAAAAGCATACAGTCATCTATA +>B7_610:7:177:469:800 +TTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAA +>B7_610:7:26:749:174 +CAGATTCATAAAACAAATACTACTAGACCTAAGAG +>B7_610:7:26:749:174 +TAACAAAAGTAAATAAAACACATAGCTAAAACTAA +>B7_610:7:34:144:868 +AATTACCTAATTGGTACAATGTACAATATTCTGAT +>B7_610:7:34:144:868 +AGCTAAGGAATGGGAAAGGTGTGGGGAAAAAAGTA +>B7_610:7:35:378:681 +GCGTCTCTACCACAGGGGGCTGCGCGGTTTCCCAT +>B7_610:7:35:378:681 +GTGTGTTCTCATCAACCTCATACACACACATGGTT +>B7_610:8:163:757:432 +CCAGATGAACCACACATTAATACTATGTTTCTCAT +>B7_610:8:163:757:432 +GGTTTCCCATCATGAAGCACTGAACTTCCACGTCT +>B7_610:8:68:570:705 +AACAGATTTAAAAACATGAACTAACTATATGCTGT +>B7_610:8:68:570:705 +CATCATACAATGATAAAAAGATCAATTCAGCAAGA +>B7_610:8:95:426:791 +CCAAATGTGTTTATTACCAGAGGGATGGAGGGAAG +>B7_610:8:95:426:791 +GNTCCAGATTGCTTGTGGTCTGACAGGCTGCAACT +>EAS112_32:7:113:809:364 +GATGCCCTCTTCTTCCAAAGATGAAACGCGTAACT +>EAS112_32:7:113:809:364 +TATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTG +>EAS112_32:7:135:401:735 
+AATTGTAAAAGTCAAAATTAAAGTTCAATACTCAC +>EAS112_32:7:135:401:735 +CCTTACAAGCCAGAAGAGATTGGATCTAATTTTTG +>EAS112_32:7:168:117:441 +TCATAACTATGAAGAGACTATTGCCAGATGAACCA +>EAS112_32:7:168:117:441 +TCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTG +>EAS112_32:7:272:328:400 +CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAA +>EAS112_32:7:272:328:400 +CTAAATACATATGCACCTAACACAAGACTACCCAG +>EAS112_32:7:322:391:742 +ATCAATATTAACTTTGAATAAAAAGGGATTAAATT +>EAS112_32:7:322:391:742 +CATCACTAAACCAGTCCTATAAGAAATGCTCAAAA +>EAS112_32:7:42:804:114 +ATACTTTACTGCTACTCAATATATCCATGTAACAA +>EAS112_32:7:42:804:114 +TCAAATATTGCTAGTGGGAGTATAAATTGTTTTCC +>EAS112_32:8:88:90:59 +ATAATACCTCTACATGTCTGATTATGAAAACAATG +>EAS112_32:8:88:90:59 +TGCACCTCCCTGTTCACCTAGATGCTAGGAGGACA +>EAS112_32:8:89:254:332 +AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT +>EAS112_32:8:89:254:332 +GAATTAACCCAGTCAGACAAAAATAAAGAAAAAAG +>EAS112_34:4:12:273:89 +AGTCTTTCCTGACAAGCAAATGCTAAGATAATTCA +>EAS112_34:4:12:273:89 +CCATCAGAATAACAATGGGCTTCTCAGCGGAAACC +>EAS112_34:4:17:989:186 +TTAGTCTTGCTAGAGATTTAGACATCTAAATGAAA +>EAS112_34:4:17:989:186 +TTATGAATTAACCCAGTCAGACAAAAATAAAGAAA +>EAS112_34:4:22:206:150 +AAAAAAGAGCAACTAGGTAAAAAATTAACATTACA +>EAS112_34:4:22:206:150 +GAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGC +>EAS112_34:4:74:570:695 +CACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGA +>EAS112_34:4:74:570:695 +TTGGGAGATTTTTAATGATTCCTCAATGTTAAAAT +>EAS112_34:4:92:412:435 +AGATTCATAAAACAAATACTACTAGACCTAAGAGG +>EAS112_34:4:92:412:435 +CTACGCAAACAGAAACCAANTGAGAGAAGGAGTAG +>EAS112_34:6:127:153:861 +CTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTC +>EAS112_34:6:127:153:861 +TTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACAT +>EAS112_34:6:130:865:838 +AAAGCATACAGTCATCTATAAAGGAAATCCCATCA +>EAS112_34:6:130:865:838 +AATGCTAAGATAATTCATCATCACTAAACCAGTCC +>EAS112_34:6:145:144:263 +TTTCGTTTTTTTTTTTTTTTTTTCCCCTTTCTTTT +>EAS112_34:6:43:47:279 +AAAACTATGCTAAGTATTGGTAAAGATGTGGGGAA +>EAS112_34:6:43:47:279 +TAGACCTAAGAGGGATGAGAAGTTACCTAATTGGT +>EAS112_34:6:71:85:629 +CCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGA +>EAS112_34:6:71:85:629 
+TTGACACCCAACTAATATTTGTCTGAGCAAAACAG +>EAS112_34:6:75:615:555 +AAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGG +>EAS112_34:6:75:615:555 +TGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAA +>EAS112_34:7:118:523:591 +GAGGGACGCTGAAGAACTTTGATGCCCTCTTCTTC +>EAS112_34:7:118:523:591 +GGTCTGACAGGCTGCAACTGTGAGCCATCACAATG +>EAS112_34:7:141:80:875 +AGCCGAGTCACGGGGTTGCCAGCACAGGGGCTTAA +>EAS112_34:7:141:80:875 +AGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCA +>EAS112_34:7:142:457:584 +GGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGT +>EAS112_34:7:142:457:584 +TGAAGAGACTATTTCCAGATGAACCACACATTAAT +>EAS112_34:7:71:62:254 +AAAAAAGTAAACTCTCAAATATTGCTAGTGGGAGT +>EAS112_34:7:71:62:254 +GGTTACACTAAAAGCCCATACTTTCCTGCTACTCA +>EAS112_34:7:86:498:373 +CACTAATGCGCTCCACGCCCAAGCCCTTCTCACAG +>EAS112_34:7:86:498:373 +GATACCATCCCTGTCTTACTTCCAGCTCACCAGAG +>EAS112_34:7:96:489:453 +AAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGAT +>EAS112_34:7:96:489:453 +AGAAAAGCATACAGTCATCTATAAAGGAAATCCCA +>EAS112_34:8:103:812:255 +ATGTTAAAATGTCTATTTTTGTCTTGACACCCAAC +>EAS112_34:8:103:812:255 +TGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCC +>EAS112_34:8:174:557:872 +GGTAAAGATGTGGGGAAAAAAGTAAACTCTCAAAT +>EAS112_34:8:174:557:872 +TGGTACAATGTACAATATTCTGATGATGGTTACAC +>EAS112_34:8:179:13:782 +GACAGTCTACAACTGTGAGCCATCACAATGAACAA +>EAS112_34:8:179:13:782 +TGATGCCCTCTTCTTCCAAAGATGAAACGCGTAAC +>EAS112_34:8:30:816:90 +ACTACCCTGCAATTAATATAATTGTGTCCATGTAC +>EAS112_34:8:30:816:90 +AGGGAGGTGCACTAATGCGCTCCACGCCCCAGCCC +>EAS112_34:8:45:800:733 +ACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGA +>EAS112_34:8:45:800:733 +ATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTG +>EAS112_34:8:4:841:339 +ACCTCATACACACACATGGTTTAGGGGTATAATAC +>EAS112_34:8:4:841:339 +CTTCAGACCCTACACGAATGCGTCTCTACCACAGG +>EAS114_26:1:113:367:659 +AGTCATCAGACTATCTAAAGTCAACATGAAGGAAA +>EAS114_26:1:113:367:659 +CACAGGTATTCCTGAGGAAAAAGAAAAAGCGAGAA +>EAS114_26:1:155:807:19 +AAAAGGGATTAAATTCCCCCACTTAAGAGATATAG +>EAS114_26:1:155:807:19 +CAACAGTAAAATAAAACAAAGGAGGTCATCATACA +>EAS114_26:1:171:527:247 +AACAAATGCTACTAGACCTAAGAGGGATGAGAAAT +>EAS114_26:1:171:527:247 
+AGAAGGAGTAGCTAGACTTATATCAGATAAAGCAC +>EAS114_26:1:324:238:736 +AGACAGACTTCATCAAGATATGTAGTCATCAGACT +>EAS114_26:1:324:238:736 +TCAAGAAGTATGAGATTATGTAAAGTAACTGAACC +>EAS114_26:1:35:522:294 +GGTTCAGAACTTGAAGACAAGTCTCTTATGAATTA +>EAS114_26:1:35:522:294 +TTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTT +>EAS114_26:1:99:212:522 +ACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTT +>EAS114_26:1:99:212:522 +AGAGACTATTGCCAGATGAACCACACATTAATACT +>EAS114_26:2:130:609:467 +AAATTCCCCCACTTAAGAGATATAGATTGGCAGAA +>EAS114_26:2:130:609:467 +CAATACTCACCATCATAAATACACACAAAAGTACA +>EAS114_26:2:214:950:32 +ACAAGAAACTCATTAATAAAGACATGAGTTCAGGT +>EAS114_26:2:214:950:32 +AGAAGATATAACCATCCTACTAAATACATATGCAC +>EAS114_26:2:237:497:165 +GAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGC +>EAS114_26:2:237:497:165 +TACAGTCATCTATAAAGGAAATCCCATCAGAATAA +>EAS114_26:2:315:219:7 +GAAAGAGGCTCAAAGAATGCCAGGAAGATACATTG +>EAS114_26:2:329:458:365 +GTGTTCTGTGTAAAGTCTCAGGGAGCCGTCCGTGT +>EAS114_26:2:329:458:365 +TTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGG +>EAS114_26:2:73:513:102 +GTCTCATCTAGGGGAACAGGGAGGTGCACTAATGC +>EAS114_26:2:73:513:102 +TGAAAACAATGTTCCCCAGATACCATCCCTGTCTT +>EAS114_26:3:117:284:589 +ACAAAATATAGTTGAAAGCTCTAACAATAGACTAA +>EAS114_26:3:117:284:589 +GAACCTATGAGTCACAGGTATTCCTGAGGAAAAAG +>EAS114_26:3:284:261:124 +ACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTG +>EAS114_26:3:287:665:495 +GAATTGTAAAAGTCAAAATTAAAGTTCAATACTCA +>EAS114_26:3:287:665:495 +TGGATCTAATTTTTGGACTTCTTAAAGAAAAAAAA +>EAS114_26:4:100:238:596 +ATGAGAAATTACCTAATTGGTACAATGTACAATAT +>EAS114_26:4:100:238:596 +CAAAAACTATTCTAAGTATTGGTAAAGATGTGGGG +>EAS114_26:4:110:840:431 +CTGTCAAACACGAATGTTATGCCCTGCTAAACTAA +>EAS114_26:4:110:840:431 +GTAGTCATCAGACTATCTAAAGTCAACATGAAGGA +>EAS114_26:4:123:1001:580 +AGGTTTTATAAAACAATTAATTGAGACTACAGAGC +>EAS114_26:4:123:1001:580 +GGGAANTAAAGTCAAGTCTTTCCTGACAAGCAAAT +>EAS114_26:4:253:285:104 +CTCTCATTCACTCCAGCTCCCTGTCACCCAATGGA +>EAS114_26:4:253:285:104 +GTGATGTGTGTTCTCATCAACCTCATACACACACA +>EAS114_26:4:306:388:342 +CCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACT 
+>EAS114_26:4:306:388:342 +GGGGAAGACATAATCCCACGCTTCCTATGGAAAGG +>EAS114_26:4:40:352:151 +ATTACGTCCTATCTTCTTCTTAGGGAAGAACAGCT +>EAS114_26:4:40:352:151 +TTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT +>EAS114_26:5:139:331:63 +GACACCCAACTAATATTTGTCTGAGCAAAACAGTC +>EAS114_26:5:139:331:63 +TTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT +>EAS114_26:5:228:189:826 +AAGCCCTTCTCACAGTTTCTGCCCCCCGCATGGTT +>EAS114_26:5:228:189:826 +ATCATGACTCTATCCCAAATTCCCAATTACGTCCT +>EAS114_26:5:238:31:968 +ACACCATCCCTGTCTTACTTCCAGCTCCCCAGAGG +>EAS114_26:5:238:31:968 +ATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCTG +>EAS114_26:5:43:114:617 +AATCATAAATACACACAAAAGTACAAAACTCACAG +>EAS114_26:5:43:114:617 +AATGTTATGCCCTGCTAAACTAAGCATCATAAATG +>EAS114_26:6:129:694:359 +CCCTGAGAGATTCTGCAGCCCAGATCCAGATTGCT +>EAS114_26:6:129:694:359 +TGTCAGTTACCAAATGTGTTTATTACCCGAGGGAT +>EAS114_26:6:140:253:322 +AATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTC +>EAS114_26:6:140:253:322 +GAAATGCTCAAAAGAATTGTAAAAGTCAAAATTAA +>EAS114_26:6:183:697:555 +AAAGAATGCCAGGAAGATACATTGCCAGACAGACT +>EAS114_26:6:183:697:555 +AGAAATCTTAGAAGCCAGAAGAGATTGGATCTAAT +>EAS114_26:6:46:13:880 +AAAACCTCTTTAGTCTTGCTAGAGATTTAGACATC +>EAS114_26:6:46:13:880 +AGAAAAGCATACAGTCATCTATAAAGGAAATCCCA +>EAS114_26:7:13:172:720 +AATTCATTTAAGAAATTACAAAATATAGTTGAAAG +>EAS114_26:7:157:876:302 +AAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGA +>EAS114_26:7:157:876:302 +CAACTAGGTAAAAAATTAACATTACAACACGAACA +>EAS114_26:7:218:858:445 +AAAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTT +>EAS114_26:7:218:858:445 +GAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATA +>EAS114_26:7:245:323:744 +ACCTCTACATGGCTGATTATGAAAACAATGTTCCC +>EAS114_26:7:245:323:744 +GATTCTGGGAAATTCTTCATCCTGGACCCTGAGAG +>EAS114_26:7:37:79:581 +TTAAAATTTAAAAAAAGTAAATAAAACACATAGCT +>EAS114_26:7:37:79:581 +TTTTTTTTTTTTTTTTTTTTTTTCATGCCAGAAAA +>EAS114_26:7:86:308:648 +GAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTG +>EAS114_26:7:86:308:648 +TATTAGGAAATGCTTTACTGTCATAACTATGAAGA +>EAS114_28:1:144:242:602 +ATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGG +>EAS114_28:1:144:242:602 +ATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAG 
+>EAS114_28:1:168:389:889 +GAAATTACAAAATATAGTTGAAAGCTCTAACAATAG +>EAS114_28:1:168:389:889 +TAACTGAACCTATGAGTCACAGGTATTCCTGAGGAA +>EAS114_28:1:168:609:646 +GGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAG +>EAS114_28:1:168:609:646 +GGTTACACTAAAAGCCCATACTTTACTGCTACTCAA +>EAS114_28:1:220:801:282 +AACCTCATATATCAATATTAACTTTGAATAAAAAGG +>EAS114_28:1:220:801:282 +AATTCATCATCACTAAACCAGTCCTATAAGAAATGC +>EAS114_28:1:232:351:909 +ACATGGCTGATTATGAAATCAATGTTCCCCAGATGC +>EAS114_28:1:232:351:909 +CCATCATGAAGCGCTGAACTTCCACGTCTCATCTAG +>EAS114_28:1:28:708:463 +CCCAATGGACCTGTGATATCTGGATTCTGGGAAATT +>EAS114_28:1:28:708:463 +GTATAATACCTCTACATGGCTGATTATGAAAACAAT +>EAS114_28:2:114:938:216 +CTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTT +>EAS114_28:2:114:938:216 +GAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGA +>EAS114_28:2:141:7:963 +CACTTTAAATCAACAACAGTAAAATAAAACAAAGGA +>EAS114_28:2:141:7:963 +TACAATGTACAATATTCTGATGATGGTTACACTAAA +>EAS114_28:2:149:650:44 +CGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCC +>EAS114_28:2:149:650:44 +CTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTC +>EAS114_28:2:167:905:852 +AGAAAAGCATACAGTCATCTATAAAGAAAATCCCAT +>EAS114_28:2:167:905:852 +CAAATGCTAAGATAATTCATCATCACTAAACCAGTC +>EAS114_28:2:251:819:772 +TCCCCCACTTAAGAGATATAGATTGGCAGAACAGAT +>EAS114_28:2:251:819:772 +TTCAATACTCACCATCATAAATACACACAAAAGTAC +>EAS114_28:2:28:474:566 +ACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAG +>EAS114_28:2:28:474:566 +TGAACCACACATTAATACTATGTTTCTTATCTGCAC +>EAS114_28:2:329:437:643 +AAGATACATTGCAAGACAGACTTCATCAAGATATGT +>EAS114_28:2:329:437:643 +TTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAAC +>EAS114_28:2:55:562:403 +CAACAGGAACAAAACCTCATATATCAATATTAACTT +>EAS114_28:2:55:562:403 +CAAGCAAATGCTAAGATAATTCATCATCACTAAACC +>EAS114_28:3:110:984:98 +AAGGAGGTCATCATACAATGATAAAAAGATCAATTC +>EAS114_28:3:110:984:98 +ACTAAAACCCCATACTTTACTGCTACTCAATATATC +>EAS114_28:3:173:627:465 +GTAAACTCTCAAATATTGCTAGTGGGAGTATAAATT +>EAS114_28:3:173:627:465 +TGATGGTTACACTAAAAGCCCATACTTTACTGCTAC +>EAS114_28:3:176:402:458 +AAATAGCTAAAACTAAAAAAGCAAAAACAAAAACTA +>EAS114_28:3:176:402:458 
+CCTAAGAGGGATGAGAAATTACCTAATTGGTACAAT +>EAS114_28:3:202:275:776 +CAAATACTACTAGACCTAAGAGGGATGAGAAATTAC +>EAS114_28:3:202:275:776 +TCTACGCAAACAGAAACCAAATGAGAGAAGGAGTAG +>EAS114_28:3:250:628:423 +CAATGGGCTTCTCAGCGGAAACCTTACAAGCCAGAA +>EAS114_28:3:250:628:423 +CTCTTTAGTCTTGCTAGAGATTTAGACATCTAAATG +>EAS114_28:3:279:763:945 +CTGCACATTACTACCCTGCAATTAATATAATTGTGT +>EAS114_28:3:279:763:945 +GCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACT +>EAS114_28:3:308:509:948 +AATATATCCATGTAACAAATCTGCGCTTGTACTTCT +>EAS114_28:3:308:509:948 +AATTCAGCAAGAAGATATAACCATCCTACTAAATAC +>EAS114_28:3:32:492:907 +CAAACACGAATGTTATGCCCTGCTAAACTAAGCATC +>EAS114_28:3:32:492:907 +TGTAGTCATCAGACTATCTAAAGTCAACATGAAGGA +>EAS114_28:3:78:773:660 +ATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCTGA +>EAS114_28:3:78:773:660 +CCCCAGATACCATCCCTGTCTTACTTCCAGCTCCCC +>EAS114_28:4:13:701:55 +AAACCAAATGAGAGAAGGAGTAGCTATACTTATATC +>EAS114_28:4:13:701:55 +TTCATAAAACAAATACTACTAGACCTAAGAGGGATG +>EAS114_28:4:149:572:877 +ATGTAAAGTAACTGAACCTATGAGTCACAGGTATTC +>EAS114_28:4:149:572:877 +GAGGCTCAAAGAATGCCAGGAAGATACATTGCAAGA +>EAS114_28:4:215:246:640 +AAAAGCATACAGTCATCTATAAAGGAAATCCCATCA +>EAS114_28:4:215:246:640 +AAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGAT +>EAS114_28:4:305:707:258 +AAAAAGATGTTCTACGCAAGCAGAAACCAAATGAGA +>EAS114_28:4:305:707:258 +GAACAAAACCTCATATATCAATATTAACTTTGAATA +>EAS114_28:4:322:631:245 +CCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC +>EAS114_28:4:322:631:245 +TATTTTTGTCTTGACACCCAACTAATATTTGTCTGA +>EAS114_28:4:9:55:730 +ATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGC +>EAS114_28:4:9:55:730 +CAGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCT +>EAS114_28:5:104:350:749 +AAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTT +>EAS114_28:5:104:350:749 +TGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGCG +>EAS114_28:5:11:868:62 +TCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGT +>EAS114_28:5:11:868:62 +TTTACTGTCATAACTATGAAGAGACTATTGCCAGAT +>EAS114_28:5:163:832:715 +TAAAAACATGAACTAACTATATGCTGTTTACAAGAA +>EAS114_28:5:163:832:715 +TAAAACAAAGGAGGTCATCATACAATGATAAAAAGA +>EAS114_28:5:206:671:49 
+ACCATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAA +>EAS114_28:5:206:671:49 +GCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCA +>EAS114_28:5:209:778:588 +AACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCT +>EAS114_28:5:209:778:588 +TTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT +>EAS114_28:5:23:944:377 +AATGTTATGCCCTGCTAAACTAAGCATCATAAATGA +>EAS114_28:5:23:944:377 +AGTACAAAACTCACAGGTTTTATAAAACAATTAATT +>EAS114_28:6:11:151:750 +GTTTTTATTTTTTTCCTCTCTCTTTTTTTTTTTTTT +>EAS114_28:6:155:68:326 +CCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAA +>EAS114_28:6:155:68:326 +GTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGTG +>EAS114_28:6:175:705:982 +CATGGTTTAGGGGTATAATACCTCTACATGGCTGAT +>EAS114_28:6:175:705:982 +CTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAG +>EAS114_28:6:185:87:475 +AAGAAACTCATTAATAAAGACATGAGTTCAGGTAAA +>EAS114_28:6:185:87:475 +ATTGAGACTACAGAGCAACTAGGTAAAAAATTAACA +>EAS114_28:6:187:996:432 +TGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTTC +>EAS114_28:6:187:996:432 +TTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAG +>EAS114_28:6:51:506:878 +TAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCCT +>EAS114_28:6:51:506:878 +TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACT +>EAS114_28:6:54:263:585 +TGAAAGCTTGGGCTGTAATGATGCCCCTTGGCCATC +>EAS114_28:6:54:263:585 +TGTCCATGTACACACGCTGTCCTATGTACTTATCAT +>EAS114_28:7:133:514:754 +AGCTATACTTATATCAGATAAAGCACACTTTAAATC +>EAS114_28:7:133:514:754 +TAAATTTGAATAAAAAGGGATTAAATTCCCCCACTT +>EAS114_28:7:157:786:424 +GCTTTACTGTCATAACTATGAAGAGACTATTGCCAG +>EAS114_28:7:157:786:424 +TTAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAG +>EAS114_28:7:178:276:693 +GTTCAGAACTTGAAGACAAGTCTCTTATGAATTAAC +>EAS114_28:7:178:276:693 +TTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAAC +>EAS114_28:7:215:863:521 +ACTCACCATCATAAATACACACAAAAGTACAAAACT +>EAS114_28:7:215:863:521 +TAAGAGATATAGATTGGCAGAACAGATTTAAAAACA +>EAS114_28:7:242:354:637 +AACTATATTTATGCTATTCAGTTCTAAATATAGAAA +>EAS114_28:7:242:354:637 +CCCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCT +>EAS114_28:7:287:492:169 +CGCTCTCATTCACTCCAGCTCCCTGTCACCCAATGG +>EAS114_28:7:287:492:169 +GTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAAC +>EAS114_28:7:57:324:546 
+GTCATCTATAAAGGAAATCCCATCAGAATAACAATG +>EAS114_28:7:57:324:546 +TAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATT +>EAS114_30:1:134:379:893 +AGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATA +>EAS114_30:1:134:379:893 +CGCTGTCCTATGTACTTATCATGACTCTATCCCAA +>EAS114_30:1:154:818:165 +GAACAGGGAGGTGCACTAATGCGCTCCACGCCCAA +>EAS114_30:1:154:818:165 +TTCTTATCTGCACATTACTACCCTGCAATTAATAT +>EAS114_30:1:176:168:513 +ATTTGTAATGAAAACTATATTTATGCTATTCAGTT +>EAS114_30:1:176:168:513 +TTTGATGCCCTCTTCTTCCAAAGATGAAACGCGTA +>EAS114_30:1:188:863:790 +CATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTC +>EAS114_30:1:188:863:790 +TTAATTGAGACTACAGAGCAACTAGGTAAAAAATT +>EAS114_30:1:243:10:911 +TATTCAGTTCTAAATATAGAAATTGAAACAGCTGT +>EAS114_30:1:64:526:339 +ACATTACAACAGGAACAAAACCTCATATATCAATA +>EAS114_30:1:64:526:339 +CAAATGAGAGAAGGAGTATCTATACTTATATCAGA +>EAS114_30:2:111:142:21 +ATCAGAATAACAATGGGCTTCACAGCGGAAACCTT +>EAS114_30:2:111:142:21 +CTTGCTAGAGATTTAGACATCTAAATGAAAGAGGC +>EAS114_30:2:226:885:729 +AAAAGGGATTAAATTCCCCCACTTAAGAGATATAG +>EAS114_30:2:226:885:729 +GCTGAACTTACATCAGATAAAGCACACTTTAAATC +>EAS114_30:2:272:750:698 +GTGTTTATTACCAGAGGGATGGAGGGATGACGGAC +>EAS114_30:2:272:750:698 +TGCAGCCCAGATCCAGATTGCTTGTGGTCTGACAG +>EAS114_30:2:297:949:26 +ACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAA +>EAS114_30:2:297:949:26 +CATATATCAATATTAACTTTGAATAAAAAGGGATT +>EAS114_30:2:303:428:326 +AAAATTAAAATTTAACAAAAGTAAATAAAACACAT +>EAS114_30:2:303:428:326 +TTTTTTTTTTTTTTTTTCTCTTTTTTTTTTTTTTT +>EAS114_30:2:30:887:404 +CAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTG +>EAS114_30:2:30:887:404 +TTGCCTTCAGACCCTGCACGAATGCGTCTCTACCA +>EAS114_30:2:315:412:921 +GTAAAGATGTGGGGAAAAAAGTAAACTCTCAAATA +>EAS114_30:2:315:412:921 +TTCTGATGATGGTTACACTACAAGCCCATACTGTA +>EAS114_30:2:82:963:128 +ATTAAAGTTCAATACTCACCATCATAAATACACAC +>EAS114_30:2:82:963:128 +GGCAGAACAGATTTAAAAACATGAACTAACTATAT +>EAS114_30:3:139:117:262 +AGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTG +>EAS114_30:3:139:117:262 +GTTCTCAAGGTTGTTGCAAGGGGGTCTATGTGAAC +>EAS114_30:3:14:697:541 +TAAAAGCAGCAAGAGAAAAGCATACAGTCATCTAT 
+>EAS114_30:3:14:697:541 +TTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAA +>EAS114_30:3:161:366:544 +CTTCTGTAATTGAAAAATTCATTTAAGAAATTACA +>EAS114_30:3:181:582:435 +CAAGTCTTTCCTGACAAGCAAATGCTAAGATAATT +>EAS114_30:3:181:582:435 +GAAATCCCATCAGAATAACAATGGGCTTCTCAGCA +>EAS114_30:3:187:294:947 +ACAGGGGGCTGCGCGGTTTCCCATCATGAAGCACT +>EAS114_30:3:187:294:947 +AGAGACTATTGCCAGATGAACCACACATTAATACT +>EAS114_30:3:215:840:760 +AAGTATTGGTAAAGATGTGGGGAAAAAAGTAAACT +>EAS114_30:3:215:840:760 +CTGATGATGGTTACACTAAAAGCCCATACTTTCCT +>EAS114_30:3:24:195:604 +TCACAGTTTCTGCCCCCAGCATGGTTGTACTGTGC +>EAS114_30:3:24:195:604 +TGTCCTATGTACTTATCATGACTCTATCCCAAATT +>EAS114_30:3:302:288:657 +AGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAGG +>EAS114_30:3:302:288:657 +CCAGATGAACCACACATTAATACTATGTTTCTTAT +>EAS114_30:3:35:361:546 +TGCACTAATGCGCTCCACGCCCAAGCCCTTCTCAC +>EAS114_30:3:35:361:546 +TTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACG +>EAS114_30:3:39:348:594 +AAGCATCATAAATGAAGGGGAAATAAAGTCAAGTC +>EAS114_30:3:39:348:594 +CTAAAATCAGCAAGAGAAAAGCATACAGTCATCTA +>EAS114_30:4:183:852:253 +ACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGT +>EAS114_30:4:183:852:253 +CCATTTCTTTTGGCATTTGCCTTCAGACCCTACAC +>EAS114_30:4:317:378:535 +AGCTTGGGCTGTAATGATGCCCCTTGGCCATCACC +>EAS114_30:4:317:378:535 +GCTGTCCTATGTACTTATCATGACTCTATCCCAAA +>EAS114_30:4:327:795:103 +AACCTTGAGAACCCCAGGGAATTTGTCAATGTCAG +>EAS114_30:4:327:795:103 +ACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGC +>EAS114_30:4:328:537:640 +AAGTATGAGATTATGTAAAGTAACTGAACCTATGA +>EAS114_30:4:328:537:640 +GGAAGATACATTGCAAGACAGACTTCATCAAGATA +>EAS114_30:5:327:991:508 +ACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCA +>EAS114_30:5:327:991:508 +TCTGGATTCTGGGAAATTCTTCATCCTGGACCCTG +>EAS114_30:5:32:461:154 +ACAGGTTTTATAAAACAATTAATTGAGACTACAGA +>EAS114_30:5:32:461:154 +TTACAAGAAACTCATTAATAAAGACATGAGTTCAG +>EAS114_30:6:137:741:866 +ACACTAAAAGCCCATACTTTACTGCTACTCAATAT +>EAS114_30:6:137:741:866 +GATGAGGGGAAAAAAGTAAACTCTCAAATATTGCT +>EAS114_30:6:157:42:763 +TCTGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCG +>EAS114_30:6:157:42:763 
+TTTCTTATCTGCACATTACTACCCTGCAATTATTA +>EAS114_30:6:163:312:891 +CCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGC +>EAS114_30:6:163:312:891 +TTCCCCAGATACCGTCCCTGTCTTACTTCCAGCTC +>EAS114_30:6:214:565:337 +AAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC +>EAS114_30:6:214:565:337 +CAGAATAACAATGGGCTTCTCAGCAGAAACCTTAC +>EAS114_30:6:220:809:850 +GGGGGGAAAAAGATGTGCTACACAAAAAGATTCCA +>EAS114_30:6:220:809:850 +TTCATAAAACAAATACTACTAGACCTAAGAGGGAT +>EAS114_30:6:238:803:383 +ACAATTAATTGAGACTACAGAGCAACTAGGTAAAA +>EAS114_30:6:238:803:383 +ACTCATTAATAAAGACATGAGTTCAGGTAAAGGGG +>EAS114_30:6:243:209:110 +AAAACATGAACTAACTATATGCTGTTTACAAGAAA +>EAS114_30:6:243:209:110 +CACAGGTTTTATAAAACAATTAATTGAGACTACAG +>EAS114_30:6:277:397:932 +TTTCTTTTCACTTTTTTTTTTTTTTTTTTTTACTT +>EAS114_30:6:290:146:36 +CTTTCCCATCCCCCGGTCCCTGCCCCATCTCTTGT +>EAS114_30:6:290:146:36 +TTATCATGACTCTATCCCAAATTCCCAATTACGTC +>EAS114_30:6:326:309:149 +CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCA +>EAS114_30:6:326:309:149 +CTCCCTGTCACCCAATGGACCTGTGATATCTGGAT +>EAS114_30:6:41:461:436 +TAAAGTAACTGAACCTATGAGTCACAGGTATTCCT +>EAS114_30:6:41:461:436 +TAGTCATCAGACTATCTAAAGTCAACATGAAGGAA +>EAS114_30:6:49:656:507 +AAGGAAATCCCATCAGAATAACAATGGGCTTCTCA +>EAS114_30:6:49:656:507 +TCCTGACAAGCAAATGCTAAGATAATTCATCATCA +>EAS114_30:6:4:665:771 +GAATAAAAAGGGATTAAATTCCCCCACTTAAGAGA +>EAS114_30:6:4:665:771 +GTGCTTTATCTGATATCAATGCCGATAAACTGCCT +>EAS114_30:6:62:386:959 +AAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGGC +>EAS114_30:6:62:386:959 +AATGAACAACAGGAAGAAAAGGTCTTTCAAAAGGT +>EAS114_30:7:269:944:220 +ATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAA +>EAS114_30:7:269:944:220 +TGGGCAATACATGAGATTATTAGGAAATGCTTTAC +>EAS114_30:7:283:799:560 +ACATAGACCCCCTTGCAACAACCTTGAGAACCCCA +>EAS114_30:7:283:799:560 +GGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGAC +>EAS114_30:7:310:155:312 +CAGCAAGAGAAAAGCATACAGTCATCTATAAAGGA +>EAS114_30:7:310:155:312 +CATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCT +>EAS114_30:7:319:11:255 +TACCCAGATTCATAAAACAAATACTACTAGACCTA +>EAS114_30:7:319:11:255 +TCTATAAAAAAATTAAAATTTAACAAAAGTAAATA 
+>EAS114_30:7:59:871:351 +GTAAAAAATTAACATTACAACAGGAACAAAACCTC +>EAS114_30:7:59:871:351 +TAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAG +>EAS114_30:7:71:311:202 +TATGAGATTATGTAAAGTAACTGAACCTATGAGTC +>EAS114_30:7:71:311:202 +TCAAGATATGTAGTCATCAGACTATCTAAAGTCAA +>EAS114_32:1:199:760:42 +ACCCAATTAATATTTTTCTTAGCAAAACAGTCTAG +>EAS114_32:1:199:760:42 +CTCTCTAATTTTTGCTGCTTCCATGTCTTACTCTG +>EAS114_32:1:208:971:600 +AAAAAAACCTGTCAAACACGAATGTTATGCCCTGC +>EAS114_32:1:208:971:600 +AGATATGTAGTCATCAGACTATCTAAAGTCAACAT +>EAS114_32:2:163:618:570 +AGGCTGCAACTGTGAGCCATCACAATGAACAACAG +>EAS114_32:2:163:618:570 +GGAAAGCTGTCAACGCTTCTAGCCATTTCTTTTGG +>EAS114_32:2:197:170:559 +CTCATTCACTCCAGCTCCCTGTCACCCAATGGACC +>EAS114_32:2:197:170:559 +TTCTCAAGGTTGTTGCAAGGGGGTCTATGTGAACA +>EAS114_32:2:247:900:123 +AAAACATGAACTAACTATATGCTGTTTACAAGAAA +>EAS114_32:2:247:900:123 +AATTCAGCAAGAAGATATAACCATCCTACTAAATA +>EAS114_32:2:283:577:398 +ACAATGGGCTTCTCAGCGGAAACCTTACAAGCCAG +>EAS114_32:2:283:577:398 +CTAGAGATTTAGACATCTAAATGAAAGAGGCTCAA +>EAS114_32:2:306:119:56 +CTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACT +>EAS114_32:2:306:119:56 +TCCATGTACACACGCTGTCCTATGTACTTATCATG +>EAS114_32:3:236:475:254 +AGATAAAGCACACTTTAAATCAACAACAGTAAAAT +>EAS114_32:3:236:475:254 +TTCCCCCACTTAAGAGATATAGATTGGCAGAACAG +>EAS114_32:3:307:113:346 +AATTCAGCAAGAAGATATAACCATCCTACTAAATA +>EAS114_32:3:307:113:346 +ATGCTGTTTACAAGAAACTCATTAATAAAGACATG +>EAS114_32:4:156:21:69 +AAAAATGAACAGAGCTTTCAAGAAGTATGAGATTA +>EAS114_32:4:156:21:69 +TTGCAAGACAGACTTCATCAAGATATGTAGTCATC +>EAS114_32:4:20:41:138 +CATTTCTTTTGGCATTTGCCTTCAGACCCTACACG +>EAS114_32:4:20:41:138 +GTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCT +>EAS114_32:4:228:587:504 +GCACATTACGACCCGGCAAGGTGTATAATTGTGTC +>EAS114_32:4:228:587:504 +GTGCACTAATGCGCTCCACGCCCAAGCCCTTCTCA +>EAS114_32:4:246:647:765 +GATCAATTCAGCAAGAAGATATAACCATCCTACTA +>EAS114_32:4:246:647:765 +TATGCTGTTTACAAGAAACTCATTAATAAAGACAT +>EAS114_32:4:42:923:169 +ACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAA +>EAS114_32:4:42:923:169 +TTCCTATGTACTTATCATGAATCTATCCCAAATTC 
+>EAS114_32:4:5:396:292 +ATACATATGCACCTAACACAAGACTACCCAGATTC +>EAS114_32:4:5:396:292 +TAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGA +>EAS114_32:4:7:282:424 +CAAAAACAAAAACTATGCTAAGTATTGTTAAAGAT +>EAS114_32:4:7:282:424 +TAATTGGTACAATGTACAATATTCTGATGATGGTT +>EAS114_32:5:109:199:592 +ACGAATATTATGCCCTGCTAAACTAAGCATCATAA +>EAS114_32:5:109:199:592 +AGTCATCAGACTATCTAAAGTCAACATGAAGGAAA +>EAS114_32:5:182:313:319 +AATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGT +>EAS114_32:5:182:313:319 +GATGAACCACACATTAATACTATGTTTCTTATCTG +>EAS114_32:5:267:170:250 +CATAAAACAAATACTACTAGACCTAAGAGGGATGA +>EAS114_32:5:267:170:250 +CATAGCTAAAACTAAAAAAGCAAAAACAAAAACTA +>EAS114_32:5:78:583:499 +TTTACGCTATTCAGTACTAAATATAGAAATTGAAA +>EAS114_32:6:122:342:296 +AAAGCTTGGGCTGTAATGATGCCCCTTGGCCATCA +>EAS114_32:6:122:342:296 +TCCTATGTACTTATCATGACTCTATCCCAAATTCC +>EAS114_32:6:178:342:866 +AACAAATCTGCGCTTGTACTTCTAAATCTATAAAA +>EAS114_32:6:178:342:866 +ATACATATGCACCTAACACAAGACTACCCAGATTC +>EAS114_32:6:179:735:569 +ATGTTAAAATGTCTATTTTTGTCTTGACACCCAAC +>EAS114_32:6:179:735:569 +CATCACCCGGTCCCTGCCCCATCTCTTGTAATCTC +>EAS114_32:6:199:818:124 +AACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT +>EAS114_32:6:199:818:124 +ACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTG +>EAS114_32:6:78:909:394 +ATTGCTTGGTGTCTGACAGGCTGCAACTGTGAGCC +>EAS114_32:6:78:909:394 +TACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGA +>EAS114_32:6:88:162:587 +GGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGAC +>EAS114_32:6:88:162:587 +TTCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCT +>EAS114_32:7:174:597:66 +TCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCT +>EAS114_32:7:174:597:66 +TCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAAT +>EAS114_32:7:201:959:19 +CTCTACATGGCTGATTATTAAAACAATGTTCCCCA +>EAS114_32:7:201:959:19 +TATCTGGATTCTGGGAAATTCTTCATCCTGGACCC +>EAS114_32:7:256:407:470 +AACGCTTCTAGCCATTTCTTTTGGCATTTGCCTTC +>EAS114_32:7:256:407:470 +CAGCATGGTTGTACTGGGCAATACATGAGATTATT +>EAS114_39:1:12:884:219 +GAGCCATCACAATGAACAACAGGAAGAAAAGGTCT +>EAS114_39:1:12:884:219 +GCTTTCAACGCTTCTAGCCATTTCTTTTGGCATTT +>EAS114_39:1:28:350:895 +ATATAGTTGAAAGCTCTAACAATAGACTAAACCAA 
+>EAS114_39:1:28:350:895 +TATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAA +>EAS114_39:1:43:1120:878 +ACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGT +>EAS114_39:1:43:1120:878 +TAATTGTGTCCATGTACACACGCTGTCCTATGTAC +>EAS114_39:1:70:147:84 +ATAGACCCCCTTGCAACAACCTTGAGAACCCCAGG +>EAS114_39:1:70:147:84 +CCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGA +>EAS114_39:1:71:636:533 +GCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGC +>EAS114_39:1:71:636:533 +GTGGAAGACATAATCCCACGCTTCCTATGGAAAGG +>EAS114_39:1:73:302:1574 +AAGGTTGTTGGGAGATTTTTAATGATTCCTCAATG +>EAS114_39:1:73:302:1574 +CCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGCT +>EAS114_39:1:98:641:1040 +CAGAACAGATTTAAAAACATGAACTAACTATATGC +>EAS114_39:1:98:641:1040 +TAAAACAAAGGAGGTCATCATACAATGATAAAAAG +>EAS114_39:2:18:967:582 +AAGCCGTTCTATTTGTAATGAAAACTATATTTATG +>EAS114_39:2:18:967:582 +ACGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAG +>EAS114_39:2:38:670:564 +CAGGTTTTATAAAACAATTAATTGAGACTACAGAG +>EAS114_39:2:38:670:564 +CTAACTATATGCTGTTTACAAGAAACTCATTAATA +>EAS114_39:2:41:576:1016 +CTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTC +>EAS114_39:2:41:576:1016 +TGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTT +>EAS114_39:2:57:1064:925 +TGACAAGCAAATGCTAAGATAATTCATCATCACTA +>EAS114_39:2:5:1219:137 +AAAACTAAAAAAGCAAAAACAAAAACTATGCTAAG +>EAS114_39:2:5:1219:137 +ACCTAAGAGGGATGAGAAATTACATAATTGGTACA +>EAS114_39:3:11:1238:1728 +AGAGATTTAGACATCTAAATGAAAGAGGCTCAAAG +>EAS114_39:3:11:1238:1728 +TCCCATCAGAATAACAATGGGCTTCTCAGCGGAAA +>EAS114_39:3:55:464:146 +AAAAAGATCAATTCAGCAAGAAGATATAACCATCC +>EAS114_39:3:55:464:146 +CTCAATATATCCATGTAACAAATCTGCGCTTGTAC +>EAS114_39:3:6:1064:1805 +TAAAATTTAACAAAAGTAAATAAAACACATAGCTA +>EAS114_39:3:6:1064:1805 +TTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTT +>EAS114_39:3:88:84:1558 +AGTAACTGAACCTATGAGTCACAGGTATTCCTGTG +>EAS114_39:3:88:84:1558 +ATCAGACTATCTAAAGTCAACATGAAGGAAAAAAA +>EAS114_39:4:10:1312:1558 +AACTAACTATATGCTGTTTACAAGAAACTCATTAA +>EAS114_39:4:10:1312:1558 +AGGTTTTATAAAACAATTAATTGAGACTACAGAGC +>EAS114_39:4:30:432:228 +ATCCTACTAAATACATATGCACCTAACACAAGACT +>EAS114_39:4:30:432:228 
+GACATGAGTTCAGGGAAAGGGGTGGAAAAAGATGT +>EAS114_39:4:30:570:902 +AAAAACCTGTCAAACACGAATGTTATGCCCTGCTA +>EAS114_39:4:30:570:902 +ATACTCACCATCATAAATACGCACAAAAGTACAAA +>EAS114_39:4:43:1047:1626 +GATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGT +>EAS114_39:4:43:1047:1626 +GTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAA +>EAS114_39:4:58:271:612 +AGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTA +>EAS114_39:4:58:271:612 +ATGAAGGAAAAAAATTCTAAAATCAGCAAGAGCAA +>EAS114_39:4:93:77:1338 +GCTGCTTACAAGAAGCGCATTAATAAAGACATGAG +>EAS114_39:4:93:77:1338 +GTCATCATACAATGAAAAAAAGATCAATTCAGCAA +>EAS114_39:5:17:1222:783 +AAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCGT +>EAS114_39:5:17:1222:783 +TGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATT +>EAS114_39:5:42:1223:1087 +CAGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTT +>EAS114_39:5:42:1223:1087 +TTGTCTTGACACCCAACTAATATTTGTCTGAGCAA +>EAS114_39:5:50:972:1286 +AGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTT +>EAS114_39:5:50:972:1286 +TTGTGGTCTGACAGGCTGCAACTGTGAGCCATCAC +>EAS114_39:5:61:1000:1534 +CTTGAAGACAAGTCTCTTATGAATTAACCCAGTCA +>EAS114_39:5:61:1000:1534 +GGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGA +>EAS114_39:5:93:312:331 +AACTCATTAATAAAGACATGAGTTCAGGTAAAGGG +>EAS114_39:5:93:312:331 +ATCCTACTAAATACATATGCACCTAACACAAGACT +>EAS114_39:6:13:1034:1144 +AAAGATGAAACGCGTAACTGCGCTCTCATTCACTC +>EAS114_39:6:13:1034:1144 +AATTGAAACAGCTGTGTTTAGTGCCTTTGTTCACA +>EAS114_39:6:34:380:815 +AAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGA +>EAS114_39:6:34:380:815 +ATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTT +>EAS114_39:6:71:644:1792 +AAAAGTACAAAACTCACAGGTTTTATAAAACAATT +>EAS114_39:6:71:644:1792 +CCTGCTAAACTAAGCATCATAAATGAAGGGGAAAT +>EAS114_39:6:76:282:1668 +AACAAAAACTATGCTAAGTATTGGTAAAGATGTGG +>EAS114_39:6:76:282:1668 +TACAATGTACAATATTCTGATGATGGTTACACTAA +>EAS114_39:6:7:492:1088 +ACAGGTTTTATAAAACAATTAATTGAGACTACAGA +>EAS114_39:6:7:492:1088 +TGAACTAACTATATGCTGTTTACAAGAAACTCATT +>EAS114_39:6:85:1224:625 +GAACTCCCCTGGAGGTCTGATGGCGTTTCTCCCTC +>EAS114_39:6:85:1224:625 +GCTGCATCCCTGTCTTCCTCTGTCTTGATTTCCTT +>EAS114_39:6:94:1273:1462 +AAGATGTTCTACGCAAACAGAAACCAAATGAGAGA 
+>EAS114_39:6:94:1273:1462 +CCTAACACAAGACTACCCAGATTCATAAAACAAAT +>EAS114_39:7:100:708:1984 +AGATGAACCACACATTAATACTATGTTTCTTATCT +>EAS114_39:7:100:708:1984 +TACCACAGGGGGCTGCGCGGTTTCCCATCATGAAG +>EAS114_39:7:23:1126:1886 +ACACTAAAAGCCCATACTTTACTGCTACTCAATAT +>EAS114_39:7:23:1126:1886 +GGAGGTCATCATACAATGATAAAAAGATCAATTCA +>EAS114_39:7:32:562:1695 +GATGATGGTTACACTAAAAGCCCATACTTTACTGC +>EAS114_39:7:32:562:1695 +TAAAACAAAGGAGGTCATCATACAATGATAAAAAG +>EAS114_39:7:57:1114:2032 +TAACTATATGCTGTTTACAAGAAACTCATTAATAA +>EAS114_39:7:57:1114:2032 +TATTACAATGATAAAAAGATCAATTCAGCAAGAAG +>EAS114_39:7:90:406:631 +CATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTG +>EAS114_39:7:90:406:631 +TGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTT +>EAS114_45:1:100:979:1863 +ATTACAAAACTCACAGGTTTTATAAAACAATTAAT +>EAS114_45:1:100:979:1863 +TTATGCCCTGCTAAACTAAGCATCATAAATGAAGG +>EAS114_45:1:12:1296:358 +CTTGAAAGCTTGGTCTGTAATGATGCCCCTTGGCC +>EAS114_45:1:12:1296:358 +GTCCATGTACACACGCTGTCCTATGTACTTATCAT +>EAS114_45:1:2:1422:1820 +CACCTAACACAAGACTACCCAGATTCATAAAACAA +>EAS114_45:1:2:1422:1820 +TACGCAAACAGAAACCAAATGAGAGAAGGAGTAGC +>EAS114_45:1:30:1882:1210 +ATCATCACTAAACCAGTCCTATAAGAAATGCTCAA +>EAS114_45:1:30:1882:1210 +GCAGAAACCTTACAAGCCAGAAGAGATTGGATCTA +>EAS114_45:1:33:1407:94 +TAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAG +>EAS114_45:1:33:1407:94 +TTACTTGTTGTTGGTTTTCTGTTTCTTTGTTTGAT +>EAS114_45:1:77:1000:1780 +AGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAG +>EAS114_45:1:77:1000:1780 +TGAATTAACCCAGTCAGACAAAAATAAAGAAAAAA +>EAS114_45:1:84:275:1572 +AGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCC +>EAS114_45:1:84:275:1572 +TTATGCTATTCAGTTCTAAATATAGAAATTGAAAC +>EAS114_45:1:95:1530:28 +AAGAGGCTCAAAGAATGCCAGGAAGATACATTGCA +>EAS114_45:1:95:1530:28 +AATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAA +>EAS114_45:1:9:1289:215 +AGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCC +>EAS114_45:1:9:1289:215 +TATGCTATTCAGTTCTAAATATAGAAATTGAAACA +>EAS114_45:2:13:1507:1146 +AAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGA +>EAS114_45:2:13:1507:1146 +CAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCT +>EAS114_45:2:15:1497:1530 
+AATTACGTCCTATCTTCTTCTTAGGGAAGAACAGC +>EAS114_45:2:15:1497:1530 +TAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCC +>EAS114_45:2:1:1140:1206 +TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGT +>EAS114_45:2:1:1140:1206 +TTTATTACCAGAGGGATGGAGGGAAGAGGGACGCT +>EAS114_45:2:20:413:1334 +CCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTT +>EAS114_45:2:20:413:1334 +TTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAG +>EAS114_45:2:23:1754:796 +CTTCTAAATCTATAAAAAAATTAAAATTTAACAAA +>EAS114_45:2:23:1754:796 +CTTTGGAAAACAATTTGGTAATTTCGTTTTTTTTT +>EAS114_45:2:33:1445:1357 +TATGAATTAACCCAGTCAGACAAAAATAAAGAAAA +>EAS114_45:2:33:1445:1357 +TTAGTCTTGCTAGAGATTTAGACATCTAAATGAAA +>EAS114_45:2:41:199:388 +AGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGG +>EAS114_45:2:41:199:388 +TCTAAAGTCAACATGAAGGAAAAAAATTCTAAAAT +>EAS114_45:2:49:163:904 +GCTCTCATTCACTCCAGCTCCCTGTCACCCAATGG +>EAS114_45:2:49:163:904 +TCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTC +>EAS114_45:2:54:1886:719 +CTGTTTACAAGAAACTCATTAATAAAGACATGAGT +>EAS114_45:2:54:1886:719 +TTCAGCAAGAAGATATAACCATCCTACTAAATACA +>EAS114_45:2:59:396:359 +GGGTATAATACCTCTACATGGCTGATTATGAAAAC +>EAS114_45:2:59:396:359 +TCACCCAATGGACCTGTGATATCTGGATTCTGGGA +>EAS114_45:2:76:1765:700 +AAAAAGGGATTAAATTCCCCCACTTAAGAGATATA +>EAS114_45:2:76:1765:700 +GTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGT +>EAS114_45:2:79:554:354 +AAATAAAACAAAGGAGGTCATCATACAATGATAAA +>EAS114_45:2:79:554:354 +CAATGTACAATATTCTGATGATGGTTACACTAAAA +>EAS114_45:3:26:1867:162 +ATATAACCATCCTACTAAATACATATGCACCTAAC +>EAS114_45:3:26:1867:162 +ATATATCCATGTAACAAATCTGCGCTTGTACTTCT +>EAS114_45:3:27:1881:486 +AAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTC +>EAS114_45:3:27:1881:486 +CCAAAGATGAAACGCGTAACTGCGCTCTCATTCAC +>EAS114_45:3:2:1200:1076 +CATTTGCCTTCAGACCCTACACGAATGCGTCTCTA +>EAS114_45:3:2:1200:1076 +GATGTGTGTTCTCATCAACCTCATACACACACATG +>EAS114_45:3:32:1379:738 +TTAAGAAATTACAAAATATAGTTGAAAGCTCTAAC +>EAS114_45:3:35:896:1588 +CTAGACCTAAGAGGGATGAGAAATTACCTAATTGG +>EAS114_45:3:35:896:1588 +GAGTAGCTATACTTATATCAGATAAAGCACACTTT +>EAS114_45:3:39:208:644 +ATTGTAAAAGTCAAAATTAAAGTTCAATACTCACC 
+>EAS114_45:3:39:208:644 +TTTGAATAAAAAGGGATTAAATTCCCCCACTTAAG +>EAS114_45:3:3:1377:1663 +CTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA +>EAS114_45:3:3:1377:1663 +GGTGATGTGTGTTCTCATCAACCTCATACACACAC +>EAS114_45:3:3:864:1888 +AATGTTATGCCCTGCTAAACTAAGCATCATAAATG +>EAS114_45:3:3:864:1888 +CAACATGAAGGAAAAAAATTCTAAAATCAGCAAGA +>EAS114_45:3:41:653:1568 +AACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT +>EAS114_45:3:41:653:1568 +GGTTCAGAACTTGAAGACAAGTCTCTTATGAATTA +>EAS114_45:3:44:1578:1674 +CCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCT +>EAS114_45:3:44:1578:1674 +GCTGCAACTGTGAGCCATCACAATGAACAACAGGA +>EAS114_45:3:75:217:337 +GACAGGCTGCAACTGTGAGCCATCACAATGAACAA +>EAS114_45:3:75:217:337 +GAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCT +>EAS114_45:3:90:1403:1635 +TGTCTTGACACCCAACTAATATTTGTCTGAGCAAA +>EAS114_45:3:90:1403:1635 +TTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTCT +>EAS114_45:4:48:310:473 +TGAATTAACCCAGTCAGACAAAAATAAAGAAAAAA +>EAS114_45:4:48:310:473 +TTTAGTCTTGCTAGAGATTTAGACATCTAAATGAA +>EAS114_45:4:73:1208:495 +AGATGAAACGCGTAACTGCGCTCTCATTCACTCCA +>EAS114_45:4:73:1208:495 +TAAATATAGAAATTGAAACAGCTGTGTTTAGTGCC +>EAS114_45:4:7:1347:375 +CTAAAAGCCCATACTTTACTGCTACTCAATATATC +>EAS114_45:4:7:1347:375 +GGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGA +>EAS114_45:4:87:323:895 +ATCTGGATTCTGGGAAATTCTTCATCCTGGACCCT +>EAS114_45:4:87:323:895 +GGTATAATACCTCTACATGGCTGATTATGAAAACA +>EAS114_45:4:88:55:1187 +GTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAG +>EAS114_45:4:88:55:1187 +GTCATCAGACTATCTAAAGTCAACATGAAGGAAAA +>EAS114_45:5:56:1757:1319 +CATTAATAAAGACATGAGTTCAGGTAAAGGGGTGG +>EAS114_45:5:56:1757:1319 +TTTATAAAACAATTAATTGAGACTACAGAGCAACT +>EAS114_45:5:62:841:1994 +ATATCCATGTAACAAATCTGCGCTTGTACTTCTAA +>EAS114_45:5:62:841:1994 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>EAS114_45:5:66:959:1311 +CAGAGCTGCTGGCAAGCTAGAGGCCCATCTGGAGC +>EAS114_45:5:66:959:1311 +GGGAAGGAGCATTTTGTCAGTTACCAAATGTGTTT +>EAS114_45:5:82:843:1838 +CTAGGTAAAAAATTAACATTACAACAGGAACAAAA +>EAS114_45:5:82:843:1838 +TGTTCTACGCAAACAGAAACCAAATGAGAGAAGGA +>EAS114_45:5:85:401:1190 
+TACACACACATGGTTTAGGGGTATAATACCTCTAC +>EAS114_45:5:85:401:1190 +TCACTCCAGCTCCCTGTCACCCAATGGACCTGTGA +>EAS114_45:5:91:89:666 +GAAAACCTCTTTAGTCTTGCTAGAGATTTAGACAT +>EAS114_45:6:14:1211:1332 +AGGAAGATACATTGCAAGACAGACTTCATCAAGAT +>EAS114_45:6:14:1211:1332 +TTTCAAGAAGTATGAGATTATGTAAAGTAACTGAA +>EAS114_45:6:37:156:134 +AGAGAAAAGCATACAGTCATCTATAAAGGAAATCC +>EAS114_45:6:37:156:134 +GGAAAAACTATTTGAGGAAGTAATTGGGGAAAACC +>EAS114_45:6:39:956:676 +TAAAACAAATACTACTAGACCTAAGAGGGATGAGA +>EAS114_45:6:39:956:676 +TGAGAGAAGGAGTAGCTATACTTATATCAGATAAA +>EAS114_45:6:44:77:1255 +CTCATTAATAAAGACATGAGTTCAGGTAAAGGGGT +>EAS114_45:6:44:77:1255 +TAAAAAGATCAATTCAGCAAGAAGATATAACCATC +>EAS114_45:6:45:1769:1130 +ACCCAATGGACCTGTGATATCTGGATTCTGGGAAA +>EAS114_45:6:45:1769:1130 +TGTTCTCATCAACCTCATACACACACATGGTTTAG +>EAS114_45:6:47:1791:444 +AAGAGGGATGAGAAATTACCTAATTGGTACAATGT +>EAS114_45:6:47:1791:444 +TACTTATATCAGATAAAGCACACTTTAAATCAACA +>EAS114_45:6:59:1548:1096 +CCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTG +>EAS114_45:6:59:1548:1096 +GTCCTATGTACTTATCATGACTCTATCCCAAATTC +>EAS114_45:6:5:730:1436 +GAAGAACTTTGATGCCCTCTTCTTCCAAAGATGAA +>EAS114_45:6:5:730:1436 +TATTCAGTTCTAAATATAGAAATTGAAACAGCTGT +>EAS114_45:6:86:693:234 +AAGTAACTGAACCTATGAGTCACAGGTATTCCTGA +>EAS114_45:6:86:693:234 +GTAGTCATCAGACTATCTAAAGTCAACATGAAGGA +>EAS114_45:6:86:859:1779 +TTTTTTTCATTTCTCTTTTTTTTTTTTTTTTTTTT +>EAS114_45:6:90:561:850 +ACAGGAACAAAACCTCATATATCAATATTAACTTT +>EAS114_45:6:90:561:850 +TACGCAAACAGAAACCAAATGAGAGAAGGAGTAGC +>EAS114_45:6:93:1475:542 +TGAAAGCTTGGGCTGTAATGATGCCCCTTGGCCAT +>EAS114_45:6:93:1475:542 +TTATCATGACTCTATCCCAAATTCCCAATTACGTC +>EAS114_45:7:14:1256:204 +AAATGTCTATTTTTGTCTTGACACCCAACTAATAT +>EAS114_45:7:14:1256:204 +TCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTC +>EAS114_45:7:14:978:1296 +ATACAATGATAAAAAGATCAATTCAGCAAGAAGAT +>EAS114_45:7:14:978:1296 +CAATATTCTGATGATGGTTACACTAAAAGCCCATA +>EAS114_45:7:24:1374:211 +AGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTG +>EAS114_45:7:24:1374:211 +TGCAATTAATATAATTGTGTCCATGTACACACGCT 
+>EAS114_45:7:2:168:1878 +AAAAAACCTGGCAAACACGAATGTTATGACATGTN +>EAS114_45:7:2:168:1878 +TAAATACACACAAAAGTAGAAAACGCACCAGTTTT +>EAS114_45:7:33:1566:588 +ACAGCTTAGGCATCAATTTGGTGTTCTGTGTAAAG +>EAS114_45:7:33:1566:588 +TACTGTCATAACTATGAAGAGCCTATTGCCAGATG +>EAS114_45:7:35:538:1882 +TATGCACCTAACACAAGACTACCCAGATTCATAAA +>EAS114_45:7:35:538:1882 +TCTATAACAAAATTAAAATTTAACAAAAGTAAATA +>EAS114_45:7:37:763:1437 +AAAGATGTTCTACGCAAACAGAAACCAAATGAGAG +>EAS114_45:7:37:763:1437 +TAAAACAAATACTACTAGACCTAAGAGGGATGAGA +>EAS114_45:7:45:1339:1807 +GACATCTAAATGAAAGAGGCTCAAAGAATGCCAGG +>EAS114_45:7:69:1130:832 +ATAGTTGAAAGCTCTAACAATAGACTAAACCAAGC +>EAS114_45:7:69:1130:832 +TCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAA +>EAS114_45:7:6:758:988 +AAACAAAGGAGGTCATCATACAATGATAAAAAGAT +>EAS114_45:7:6:758:988 +ATTCTGATGATGGTTACACTAAAAGCCCATACTTT +>EAS114_45:7:88:451:1773 +ATAAATACACACAAAAGTACAAAACTCACAGGTTT +>EAS114_45:7:88:451:1773 +ATTGGCAGAACAGATTTAAAAACATGAACTAACTA +>EAS114_45:7:97:1584:777 +CCAGATGAACCACACATTAATACTATGTTTCTTAT +>EAS114_45:7:97:1584:777 +GTCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCC +>EAS114_45:7:9:512:826 +AACTTCCACGTCTCATCTAGGGGAACAGGGAGGTG +>EAS114_45:7:9:512:826 +ACCACACATTAATACTATGTTTCTTATCTGCACAT +>EAS139_11:1:35:631:594 +ATCATGACTCTATCCCAAATTCCCAATTACGTCCT +>EAS139_11:1:35:631:594 +ATGATGCCCCTTGGCCATCACCCGGTCCCTGCCCC +>EAS139_11:1:59:742:549 +ACAAGCAAATGCTAAGATAATTCATCATCACTAAA +>EAS139_11:1:59:742:549 +TTAACATTACAACAGGAACAAAACCTCATATATCA +>EAS139_11:1:81:1019:558 +ACAAAACTCACAGGTTTTATAAAACAATTAATTGA +>EAS139_11:1:81:1019:558 +TGAACTAACTATATGCTGTTTACAAGAAACTCATT +>EAS139_11:1:84:92:1246 +GAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAG +>EAS139_11:1:84:92:1246 +GTTACACTAAAAGCCCATACTTTACTGCTACTCAA +>EAS139_11:2:31:628:1820 +AACCAGTCCTATAAGAAATGCTCAAAAGAATTGTA +>EAS139_11:2:31:628:1820 +CAGGAACAAAACCTCATATATCAATATTAACTTTG +>EAS139_11:2:42:333:516 +AGACAAGTCTCTTATGAATTAACCCAGTCAGACAA +>EAS139_11:2:42:333:516 +TCTTTAGTCTTGCTAGAGATTTAGACATCTAAATG +>EAS139_11:2:55:296:1457 +CACTTTAAATCAACAACAGTAAAATAAAACAAAGG 
+>EAS139_11:2:55:296:1457 +CCCACTTAAGAGATATAGATTGGCAGAACAGATTT +>EAS139_11:2:63:816:921 +AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT +>EAS139_11:2:63:816:921 +TGAAGACAAGTCTCTTATGAATTAACCCAGTCAGA +>EAS139_11:2:6:251:1557 +AAAACAATGTTCCCCAGATACCATCCCTGTCTTAC +>EAS139_11:2:6:251:1557 +CCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTT +>EAS139_11:2:71:83:58 +AAAGAAAAAAGAATTTTAAAAATGAACAGAGCTTT +>EAS139_11:2:71:83:58 +CCAGGAAGATACATTGCAAGACAGACTTCATCAAG +>EAS139_11:3:34:970:1374 +ATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGG +>EAS139_11:3:34:970:1374 +CTGGACCCTGAGAGATTCTGCAGCCCAGATCCAGA +>EAS139_11:3:43:1229:1855 +ATGTACAATATTCTGATGATGGTTACACTAAAAGC +>EAS139_11:3:43:1229:1855 +CAACAGTAAAATAAAACAAAGGAGGTCATCATACA +>EAS139_11:3:65:556:1505 +CAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGG +>EAS139_11:3:65:556:1505 +TGCCTTCAGACCCTACACGAATGCGTCTCTACCAC +>EAS139_11:3:81:12:1231 +AAAAAGCAAAAACAAAAACTATGCTAAGTATTGGT +>EAS139_11:3:81:12:1231 +TTACCTAATTGGTACAATGTACAATATTCTGATGA +>EAS139_11:4:26:137:1382 +AATGTTATGCCCTGCTAAACTAAGCATCATAAATG +>EAS139_11:4:26:137:1382 +AGACTATCTAAAGTCAACATGAAGGAAAAAAATTC +>EAS139_11:4:36:1184:994 +GCCAGAAGAGATTGGATCTAATTTTTGGACTTCTT +>EAS139_11:4:36:1184:994 +TACATTGCAAGACAGACTTCATCAAGATATGTAGT +>EAS139_11:4:36:1231:1381 +AAGAGATATAGATTGGCAGAACAGATTTAAAAACA +>EAS139_11:4:36:1231:1381 +TAAAAGTCAAAATTAAAGTTCAATACTCACCATCA +>EAS139_11:4:38:557:1441 +GATAAAAATAAAAAAGCAAAAACAAAAACTATGCT +>EAS139_11:4:38:557:1441 +TAAGAGGGATGAGAAATTACCTAATTGGTACAATG +>EAS139_11:4:50:30:15 +AGATTATGTAAAGTAACTTAACCTATGAGTCCAAG +>EAS139_11:4:50:30:15 +TACATTGCAAGACAGTCGTCAGCAAGATATGTAGT +>EAS139_11:4:63:527:1923 +GCTTTACTGTCATAACCATGAAGAGACTATTGCCA +>EAS139_11:4:63:527:1923 +TACACGAATGCGTCTCTACCACAGGGGGCTGCGCG +>EAS139_11:5:32:686:735 +AAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGG +>EAS139_11:5:32:686:735 +CAGAAGAAAGAGGTTCANANNNTGANGACAAGTCT +>EAS139_11:5:41:314:1173 +AAGAAAAAAAAACCTGTCAAACACGAATGTTATGC +>EAS139_11:5:41:314:1173 +AATTAAAGTTCAATACTCACCATCATAAATACACA +>EAS139_11:5:52:1278:1478 
+GCTTGTACTTCTAAATCTATAACAAAATTAAAATT +>EAS139_11:5:52:1278:1478 +GTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTT +>EAS139_11:5:61:38:1182 +AGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTC +>EAS139_11:5:61:38:1182 +GTTTGATTTGGTGGAAGACATAATCCCACGCTTCC +>EAS139_11:5:64:199:1288 +GTAAAATAAAACAAAGGAGGTCATCATACAATGAT +>EAS139_11:5:64:199:1288 +TACAATGTACAATATTCTGATGATGGTTACACTAA +>EAS139_11:5:78:775:555 +AATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAG +>EAS139_11:5:78:775:555 +TTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAA +>EAS139_11:6:11:285:1567 +ACATGGCTGATTATGAAAACAATGTTCCCCAGATA +>EAS139_11:6:11:285:1567 +CCCTGAGAGATTCTGCAGCCCAGATCCAGATTGCT +>EAS139_11:6:11:360:1577 +GAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCT +>EAS139_11:6:11:360:1577 +TTTGGCATTTGCCTTCAGACCCTACACGAATGCGT +>EAS139_11:6:13:682:680 +AGAATAACAATGGGCTTCTCAGCGGAAACCTTACA +>EAS139_11:6:13:682:680 +ATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAA +>EAS139_11:6:17:1179:393 +CTAATTGGTACAATGTACAATATTCTGATGATGGT +>EAS139_11:6:17:1179:393 +TGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTA +>EAS139_11:6:19:306:982 +ATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCA +>EAS139_11:6:19:306:982 +GACAGACTTCATCAAGATATGTAGTCATCAGACTA +>EAS139_11:6:75:946:1035 +AACCCCCTTGCAACAACCTTGAGAACCCCAGGGAA +>EAS139_11:6:75:946:1035 +AATGGACCTGTGATATCTGGATTCTGGGAAATTCT +>EAS139_11:6:82:164:1924 +GAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTT +>EAS139_11:6:82:164:1924 +GCCCAGCACCAGATTGCTTGTGGTCTGACAGGCTG +>EAS139_11:6:89:1151:1878 +CCATCACAATGAACAACAGGAAGAAAAGGTCTTTC +>EAS139_11:6:89:1151:1878 +CTTTCAACGATTCTAGCCATTTCTTTTGGCATTTG +>EAS139_11:7:24:1345:1627 +AGATTGGCAGAACAGATTTAAAAACATGAACTAAC +>EAS139_11:7:24:1345:1627 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +>EAS139_11:7:42:1091:1726 +CAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGT +>EAS139_11:7:42:1091:1726 +TCTGGGAAATTCTTCATCCTGGACCCTGAGAGATT +>EAS139_11:7:46:695:738 +CAAAGATGAAACGCGTAACTGCGCTCTCATTCACT +>EAS139_11:7:46:695:738 +TGAAACAGCTGAGTTTAGCGCCTGTGTTCACATAG +>EAS139_11:7:50:1229:1313 +ACATAGCTAAAACTAAAAAAGCAAAAACAAAAACT +>EAS139_11:7:50:1229:1313 +TTTTTTCTTTTTTTTTTTTTTTTTTTTGCATGCCA 
+>EAS139_11:7:53:458:581 +CTCAATTAATTGTTTTATAAAACCTGTGAGTTTTG +>EAS139_11:7:53:458:581 +TTATGCCCTGCTAAACTAAGCATCATAAATGAAGG +>EAS139_11:7:60:163:1612 +AGCAAGAGAAAAGCATACAGTCATCTATAAAGGAA +>EAS139_11:7:60:163:1612 +GGGAACTAAAGTCAAGTCTTTCCTGACAAGCAAAT +>EAS139_11:7:74:213:877 +AGATGTTCTACGCAAACAGAAACCAAATGAGAGAA +>EAS139_11:7:74:213:877 +TTAACATTACAACAGGAACAAAACCTCATATATCA +>EAS139_11:7:92:367:1495 +ACCACAGGGGGCTGCGCGGTTTCCCATCATGAAGC +>EAS139_11:7:92:367:1495 +CTGTCATAACTATGAAGAGACTATTGCCAGATGAA +>EAS139_11:8:17:437:1378 +ACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAA +>EAS139_11:8:17:437:1378 +ATCAATATTAACTTTGAATAAAAAGGGATTAAATT +>EAS139_11:8:26:1221:222 +AAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCT +>EAS139_11:8:26:1221:222 +CTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA +>EAS139_11:8:38:842:395 +GTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCA +>EAS139_11:8:76:205:587 +GAATAACAATGGGCTTCTCAGCGGAAACCTTACGA +>EAS139_11:8:76:205:587 +TGCTAGAGATTTAGACATCTAAATGAAAGAGGCTC +>EAS139_11:8:82:566:1096 +CAAAAGGTGATGTGTGTTCTCATCAACCTCATACA +>EAS139_11:8:82:566:1096 +CTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA +>EAS139_11:8:96:1314:1448 +CCTATGAGTCACAGGTATTCCTGAGGAAAAATAAA +>EAS139_11:8:96:1314:1448 +GTAGTCATCAGACTATCTAAAGTCAACATGAAGGA +>EAS139_19:1:14:420:712 +TGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTCCC +>EAS139_19:1:14:420:712 +TGTTGGTTTTCTGTTTCTTTGTTTGATTTTTTTGAAGACA +>EAS139_19:1:1:1598:843 +TCAGCGGAAACCTTACAAGCCAGAAGAGATTGGATCTAAT +>EAS139_19:1:1:1598:843 +TGCCAGGAAGATACATTGCAAGACAGACTTCATCAAGATA +>EAS139_19:1:36:481:1079 +GTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAA +>EAS139_19:1:36:481:1079 +TCCCCCACTTAAGAGATATAGATTGGCAGAACAGATTTAA +>EAS139_19:1:40:1596:1433 +CCATCACAATGAACAACAGGAAGAAAAGGTCTTTCAAAAG +>EAS139_19:1:40:1596:1433 +GCTTTCAACGCTTCTAGCCATTTCTTTTGGCATTTGCCTT +>EAS139_19:1:47:352:1492 +AGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGCCCACTA +>EAS139_19:1:47:352:1492 +TTTGTTTTGTATGGTGGAAGACATAATCCCACGCTTCCTA +>EAS139_19:1:53:463:1132 +ATGATAAAAAGATCAATTCAGCAAGAAGATATAACCATCC +>EAS139_19:1:53:463:1132 
+ATTTAAAAACATGAACTAACTATATGCTGTTTACANGAAA +>EAS139_19:1:58:726:1746 +AGATTGGCAGAACAGATTTAAAAACATGAACTAACTATAT +>EAS139_19:1:58:726:1746 +CAATTTAAATCAACAACAGTAAAATAAAACAAAGGAGGTC +>EAS139_19:1:82:946:392 +CAATATATCCATGTAACAAATCTGCGCTTGTACTTCAAAA +>EAS139_19:1:82:946:392 +GAAAACAATTTGGTAATTTCGTTTTTTTTTTTTTCTTTTC +>EAS139_19:1:85:1521:58 +AAATTAACATTACAACAGGAACAAAACCTCATATATCAAT +>EAS139_19:1:85:1521:58 +CTGACAAGCAAATGCTAAGATAATTCATCATCACTAAACC +>EAS139_19:1:87:1222:878 +TATAGGGCCTTTGTTCAAACCCCTTGCAACAACCTTGAGA +>EAS139_19:1:87:1222:878 +TCAGCGCGTCACTCCGCTCTCATTCACCCCAGCTCCCTGT +>EAS139_19:1:99:1632:76 +AAAGAAAAAAAACCCTGTCAAACACGAATGTTATGCCCTG +>EAS139_19:1:99:1632:76 +TTGTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATA +>EAS139_19:2:12:1335:1372 +GAAGAACAGATTTAAAAACATGAACTAACTATATGCTGTT +>EAS139_19:2:12:1335:1372 +TAAAGTTCAATACTCACCATCATAAATACACACAAAAGTA +>EAS139_19:2:29:1822:1881 +AGAAAAAAGAATTTTAAAAATGAACAGAGCTTTCAAGAAG +>EAS139_19:2:29:1822:1881 +ATGAAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCAA +>EAS139_19:2:2:1217:398 +CAATTAATTGAGACTACAGAGCAACTAGGTAAAAAATTAA +>EAS139_19:2:2:1217:398 +TAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGC +>EAS139_19:2:33:1193:664 +GCCCCATCTCTTGTAATCTCTCTCCTTTTTTCTGCATCCC +>EAS139_19:2:33:1193:664 +TATTTTTGTCTTGACACCCAACTAATATTTGTCTGAGCAA +>EAS139_19:2:57:1672:1890 +CCCCCCCCCCCCCCCCCAGCCACTGCGGCCCCCCCAGCCA +>EAS139_19:2:57:1672:1890 +TATTCAGTTCTAAATATAGAAATTGAAACAGCTGTGTTTA +>EAS139_19:2:82:154:1333 +TTAAAATTTAACAAAAGTAAATAAAACACACAGCTAAAAC +>EAS139_19:2:82:154:1333 +TTTTTTTTTTTTTTTTTTTTTCTTTTTTTTTTTTTTTTTT +>EAS139_19:3:10:349:1147 +GCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAA +>EAS139_19:3:24:1135:563 +CTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAATGGA +>EAS139_19:3:24:1135:563 +GCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAACC +>EAS139_19:3:4:1502:1911 +CTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTTT +>EAS139_19:3:4:1502:1911 +TACACACACATGGTTTAGGGGTATAATACCTCTACATGGC +>EAS139_19:3:58:923:1915 +GCAAACAGAAACCAAATGAGAGAAGGAGTAGCTATACTTA +>EAS139_19:3:58:923:1915 
+TATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCC +>EAS139_19:3:5:538:401 +AAGGTGATGTGTGTTCTCATCAACCTCATACACACACATG +>EAS139_19:3:5:538:401 +TTTGCCTTCACACCCTACACGAATGCGTCTCTGCCACAGG +>EAS139_19:3:66:718:481 +AACAACAGTAAAATAAAACAAAGGAGGTCATCATACAATG +>EAS139_19:3:66:718:481 +AAGAGATATAGATTGGCAGAACAGATTTAAAAACATGAAC +>EAS139_19:3:73:1158:535 +AATAAAGATATGTAGTCATCAGACTATCTAAAGTCAACAT +>EAS139_19:3:73:1158:535 +CCTATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTG +>EAS139_19:3:73:936:1509 +CTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTTATTT +>EAS139_19:3:73:936:1509 +TTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCT +>EAS139_19:3:75:732:442 +CGGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC +>EAS139_19:3:75:732:442 +CTATCCCAAATTCCCAATTACGTCCTATCTTCTTCTTAGG +>EAS139_19:3:87:133:930 +CAGTCTCAGGGCGCCGTCCGTTTCCTCCCATCTGGCCTCG +>EAS139_19:3:87:133:930 +TTATCTGCACATTACTACCCTGCAATTAATATAATTGTGT +>EAS139_19:3:88:1656:896 +AGGGAAGAGGGATGCTGAAGAACTTTGATGCCCTCTTCTT +>EAS139_19:3:88:1656:896 +TGCAGCCCAGATCCAGATTGCTTGTGGTCTGACAGGCTGC +>EAS139_19:4:13:1155:631 +ACCCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTTGTG +>EAS139_19:4:13:1155:631 +AGGGGTATAATACCTCTACATGGCTGATTATGAAAACAAT +>EAS139_19:4:18:1335:1514 +ATTGGTACAATGTACAATATTCTGATGATGGTTACACTAA +>EAS139_19:4:18:1335:1514 +CTTTAAATCAACAACAGTAAAATAAAACAAAGGAGGTCAT +>EAS139_19:4:1:156:196 +AAAAGGGATTAAATTCCCCCACTTAAGAGATATAGATTGG +>EAS139_19:4:1:156:196 +ACTTATATCAGATAAAGCACACTTTAAATCAACAACAGTA +>EAS139_19:4:26:1312:1400 +ACTGAACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAA +>EAS139_19:4:26:1312:1400 +TATGTAGTCATCAGACTATCTAAAGTCAACATGAAGGAAA +>EAS139_19:4:26:274:1078 +AATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGAC +>EAS139_19:4:26:274:1078 +GTCATCTATAAAGGAAATCCCATCAGAATAACAATGGGCT +>EAS139_19:4:68:1122:79 +ATGGCTGATTATGAAAACAATGTTCCCCAGATACCATCCC +>EAS139_19:4:68:1122:79 +TGAGAGATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCT +>EAS139_19:4:69:1593:819 +ATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTAC +>EAS139_19:4:69:1593:819 +CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAA +>EAS139_19:4:77:1780:693 
+GGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGAT +>EAS139_19:4:77:1780:693 +TGAAGACAAGTCTCTTATGAATTAACCCAGTCAGACAAAA +>EAS139_19:4:78:806:800 +AAATTAAAGTTCAATACTCACCATCATAAATACACACAAA +>EAS139_19:4:78:806:800 +AACCTGTCAAACACGAATGTTATGCCCTGCTAAACTAAGC +>EAS139_19:5:29:411:1208 +AAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATACTCAC +>EAS139_19:5:29:411:1208 +CCCCCACTTAAGAGATATAGATTGGCAGAACAGATTTAAA +>EAS139_19:5:40:758:116 +GTCTCTACCACAGGGGGCTGCGCGGTTTCCCATCATGAAG +>EAS139_19:5:40:758:116 +GTGTGTTCTCATCAACCTCATACACACACATGGTTTAGGG +>EAS139_19:5:4:939:2021 +AAATACTACTAGACCTAAGAGGGATGAGAAATTACCTAAT +>EAS139_19:5:4:939:2021 +GGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAAT +>EAS139_19:5:57:366:844 +AAATTCCCCCACTTAAGAGATATAGATTGGCAGAACAGAT +>EAS139_19:5:57:366:844 +TAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAAT +>EAS139_19:5:61:1885:163 +AGCAAGAAGATATAACCATCCTACTAAATACATATGCACC +>EAS139_19:5:61:1885:163 +ATACTTTACTGCTACTCAATATATCCATGTAACAAATCTG +>EAS139_19:5:66:1381:181 +GGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGT +>EAS139_19:5:66:1381:181 +TTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACT +>EAS139_19:5:68:306:409 +AACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAG +>EAS139_19:5:68:306:409 +CCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATT +>EAS139_19:5:70:318:1631 +TAAGAAATTACAAAATATAGTTGAAAGCTCTAACAATAGA +>EAS139_19:5:74:668:424 +GACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTT +>EAS139_19:5:74:668:424 +GTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAA +>EAS139_19:5:89:525:113 +GACGCTGAAGAACTTTGATTCCCTCTTCTTCCAAAGATGA +>EAS139_19:5:89:525:113 +TATTTATGCTATTCAGTTATAAATATAGAAATTGAAACAG +>EAS139_19:5:95:944:247 +GGTACAATGTACAATATTCTGATGATGGTTACACTAAAAG +>EAS139_19:5:95:944:247 +GTAAAGATGTGGGGAAAAAAGTAAACTCTCAAATATTGCT +>EAS139_19:6:21:1601:1666 +GAAAGCTCTAACAATAGACTAAACCAAGCAGAAGAAAGAG +>EAS139_19:6:21:1601:1666 +TATTACTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAA +>EAS139_19:6:52:1455:1212 +CCATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTGT +>EAS139_19:6:52:1455:1212 +TTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCT +>EAS139_19:6:72:308:839 
+AGTTACCAAATGTGTTTATTACCAGAGGGATGGAGGGAAG +>EAS139_19:6:72:308:839 +ATCGTGGACCCTGAGAGATTCTGCAGCCCAGATCCAGATT +>EAS139_19:6:75:1503:1399 +CAAGAAGATATAACCATCCTACTAAATACATATGCACCTA +>EAS139_19:6:75:1503:1399 +CATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGC +>EAS139_19:6:78:1029:512 +AGATAATTCATCATCACTAAACCAGTCCTATAAGAAATGC +>EAS139_19:6:78:1029:512 +TCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAAT +>EAS139_19:6:82:1051:921 +GCAAATAGGTAAAAAATTAACATTACAACAGGAACAAAAC +>EAS139_19:6:82:1051:921 +GGGGAAATAAAGTCAAGGCTTTCCTGACAAGCAAATGCTA +>EAS139_19:6:84:438:1505 +ATTAATATAATTGTGTCCATGTACACACTCTGTCCTATGT +>EAS139_19:6:84:438:1505 +GCACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGTTTC +>EAS139_19:7:44:1807:833 +ATATCCATGTAACAAATCTGCGCTTGTACTTCTAAATCTA +>EAS139_19:7:44:1807:833 +CTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCAC +>EAS139_19:7:85:262:751 +CCATGTAACAAATCTGCGCTTGTACTTCTAAATCTATAAC +>EAS139_19:7:85:262:751 +TACAATGATAAAAAGATCAATTCAGCAAGAAGATATAACC +>EAS139_19:7:92:288:1354 +TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATT +>EAS139_19:7:92:288:1354 +TGTCTTGACACCCAACTAATATTTGTCTGAGCAAAACAGT +>EAS188_4:5:103:870:105 +AAACTAAGCATCATAAATGAAGGGGAAATAAAGTC +>EAS188_4:5:103:870:105 +ATAAAACAATTAATTGAGACTACAGAGCAACTAGG +>EAS188_4:5:166:776:590 +CTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGCC +>EAS188_4:5:166:776:590 +TAATTGTGTCCATGTACACACGCTGTCCTATGTAC +>EAS188_4:5:202:326:680 +ACTTATCATGACTCTATCCCAAATTCTCAATTACG +>EAS188_4:5:202:326:680 +GTAATGATGCCCCTTGGCCATCACCCGGTCCCTGC +>EAS188_4:5:295:547:216 +ATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCT +>EAS188_4:5:295:547:216 +TAAAAAATTAACATTACAACAGGAACAAAACCTCA +>EAS188_4:5:302:997:951 +ATGAACTTCTGTAATTGAAAAATTCATTTAAGAAA +>EAS188_4:5:308:552:77 +TTTTCTTTTTTTTCTTTTCTCTTTTTTTTTTTTTT +>EAS188_4:5:8:377:655 +CTATTTTTGTCTTGACACCCAACTAATATTTGTCT +>EAS188_4:5:8:377:655 +CTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGCT +>EAS188_4:7:282:567:481 +GGAACAGGGAGGCGCACTAATGCGCTCCACGCCCA +>EAS188_4:7:282:567:481 +TGCAATTAATATAATTGTGTCCACGTACACACGCT +>EAS188_4:7:296:401:60 +AATGAAAGAGGCTCAAAGAATGCCAGGAAGATACA 
+>EAS188_4:7:35:408:348 +AAGAAACGCGTAACTGCGCTCTCATACACTCCAGC +>EAS188_4:7:35:408:348 +GGTTCTCAAGGTTGTTGCAATGGGGTCTATGTGAA +>EAS188_4:7:78:583:670 +CAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTT +>EAS188_4:7:78:583:670 +TAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCT +>EAS188_4:7:96:899:106 +TCTATAAAGGAAATCCCATCAGAATAACAATGGGC +>EAS188_4:7:96:899:106 +TTCCTGACAAGCAAATGCTAAGATAATTCATCATC +>EAS188_7:1:115:683:296 +AACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTC +>EAS188_7:1:115:683:296 +CTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTC +>EAS188_7:1:177:522:118 +TAAACTAAGCATCATAAATGAAGGGGAAATAAAGT +>EAS188_7:1:177:522:118 +TCTCAATTAATTGTTTTATAAAACCTGTGAGTTTT +>EAS188_7:1:290:286:763 +TTAAAATTTAACAAAAGTAAATAAAACACATAGCT +>EAS188_7:1:290:286:763 +TTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTT +>EAS188_7:1:316:949:122 +TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCT +>EAS188_7:1:316:949:122 +TTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTT +>EAS188_7:1:77:251:446 +CAGCATGGTTGTACTGGGCAATACATGAGATTATT +>EAS188_7:1:77:251:446 +TTATCATGACTCTATCCCAAATGCCCAATTACGTC +>EAS188_7:2:152:765:744 +ACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCGT +>EAS188_7:2:152:765:744 +TTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACAT +>EAS188_7:2:172:622:707 +ACATGGCTGATTATGAAAACAATGTTCCCCAGATA +>EAS188_7:2:172:622:707 +TTCTTCATCCTGGACCCTGAGAGATTCTGCAGCCC +>EAS188_7:2:187:227:818 +CAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAG +>EAS188_7:2:187:227:818 +CCCCCTTGCAACAACCTTGAGAACCCCAGGGAATT +>EAS188_7:2:19:736:559 +AAGACTTCATCAAGATATGTAGTCATCAGACTATC +>EAS188_7:2:19:736:559 +TGAACCTATGAGTCACAGGTATTCCTGAGGAAAAA +>EAS188_7:2:218:877:489 +TATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTG +>EAS188_7:2:259:219:114 +GAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCA +>EAS188_7:2:259:219:114 +TTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAA +>EAS188_7:3:100:735:530 +GCACACTTTAAATCAACAACAGTAAAATAAAACAA +>EAS188_7:3:100:735:530 +TGATGATGGTTACACTAAAAGCCCATACTTTACTG +>EAS188_7:3:101:572:491 +CAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTG +>EAS188_7:3:101:572:491 +TTCCAAAGATGAAACGCGTAACTGCGCTCTCATTC +>EAS188_7:3:13:122:187 +AGCATTTTGTCAGTTACCAAATGTGTTTATTACCA +>EAS188_7:3:13:122:187 
+GACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCCTT +>EAS188_7:3:15:568:42 +TTTTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT +>EAS188_7:3:15:568:42 +TTTTTTTTTTTGTTTTTTTTTTTTTTTTTTTTATA +>EAS188_7:3:182:104:921 +ATCAAGATATGTAGTCATCAGACTATCTAAAGTCA +>EAS188_7:3:182:104:921 +CACGAATGTTATGCCCTGCTAAACTAAGCATCATA +>EAS188_7:3:200:712:439 +CGTCACCCGGTCCCTGCCCCATCTCTTGTAATCTC +>EAS188_7:3:200:712:439 +GTTGGGAGATTTTTAATGATTCCTCAATGTTAAAA +>EAS188_7:3:296:224:724 +ATAGTTGAAAGCTCTAACAATAGACTAAACCAAGC +>EAS188_7:3:296:224:724 +TGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAAC +>EAS188_7:3:76:333:905 +AATTGTGTCCATGTACACACGCTGTCCTATGTACT +>EAS188_7:3:76:333:905 +TTTCTGCCCCCAGCATGGTTGTACTGGGCAATACA +>EAS188_7:4:164:719:947 +AAATTAACATTACAACAGGAACAAAACCTCATATA +>EAS188_7:4:164:719:947 +ACGCAAACAGAAACCAAATGAGAGAAGGAGTAGCT +>EAS188_7:4:171:104:398 +AGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCC +>EAS188_7:4:171:104:398 +CAATTAATATAATTGTGTCCATGTACACACGCTGT +>EAS188_7:4:21:443:404 +AGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTCC +>EAS188_7:4:21:443:404 +TTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCT +>EAS188_7:4:238:441:727 +GTAATTGAAAAATTCATTTAAGAAATTACAAAATA +>EAS188_7:4:259:869:641 +GTTGGGAGATTTTTAATGATTCCTCAATGTTAAAA +>EAS188_7:4:259:869:641 +TGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCA +>EAS188_7:4:92:693:228 +AAGGTTTTATAAAAAAATTAATTGAGACTACAGAG +>EAS188_7:4:92:693:228 +AGCATCATAAATGAAGGGGAAATAAAGTCAAGTCT +>EAS188_7:5:112:51:128 +AGACCCCCTTGCAACAACCTTGAGAACCCCAGGGA +>EAS188_7:5:112:51:128 +CCCAATGGACCTGTGATATCTGGATTCTGGGAAAT +>EAS188_7:5:115:249:673 +TAAAGAAAAAAAAACCTGTCAAACACGAATGTTAT +>EAS188_7:5:115:249:673 +TAAATACACACAAAAGTACAAAACTCACAGGTTTT +>EAS188_7:5:163:982:695 +CTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGA +>EAS188_7:5:163:982:695 +TCAAAGAATGCCAGGAAGATACATTGCAAGACAGA +>EAS188_7:5:308:354:124 +GCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTT +>EAS188_7:5:308:354:124 +TGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCT +>EAS188_7:5:74:329:459 +ACAGAGCAACTAGGTAAAAAATTAACATTACAACA +>EAS188_7:5:74:329:459 +TAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAG +>EAS188_7:6:107:447:488 
+TAATTGGTACAATGTACAATATTCTGATGATGGTT +>EAS188_7:6:107:447:488 +TGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTA +>EAS188_7:6:107:636:642 +AAGTCTTTCCTGACAAGCAAATGCTAAGATAATTC +>EAS188_7:6:107:636:642 +GTCATCTATAAAGGAAATCCCATCAGAATAACAAT +>EAS188_7:6:11:994:584 +GAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAAG +>EAS188_7:6:11:994:584 +GGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTT +>EAS188_7:6:191:540:493 +AAAAGCCCATACTTTACTGCTACTCAATATATCCA +>EAS188_7:6:191:540:493 +GTGGGGAAAAAAGTAAACTCTCAAATATTGCTAGT +>EAS188_7:6:194:998:663 +ACCTAACACAAGACTACCCAGATTCATAAAACAAA +>EAS188_7:6:194:998:663 +TCTACGCAAACAGAAACCAAATGAGAGAAGGAGTA +>EAS188_7:6:205:873:464 +AGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAG +>EAS188_7:6:205:873:464 +CCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATT +>EAS188_7:6:46:122:479 +AAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCTT +>EAS188_7:6:46:122:479 +AAGTGAGAAGTTTGGAAGAACTATTTGAGGAAGTA +>EAS188_7:7:19:886:279 +CCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTA +>EAS188_7:7:19:886:279 +GAAAGGAGCATTTTGTCAGTTACCAAATGTGTTTA +>EAS188_7:7:213:309:373 +TAGACATCTAAATGAAAGAGGCTCAAAGAATGCCA +>EAS188_7:7:213:309:373 +TTAAAAATGAACAGAGCTTTCAAGAAGTATGAGAT +>EAS188_7:7:243:876:758 +AGCCCAGATCCAGATTGCTTGTGGTCTGACAGGCT +>EAS188_7:7:243:876:758 +CCCAGATACCATCCCTGTCTTACTTCCAGCTCCCC +>EAS188_7:7:67:719:786 +GGATGAGAAATTACCTAATTGGTACACTGTACAAT +>EAS188_7:7:67:719:786 +TAAAAAAAAAAAAGCAAAAACAAAAACTATGCTAA +>EAS188_7:8:60:182:718 +GTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTT +>EAS188_7:8:60:182:718 +TGACACCCAACTAATATTTGTCTGAGCAAAACAGT +>EAS188_7:8:64:350:174 +CCTAACACAAGACTACCCAGATTCATAAAACAAAT +>EAS188_7:8:64:350:174 +GTTCTACGCAAACAGAAACCAAATGAGAGAAGGAG +>EAS192_3:1:114:19:769 +AAACACGAATGTTATGCCCTGCTAAACTAAGCATC +>EAS192_3:1:114:19:769 +TAAAGTCAACATGAAGGAAAAAAATTCTAAAATCA +>EAS192_3:1:225:195:543 +AACAACCTTGAGAACCCCAGGGAATTTGTCAATGT +>EAS192_3:1:225:195:543 +GGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTG +>EAS192_3:3:194:378:230 +AATACTACTAGACCTAAGAGGGATGAGAAATTACC +>EAS192_3:3:194:378:230 +ATGAGAGAAGGAGTAGCTATACTTATATCAGATAA +>EAS192_3:3:221:881:916 
+TAACACAAGACTACCCAGATTCATAAAACAAATAC +>EAS192_3:3:221:881:916 +TAATTCTAAATCTAGAACAAAATTAAAATTTAACA +>EAS192_3:3:257:611:440 +ACCCTGAGAGATTCTGCAGCCCAGATCCAGATTGC +>EAS192_3:3:257:611:440 +GGAGCATTTTGTCAGTTACCAAATGTGTTTATTAC +>EAS192_3:3:27:973:518 +CTGATTATGAAAACAATGTTCCCCAGATACCATCC +>EAS192_3:3:27:973:518 +TCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGT +>EAS192_3:3:285:349:797 +ATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTC +>EAS192_3:3:285:349:797 +GTTTTAAAAAACCAATAATTGAGACTACAGAGCAA +>EAS192_3:3:309:187:267 +ATTGAGACTACAGAGCAACTAGGTAAAAAATTAAC +>EAS192_3:3:309:187:267 +GGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAA +>EAS192_3:3:88:866:774 +ATGTAACAAATCTGCTCTTGTACTTCTAAATCTAT +>EAS192_3:3:88:866:774 +TTGTTTTCCACTTTGGAAAACAATTTGGTAATTTC +>EAS192_3:4:184:237:476 +ATCATACAATGATAAAAAGATCAATTCAGCAAGAA +>EAS192_3:4:184:237:476 +ATGAACTAACTATATGCTGTTTACAAGAAACTCAT +>EAS192_3:4:255:549:422 +AAGTCATCTATAAAGGAAATCCCATCAGAATAACA +>EAS192_3:4:255:549:422 +CTCTTTAGTCTTGCTAGAGATTTAGACATCTAAAT +>EAS192_3:4:293:168:240 +ATGAACAGAGCTTTCAAGAAGTATGAGATTATGTA +>EAS192_3:4:293:168:240 +CAAAGAATGCCAGGAAGATACATTGCAAGACAGAC +>EAS192_3:4:312:915:751 +AATAAAGTCAAGTCTTTCCTGACAAGCAAAAGCTA +>EAS192_3:4:312:915:751 +ATCTATAAAGGAAATCCCATCAGAATAACAATGGG +>EAS192_3:4:63:5:870 +AAAGAAAAAAGAATTTTAAAAATGAACAGAGCTTT +>EAS192_3:4:63:5:870 +GAAAGAGGCTCAAAGAATGCCAGGAAGATACATTG +>EAS192_3:5:197:914:256 +ACTAGACCTAAGAGGGATGAGAAATTACCTAATTG +>EAS192_3:5:197:914:256 +TCAGATAAAGCACACTTTAAATCAACAACAGTAAA +>EAS192_3:5:223:142:410 +CTATTCAGTTCTAAATATAGAAATTGAAACAGCTG +>EAS192_3:5:27:577:849 +AGTCTCTTATGAATTAACCCAGTCAGACAAAAATA +>EAS192_3:5:27:577:849 +TTAGACATCTAAATGAAAGAGGCTCAAAGAATGCC +>EAS192_3:5:287:334:110 +GATGAATACTAAGATTGATGTAGCAGCTTTTGCAA +>EAS192_3:5:287:334:110 +TATGTAAAGTAACTGAACCTATGAGTCACAGGTAT +>EAS192_3:6:116:464:261 +CAATGATAAAAAGATCAATTCAGCAAGAAGATATA +>EAS192_3:6:116:464:261 +CTATATGCTGTTTACAAGAAACTCATTAATAAAGA +>EAS192_3:6:170:169:57 +GGCTTGACCTCTGGTGACTGCCAGAGCTGCTGGCC +>EAS192_3:6:170:169:57 
+TGCAACAACCTTGAGAACCCCAGGGAATTTGTCAA +>EAS192_3:6:175:437:950 +CCAAATTCCCAATTACGTCCTATCTTCTTCTTAGG +>EAS192_3:6:175:437:950 +CCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC +>EAS192_3:6:185:868:496 +CCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTT +>EAS192_3:6:185:868:496 +GATTTTTAATGATTCCTCAATGTTAAAATGTCTAT +>EAS192_3:6:201:195:757 +CCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC +>EAS192_3:6:201:195:757 +TATGTACTTATCATGACTCTATCCCAAATTCCCAA +>EAS192_3:6:216:292:528 +GGGAGATTTTTAATGATTCCTCAATGTTAAAATGT +>EAS192_3:6:216:292:528 +TAATGATGCCCCTTGGCCATCACCCAGTCCCTGCC +>EAS192_3:6:235:505:553 +GGTGGAAAAAGATGTTCTACGCAAACAGAAACCAA +>EAS192_3:6:235:505:553 +GTAAAAAATTAACATTACAACAGGAACAAAACCTC +>EAS192_3:6:326:887:180 +CTAAACCAGTCCTATAAGAAATGCTCAAAAGAATT +>EAS192_3:6:326:887:180 +TGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGC +>EAS192_3:6:45:183:25 +CAGCGGAAACCTTACAAGCCAGAAGAGATTGGATC +>EAS192_3:6:45:183:25 +CTAAACCAGTCCTATAAGAAATGCTCAAAAGAATT +>EAS192_3:7:149:354:667 +CTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTT +>EAS192_3:7:149:354:667 +GAGGTGCACTAATGCGCTCCACGCCCAAGCCCTTC +>EAS192_3:7:298:644:697 +CTATGTTTCTTATCTGCACATTACTACCCTGCAAT +>EAS192_3:7:298:644:697 +TCCACGTCTCATCTAGGGGAACAGGGAGGTGCACT +>EAS192_3:7:66:891:294 +AGCACACTTTAAATCAACAACAGTAAAATAAAACA +>EAS192_3:7:66:891:294 +TAATTGGTACAATGTACAATATTCTGATGATGGTT +>EAS192_3:7:78:692:671 +AATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACA +>EAS192_3:7:78:692:671 +CAGCAAGAGAAAAGCATACAGTCATCTATAAAGGA +>EAS192_3:7:93:945:176 +CCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTC +>EAS192_3:7:93:945:176 +GTGAGCCATCACAATGAACAACAGGAAGAAAAGGT +>EAS192_3:8:6:104:118 +AAGAATTTTAAAAATGAACAGAGCTTTCAAGAAGT +>EAS192_3:8:6:104:118 +TCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGA +>EAS192_3:8:6:237:885 +AAAATCAGCAAGAGAAAAGCATACAGTCATCTATA +>EAS192_3:8:6:237:885 +TATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT +>EAS1_103:1:151:159:43 +AACCTCATACACACACATGGTTTAGGGGTATAATA +>EAS1_103:1:151:159:43 +TCTCATTCACTCCAGCTCCCTGTCACCCAATGGAC +>EAS1_103:1:228:736:747 +AAGATATGTAGTCATCAGACTATCTAAAGTCAACA +>EAS1_103:1:228:736:747 
+TTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACA +>EAS1_103:1:274:176:479 +CCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTAT +>EAS1_103:1:274:176:479 +GAAATGCTTTACTGTCATAACTATGAAGAGACTAT +>EAS1_103:1:2:831:692 +GTGTTCTCATCAACCTCATACACACACATGGTTTA +>EAS1_103:1:2:831:692 +TCCAGCTCCCTGTCACCCAATGGACCTGTGATATC +>EAS1_103:2:184:980:396 +AAGGAGGTCATCATACAATGATAAAAAGATCAATT +>EAS1_103:2:184:980:396 +ACATGAACTAACTATATGCTGTTTACAAGAAACTC +>EAS1_103:2:226:302:758 +GGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGG +>EAS1_103:2:226:302:758 +TGCTTGTGGTCTGACAGGCTGCAACTTTGAGCGNT +>EAS1_103:2:234:167:381 +AAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGAT +>EAS1_103:2:234:167:381 +AGAGAAAAGCATACAGTCATCTATAAAGGAAATCC +>EAS1_103:2:235:805:373 +TATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCA +>EAS1_103:2:235:805:373 +TTTACTGTCATAACTATGAAGAGACTATTTCCAGA +>EAS1_103:2:307:252:632 +ACCATCCTGCTAAATACATATGCACCTAACACAAG +>EAS1_103:2:307:252:632 +ATGTTCTACGCAAACAGAAACCAAATGAGAGAAGG +>EAS1_103:3:253:175:31 +CAAACAGAAACCAAATGAGAGAAGGAGTAGCTATA +>EAS1_103:3:253:175:31 +TTCATAAAACAAATACTACTAGACCTAAGAGGGAT +>EAS1_103:3:277:921:474 +AAAATATAGTTGAAAGCTCTAACAATAGACTAAAC +>EAS1_103:3:277:921:474 +AAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGA +>EAS1_103:3:320:505:814 +ACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCT +>EAS1_103:3:320:505:814 +CTGTCTTGATTTACTTGTTGTTGGTTTTCTTTTTC +>EAS1_103:3:323:196:855 +ACAAGCAAATGCTAAGATAATTCATCATCACTAAA +>EAS1_103:3:323:196:855 +TAAAAAATTAACATTACAACAGGAACAAAACCTCA +>EAS1_103:3:41:474:283 +TGAACCACACATTAATACTATGTTTCTTATCTGCA +>EAS1_103:3:41:474:283 +TTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGTCC +>EAS1_103:4:143:560:194 +GTTGTACTGGGCAATACATGAGATTATTAGGAAAT +>EAS1_103:4:143:560:194 +TCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCC +>EAS1_103:4:164:79:134 +ACAATGGGCTTCTCAGCGGAAACCTTACAAGCCAG +>EAS1_103:4:164:79:134 +AGATAATTCATCATCACTAAACCAGTCCTATAAGA +>EAS1_103:4:231:815:626 +GATCAATACAGCAAGAAGATATAACCATCCTACTA +>EAS1_103:4:231:815:626 +GCTGTTTACAAGAAACTCATTAATAAAGACATGAG +>EAS1_103:4:235:899:847 +AGAAACTCATTAATAAAGACATGAGTTCAGGTAAA +>EAS1_103:4:235:899:847 
+ATAAAAAGATCAATTCAGCAAGAAGATATAACCAT +>EAS1_103:4:294:525:849 +AGGAGCATTTTGTCAGTTACCAAATGTGTTTATTA +>EAS1_103:4:294:525:849 +CTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCTAA +>EAS1_103:4:61:433:385 +ACTGTGAGCCATCACAATGAACAACAGGAAGAAAA +>EAS1_103:4:61:433:385 +GGATGGAGGGAAGAGGGACGCTGAAGCACTTTGAT +>EAS1_103:5:141:711:813 +TATTACCAGAGGGATGGAGGGAAGAGGGACGCTGA +>EAS1_103:5:141:711:813 +TATTTGTAATGAAAACTATATTTATGCTATTCAGT +>EAS1_103:5:188:20:592 +CTACTAGACCTAAGAGGGATGAGAAATTACCTAAT +>EAS1_103:5:188:20:592 +GAGTAGCTATACTTATATCAGATAAAGCACACTTT +>EAS1_103:5:285:241:560 +GAAATTACAAAATATAGTTGAAAGCTCTAACAATA +>EAS1_103:5:285:241:560 +TAAAGTAACTGAACCTATGAGTCACAGGTATTCCT +>EAS1_103:5:319:165:698 +ATCACCCAGTCCCTGCCCCATATCTTGTAATCTCT +>EAS1_103:5:319:165:698 +TGACACCCAACTAATATTTGTCTGAGCAAAACAGT +>EAS1_103:6:7:858:437 +CAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGG +>EAS1_103:6:7:858:437 +CCATTTCTTTTGGCATTTGCCTTCAGACCCTACAC +>EAS1_103:7:112:578:782 +AAAATAAAACACATAGCTAAAACTAAAAAAGCAAA +>EAS1_103:7:112:578:782 +CAGATTCATAAAACAAATACTACTAGACCTAAGAG +>EAS1_103:7:139:578:951 +AAAAACATGAACTAACTATATGCTGTTTACAAGAA +>EAS1_103:7:139:578:951 +GAGGTCATCATACAATGATAAAAAGATCAATTCAG +>EAS1_103:7:166:84:766 +ATAACACAAGACTACCCAGATTCATAAAACAAATA +>EAS1_103:7:166:84:766 +GGAAAAAGATGTTCTACGCAAACAGAAACCAAATG +>EAS1_103:7:311:100:539 +AAATTCTTCATCCTGGACCCTGAGAGATTCTGCAG +>EAS1_103:7:311:100:539 +CAGTTACCAAATGTGTTTATTACCAGAGGGATGGA +>EAS1_103:7:313:83:546 +TCATTCACTCCAGCTCCCTGTCACCCAATGGACCT +>EAS1_103:7:313:83:546 +TGCAACAACCTTGAGAACCCCAGGGAATTTGTCAA +>EAS1_103:7:53:783:78 +AAAAACCTGTCAAACACGAATGTTATGCCCTGCTA +>EAS1_103:7:53:783:78 +TGCAAGACAGACTTCATCAAGATATGTAGTCATCA +>EAS1_105:1:115:226:443 +AAATCTGCGCTTGTACTTCTAAATCTATAAAAAAA +>EAS1_105:1:115:226:443 +ATATAACCATCCTACTAAATACATATGCACCTAAC +>EAS1_105:1:141:415:738 +AAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGA +>EAS1_105:1:141:415:738 +TTACCTAGTTGCTCTGTAGTCTCAATTAATTGTTT +>EAS1_105:1:234:185:359 +AAGGAGTAGCTATACTTATATCAGATAAAGCACAC +>EAS1_105:1:234:185:359 
+CAGATTCATAAAACAAATACTACTAGACCTAAGAG +>EAS1_105:1:28:745:352 +ATATGCACCTAACACAAGACTACCCAGATTCATAA +>EAS1_105:1:28:745:352 +CTTCTAAATCTATAACAAAATTAAAATTTAACAAA +>EAS1_105:1:297:283:948 +CCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTG +>EAS1_105:1:297:283:948 +TGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTC +>EAS1_105:1:329:407:872 +TTCGTTTTTTTTTTTTTTTTTTCCCTTTTTTTTTT +>EAS1_105:1:3:903:957 +AAGCCAGAAGAGATTGGATCTAATTTTTGGACTTC +>EAS1_105:1:3:903:957 +ATTCATCATCACTAAACCAGTCCTATAAGAAATGC +>EAS1_105:1:45:239:851 +CTAAGAGGGATGAGAAATTACCTAATTGGTACAAT +>EAS1_105:1:45:239:851 +TGAGAGAAGGAGTAGCTATACTTATATCAGATAAA +>EAS1_105:1:87:430:995 +GAAAAGAGTTAAAAACATGAACTAACTATATGCTG +>EAS1_105:1:87:430:995 +TACTCACCATCATAAATACACACAAAATTACAAAA +>EAS1_105:2:110:584:649 +CCATGTACACACGCTGTCCTATGTACTTATCATGA +>EAS1_105:2:110:584:649 +CTGTAATGATGCCCCTTGGCCATCACCCGGTCCCT +>EAS1_105:2:146:374:692 +AAATGCTCAAAAGAATTGTAAAAGTCAAAATTAAA +>EAS1_105:2:146:374:692 +ATTAAATTCCCCCACTTAAGAGATATAGATTGGCA +>EAS1_105:2:179:532:82 +CCATCACCCGGTCCCTGCCCCATCTCTTGTAATCT +>EAS1_105:2:179:532:82 +TGTACTTATCATGACTCTATCCCAAATTCCCAATT +>EAS1_105:2:280:662:939 +AAGAGAAAAGCATACAGTCATCTATAAAGGAAATC +>EAS1_105:2:280:662:939 +CCTCTTTAGTCTTGCTAGAGATTTAGACATCTAAA +>EAS1_105:2:299:360:220 +ATTCAGTTCTAAATATAGAAATTGAAACAGCTGTG +>EAS1_105:2:299:360:220 +GAAGAACTTAGATGCCCTCTTCTTCCAAAGATGAA +>EAS1_105:2:301:161:195 +ACAGTAAAATAAAACAAAGGAGGTCATCATACAAT +>EAS1_105:2:301:161:195 +GTACAATGTACAATATTCTGATGATGGTTACACTA +>EAS1_105:3:176:431:647 +ATCATGACTCTATCCCAAATTCCCAATTACGTCCT +>EAS1_105:3:176:431:647 +CCATCACCCAGTCCCTGCCCCATCTCTTGTAATCT +>EAS1_105:3:182:404:693 +ACCTCATACACACACATGGTTTAGGGGTATAATAC +>EAS1_105:3:182:404:693 +GCGTCTCTACCACAGGGGGCTGCGCGGTTTCCCAT +>EAS1_105:3:232:364:583 +CAATTAATTGAGACTACAGAGCAACTAGGTAAAAA +>EAS1_105:3:232:364:583 +TCATTAATAAAGACATGAGTTCAGGTAAAGGGGTG +>EAS1_105:3:308:66:538 +CGCTTGTACTTCTAAATCTATAACAAAATTAAAAT +>EAS1_105:3:308:66:538 +TATAACCATCCTACTAAATACATATGCACCTAACA +>EAS1_105:3:329:177:267 
+CATGAGATTATTAGGAAATGCTTTACTGTCATAAC +>EAS1_105:3:329:177:267 +TACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTA +>EAS1_105:3:7:35:528 +TTTTTTTTTTGTTCTTTACTCTTTTTTTTTTTTTT +>EAS1_105:3:86:823:683 +ACTTTGATGCCCTCTTCTTCCAAAGATGAAACGCG +>EAS1_105:3:86:823:683 +CAGTTCTAAATATAGAAATTGAAACAGCTGTGTTT +>EAS1_105:6:134:853:558 +AATATAATTGTGTCCATGTACACACGCTGTCCTAT +>EAS1_105:6:134:853:558 +GGAACAGGGAGGTGCACTAATGCGCTCCACGCCCA +>EAS1_105:6:162:594:858 +CTACCACAGGGGGCTGCGCGGTTTCCCATCATGAA +>EAS1_105:6:162:594:858 +GTGTTCTCATCAACCTCATACACACACATGGTTTA +>EAS1_105:6:172:827:592 +AATTGTAAAAGTCAAAATTAAAGTTCAATACTCAC +>EAS1_105:6:172:827:592 +TAGATTGGCAGAACAGATTTAAAAACATGAACTAA +>EAS1_105:6:23:885:274 +ACAAAGGAGGTCATCATACAATGATAAAAAGATCA +>EAS1_105:6:23:885:274 +CTACTACTCAATATATCCATGTAACAAATCTGCGC +>EAS1_105:6:267:953:459 +ACCTTACAAGCCAGAAGAGATTGGATCTAATTTTT +>EAS1_105:6:267:953:459 +CATCACTAAACCAGTCCTATAAGAAATGCTCAAAA +>EAS1_105:7:110:355:323 +ACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGG +>EAS1_105:7:110:355:323 +CCCAATGGACCTGTGATATCTGGATTCTGGGAAAT +>EAS1_105:7:168:247:414 +AAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATA +>EAS1_105:7:168:247:414 +TAAATTCCCCCACTTAAGAGATATAGATTGGCAGA +>EAS1_105:7:289:472:86 +ATGAACTTCTGTAATTGAAAAATTCATTTAAGAAA +>EAS1_105:7:45:462:455 +TCCCTGTCTTACTTCCAGCTCCCCAGACGGAACGC +>EAS1_105:7:45:462:455 +TCTAGGGGAACAGGGAGGTGCACTAATGCGCTCCA +>EAS1_105:7:57:722:347 +ACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGT +>EAS1_105:7:57:722:347 +CGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTG +>EAS1_105:8:160:130:351 +CAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAAT +>EAS1_105:8:160:130:351 +TGGATCTAATTTTTGGACTTCTTAAAGAAAAAAAA +>EAS1_105:8:179:119:876 +ATAAAAAGATCAATTCAGCAAGAAGATATAACCAT +>EAS1_105:8:179:119:876 +TTAAAAACATGAACTAACTATATGCTGTTTACCAG +>EAS1_105:8:24:718:322 +AACTCATTAATAATGTCATGAGTTCAGGTAAAGGG +>EAS1_105:8:24:718:322 +ACAATTAATTGAGACTACAGACCAATTATGTAAAA +>EAS1_105:8:254:617:73 +AACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT +>EAS1_105:8:254:617:73 +GCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTC +>EAS1_105:8:256:404:584 
+ACTGTCCTATGTACTTATCATGACTCTATCCCAAA +>EAS1_105:8:256:404:584 +GTTTCTGCCCCCAGCATGGTTGTACTGGGCAATAC +>EAS1_105:8:96:720:940 +AAAGGAAATCCCATCAGAATAACAATGGGCTTCTC +>EAS1_105:8:96:720:940 +TAAGATAATTCATCATCACTAAACCAGTCCTATAA +>EAS1_108:1:111:796:737 +ATCATGACTCTATCCCAAATTCCCAATTACGTCCT +>EAS1_108:1:111:796:737 +CCCCAGCATGGTTGTACTGGGCAATACATGTGATT +>EAS1_108:1:131:518:588 +AAAATCAGCAAGAGAAAAGCATACAGTCATCTATA +>EAS1_108:1:131:518:588 +ATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTG +>EAS1_108:1:148:286:316 +AGAACTTCCCTGGAGGTCTGATGGCGTTTCTCCCT +>EAS1_108:1:148:286:316 +CCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTGT +>EAS1_108:1:155:809:543 +AAATTTAACAAAAGTAAATAAAACACATAGCTAAA +>EAS1_108:1:155:809:543 +TACATATGCACCTAACACAAGACTACCCAGATTCA +>EAS1_108:1:16:438:245 +TTGAATAAAAAGGGATTAAATTCCCCCACTTAAGA +>EAS1_108:1:189:863:213 +CTACTAGACCTAAGAGGGATGAGAAATTACCTAAT +>EAS1_108:1:189:863:213 +TATACTTATATCAGATAAAGCACACTTTAAATCAA +>EAS1_108:1:242:419:512 +AATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAA +>EAS1_108:1:242:419:512 +CTAAACCAGTCCTATAAGAAATGCTCAAAAGAATT +>EAS1_108:1:277:194:143 +TGGGCTGTAATGATGCCCCTTGGCCATCACCCGGT +>EAS1_108:1:277:194:143 +TTTTTAATGATTCCTCAATGTTAAAATGTCTATTT +>EAS1_108:1:328:614:638 +AAAGGTTGTTGGGAGATTTTTAATGATTCCTCAAT +>EAS1_108:1:328:614:638 +ACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGC +>EAS1_108:1:33:779:821 +AATGTTATGCCCTGCTAAACTAAGCATCATAAATG +>EAS1_108:1:33:779:821 +TGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAA +>EAS1_108:1:49:911:980 +ACAATGTACAATATTCTGATGATGGTTACACTAAA +>EAS1_108:1:49:911:980 +GGGGAAAAAAGTAAACTCTCAAATATTGCTAGTGG +>EAS1_108:1:65:787:74 +TGTAATGAAAACTATATTTATGCTATTCAGTTCTA +>EAS1_108:2:102:543:160 +CAACAGGAACAAAACCTCATATATCAATATTAACT +>EAS1_108:2:102:543:160 +CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAA +>EAS1_108:2:116:966:193 +ATTAACATTACAACAGGAACAAAACCTCATATATC +>EAS1_108:2:116:966:193 +GACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGT +>EAS1_108:2:170:326:433 +CTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCGTC +>EAS1_108:2:170:326:433 +TTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGG +>EAS1_108:2:176:653:957 
+AAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAA +>EAS1_108:2:176:653:957 +ACATTACAACAGGAACAAAACCTCATATATCAATA +>EAS1_108:2:204:737:61 +AAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGT +>EAS1_108:2:204:737:61 +TCAGCAAGAGAAAAGCATACAGTCATCTATAAAGG +>EAS1_108:2:240:593:842 +ATCTGGATTCTGGGAAATTCTTCATCCTGGACCCT +>EAS1_108:2:240:593:842 +CATGGTTTAGGGGTATAATACCTCTACATGGCTGA +>EAS1_108:2:266:994:429 +ACAGGTTTTATAAAACAATTAATTGAGACTACAGA +>EAS1_108:2:266:994:429 +TGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAG +>EAS1_108:2:316:176:543 +ATGTCTATTTTTGTCTTGACACCCAACTAATATTT +>EAS1_108:2:316:176:543 +CATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCC +>EAS1_108:2:49:271:588 +CACATGGTTTAGGGGTATAATACCTCTACATGGCT +>EAS1_108:2:49:271:588 +GCTGCGCGGTTTCCCATCATGAAGCACTGAACTTC +>EAS1_108:2:62:879:264 +AATGAAAACTATATTTATGCTATTCAGTTCTAAAT +>EAS1_108:2:62:879:264 +GGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAA +>EAS1_108:2:82:879:246 +AAGAGGGACGCTGAAGAATTTTGATGCCCTCTTCT +>EAS1_108:2:82:879:246 +ACAACTGTGAGCCATCACAATGAACAACAGGAAGA +>EAS1_108:2:85:580:481 +AAAAAAGTAAATAAAACACATAGCTAAAACTAAAA +>EAS1_108:2:85:580:481 +CTAACACAAGACTACCCAGATTCATAAAACAAATA +>EAS1_108:3:216:988:883 +AAGCCAACACAATGAACAACAGGAAGAAAAGGTCT +>EAS1_108:3:216:988:883 +AGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTT +>EAS1_108:3:24:319:429 +GTTATGCCCTGCTAAACTAAGCATCATAAATGAAG +>EAS1_108:3:24:319:429 +TAATAAATACACACAAAAGTACAAAACTCACAGGT +>EAS1_108:3:75:934:439 +AAGAGACTATTGCCAGATGAACCACACATTAATAC +>EAS1_108:3:75:934:439 +CCCATCATGAAGCACTGAACTTCCACGTCTCATCT +>EAS1_108:3:82:356:253 +AGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATA +>EAS1_108:3:82:356:253 +GAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGGC +>EAS1_108:4:163:611:211 +TAAAGTCAACATGAAGGAAAAAAATTCTAAAATCA +>EAS1_108:4:163:611:211 +TGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAAC +>EAS1_108:4:248:753:731 +TATGCTATTCAGTTCTAAATATAGAAATTGAAACA +>EAS1_108:4:248:753:731 +TGAAGAACTTTGATGCCCTCTTCTTCCAAAGATGA +>EAS1_108:4:31:622:216 +ATTTAACAAAAGTAAATAAAACACATAGCTAAAAC +>EAS1_108:4:37:604:389 +ATGTTCTACGCAAACAGAAACCAAATGAGAGAAGG +>EAS1_108:4:37:604:389 
+TCATAAAACAAATACTACTAGACCTAAGAGGGATG +>EAS1_108:4:75:166:463 +GTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAAT +>EAS1_108:4:75:166:463 +TAACAATAGACTAAACCAAGCAGAAGAAAGAGGTT +>EAS1_108:4:91:521:517 +CCACACTGGTTCTCTTGAAAGCTTGGGCTGTAATG +>EAS1_108:4:91:521:517 +CCCTGCAATTAATATAATTGTGTCCATGTACACAC +>EAS1_108:5:115:193:231 +GAAGAGATTGGATCTAATTTTTGGACTTCTTAAAG +>EAS1_108:5:115:193:231 +TATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAA +>EAS1_108:5:11:555:330 +CGCTTGTACTTCTAAATCTATAAAAAAATTAAAAT +>EAS1_108:5:11:555:330 +GGAAAACAATTTGGTAATTTCGTTTTTTTTTTTTT +>EAS1_108:5:175:149:296 +AAAAATTAACATTACAACAGGAACAAAACCTCATA +>EAS1_108:5:175:149:296 +AAGGGGAAATAAAGTCAAGCCTTTCCTGACAAGCA +>EAS1_108:5:180:905:36 +CCTCCGTGTCCTCCCATCTGGCCTCGTCCACACTG +>EAS1_108:5:180:905:36 +TACTATGTTTCTTATCTGCACATTACTACCCTGCA +>EAS1_108:5:229:717:121 +ACTATGAAGAGACTATTGCCAGATGAACCACACAC +>EAS1_108:5:229:717:121 +TTCTTCTGAGGGAAGAACAGCTTAGGTATCAATTT +>EAS1_108:5:321:712:224 +AGATAAAGCACACTTTAAATCAACAACAGAAAAAT +>EAS1_108:5:321:712:224 +ATGAGAAATTACCTAATTGGTACAATGTACAATAT +>EAS1_108:5:89:942:84 +AAAACCTGTCAAACACGAATGTTATGCCCTGCTAA +>EAS1_108:5:89:942:84 +TACAAAACTCACAGGTTTTATAAAACAATTAATTG +>EAS1_108:6:159:493:275 +ACAAAACTCACAGGTTTTATAAAACAATTAATTGA +>EAS1_108:6:159:493:275 +TGCTGTTTACAAGAAACTCATTAATAAAGACATGA +>EAS1_108:6:165:464:123 +CATACACACACATGGTTTAGGGGTATAATACCTCT +>EAS1_108:6:165:464:123 +GTCTCTACCACAGGGGGCTGCGCGGTTTCCCATCA +>EAS1_108:6:222:579:961 +AGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAG +>EAS1_108:6:222:579:961 +CAGAAACCTTACAAGCCAGAAGAGATTGGATCTAA +>EAS1_108:6:71:187:824 +AGACAGACTTCATCAAGATATGTAGTCATCAGACT +>EAS1_108:6:71:187:824 +TCTAATTTTTGGACTTCTTAAAGAAAAAAAAACCT +>EAS1_108:6:73:735:329 +AAAGGGATTAAATTCCCCCACTTAAGAGATATAGA +>EAS1_108:6:73:735:329 +TAGCTATACTTATATCAGATAAAGCACACTTTAAA +>EAS1_108:6:77:48:860 +CTCTGTCTTGATTTACTTGTTGTTTGTTTTCTGTT +>EAS1_108:6:77:48:860 +TAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGC +>EAS1_108:6:94:294:387 +ACCATCATAAATACACACAAAAGTACAAAACTCAC +>EAS1_108:6:94:294:387 +GAATGTTATGCCCTGCTAAACTAAGCATCATAAAT 
+>EAS1_108:6:95:235:746 +CTAAGCATCATAAATGAAGGGGAAATAAAGTCAAG +>EAS1_108:6:95:235:746 +TCTAAAATCAGCAAGAGAAAAGCATACAGACATCT +>EAS1_108:7:108:440:208 +CCCATCCTACTAAATACATATGCACCTAACACAAG +>EAS1_108:7:108:440:208 +TTCAGGTAAAGGGGAGGAAAAAGATGTTCTACGCA +>EAS1_108:7:222:538:267 +ATAATTGTGTCCATGTACACACGCTGTCCTATTTA +>EAS1_108:7:222:538:267 +TCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCTT +>EAS1_108:7:266:556:252 +CCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAA +>EAS1_108:7:266:556:252 +GATTTGGTGGAAGACATAATCCCACGCTTCCTATG +>EAS1_108:7:82:926:112 +CAATGTCAGGGAAGGAGCATTTTGTCAGTTGCCAA +>EAS1_108:7:82:926:112 +CTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTC +>EAS1_108:8:118:440:850 +AATTGAAAAATTCATTTAAGAAATTACAAAATATA +>EAS1_108:8:129:477:427 +ATGGACCTGTGATATCTGGATTCTGGGAAATTCTT +>EAS1_108:8:129:477:427 +TACACACACATGGTTTAGGGGTATAATACCTCTAC +>EAS1_108:8:19:929:765 +AAAAACATGAACTAACTATATGCTGTTTACAAGAA +>EAS1_108:8:19:929:765 +ATCAACAACAGTAAAATAAAACAAAGGAGGTCATC +>EAS1_93:1:131:946:353 +TCTCTTGAAAGCTTGGGCTGTAATGATGCCCCTTG +>EAS1_93:1:131:946:353 +TGTACACACGCTGTCCTATGTACTTATCATGACTC +>EAS1_93:1:179:629:513 +GTCAACATGAAGGAAAAAAATTCTAAAATCAGCAA +>EAS1_93:1:179:629:513 +GTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAG +>EAS1_93:1:20:635:509 +CAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGT +>EAS1_93:1:20:635:509 +TGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGT +>EAS1_93:1:214:784:690 +AAATCCCATCAGAATAACAATGGGCTTCTCAGCGG +>EAS1_93:1:214:784:690 +GATAATTCATCATCACTAAACCAGTCCTATAAGAA +>EAS1_93:1:216:381:608 +TAATTGTGTCCATGTACACTCGCTGTCCTATGTAC +>EAS1_93:1:216:381:608 +TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG +>EAS1_93:1:253:59:242 +CCCCATCTCTTGTAATCTCTCTCCTTTTTGCTGCA +>EAS1_93:1:253:59:242 +TTTGTCTTGACACCCAACTAATATTTGTCTGAGCA +>EAS1_93:1:264:988:663 +CGAGGGGAACAGGGAGGTGCACTAATGCGCTCCAC +>EAS1_93:1:264:988:663 +TGGCTGATTATGAAAACAATGTTCCCCAGATACCA +>EAS1_93:1:92:213:217 +ACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCT +>EAS1_93:1:92:213:217 +TGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGT +>EAS1_93:2:173:995:93 +GCTGGAGGGAAGAGGGACGCTGAAGAACTTTGATG +>EAS1_93:2:173:995:93 
+TAATGAAAACTATATTTATGCTATTCAGTTCTAAA +>EAS1_93:2:286:923:549 +TCAAATGAACTTCTGTAATTGAAAAATTCATTTAA +>EAS1_93:2:30:466:652 +AAAAATGAACAGAGCTTTCAAGAAGTATGAGATTA +>EAS1_93:2:30:466:652 +AAGAGGCTAAAAGAATGCCAGGAAGATACATTGCA +>EAS1_93:2:313:711:530 +ACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTT +>EAS1_93:2:313:711:530 +TAATTGAGACTACAGAGCAACTAGGTAAAAAATTA +>EAS1_93:3:181:93:694 +ACTCATTAATAAAGACATGAGTTCAGGTAAAGGGG +>EAS1_93:3:181:93:694 +TTAATTGAGACTACAGAGCAACTAGGTAAAAAATT +>EAS1_93:3:79:879:15 +AGACTACAGAGCAACTAGGTAAAAAATTAACATTA +>EAS1_93:3:79:879:15 +AGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATA +>EAS1_93:4:160:896:275 +AAAACTATATTTATGCTATTCAGTTCTAAATATAG +>EAS1_93:4:160:896:275 +AGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTC +>EAS1_93:4:321:271:138 +GTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCA +>EAS1_93:4:321:271:138 +TTTGGTGGAAGACATAATCCCACGCTTCCTATGGA +>EAS1_93:4:325:352:67 +ATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAA +>EAS1_93:4:325:352:67 +TACAGAGCAACTAGGTAAAAAATTAACATTACAAC +>EAS1_93:5:197:52:58 +AAATGAACAGAGCTTTCAAGAAGTATGAGATTATG +>EAS1_93:5:197:52:58 +TCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGA +>EAS1_93:5:246:177:525 +CATCATAAATACACACAAAAGTAAAAAACTCACAG +>EAS1_93:5:246:177:525 +TCTTAAAGAAAAAAAAACCTGTCAAACACGAATGT +>EAS1_93:5:256:444:399 +CTGCTACTCAATATATCCATGTAACAAATCTGCGC +>EAS1_93:5:256:444:399 +GAAGATATAACCATCCTACTAAATACATATGCACC +>EAS1_93:5:292:122:666 +GGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTG +>EAS1_93:5:292:122:666 +TACTGTCATAACTATGAAGAGACTATTGTCAGATG +>EAS1_93:5:62:969:12 +TATAAAGGAAATCCCATCAGAATAACAATGGGCTT +>EAS1_93:5:62:969:12 +TCTTGCTAGAGATTTAGACATCTAAATGAAAGAGG +>EAS1_93:5:66:372:343 +ATTACAAAATATAGTTGAAAGATCTAACAATAGAC +>EAS1_93:5:66:372:343 +TATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGG +>EAS1_93:6:132:717:233 +AGAGAACTTCCCTGGAGGTCTGATGGAGTTTCTCC +>EAS1_93:6:132:717:233 +TGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGA +>EAS1_93:6:159:273:253 +TTAGTCTTGCTAGAGATTTAGACATCTAAATGAAA +>EAS1_93:6:191:948:257 +AACTAATATTTGTCTGAGCAAAACAGTCTAGATGA +>EAS1_93:6:191:948:257 +CTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGAT +>EAS1_93:6:216:47:302 
+AAAAAAAAACCTGTCAAACACGAATGTTATGCCCT +>EAS1_93:6:216:47:302 +AATACTCACCATCATAAATACACACAAAAGTACAA +>EAS1_93:6:218:144:794 +GGGTGCATTGCTATGTTGCGGTCGCTTTGCCTCCT +>EAS1_93:6:218:144:794 +TTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTT +>EAS1_93:6:238:514:194 +AAACTATTTGAGGAAGTAATTGGGGAAAACCTCTT +>EAS1_93:6:238:514:194 +AATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGA +>EAS1_93:6:255:441:47 +AACAACAGTAAAATAAAACAAAGGAGGTCATCATA +>EAS1_93:6:255:441:47 +TGGTACAATGTACAATATTCTGATGATGGTTACAC +>EAS1_93:6:271:244:568 +ATGGACCTGTGATATCTGGATTCTGGGAAATTCTT +>EAS1_93:6:271:244:568 +CTTGCAACAACCTTGAGAACCCCAGGGAATTTGTC +>EAS1_93:6:45:601:439 +AATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAG +>EAS1_93:6:45:601:439 +ATAAAAAGGGATTAAATTCCCCCACTTAAGAGATA +>EAS1_93:7:14:426:613 +AGGGAGGGAGGGAAGAGGGACGCTGAAGAACTTTG +>EAS1_93:7:14:426:613 +GTAATGAAAACTATATTTATGCTATTCAGTTCTAA +>EAS1_93:7:252:171:323 +GCTATTCAGTTCTAAATATAGAAATTGAAACAGCT +>EAS1_93:7:270:995:918 +AAAATTCATTTAAGAAATTACAAAATATAGTTGAA +>EAS1_93:7:319:280:57 +AAAGGAAATCCCATCAGAATAACAATGGGCTTCTC +>EAS1_93:7:319:280:57 +TGCTAGAGATTTAGACATCTAAATGAAAGAGGCTC +>EAS1_93:8:13:325:483 +AAAAACATGAACTAACTATATGCTGTTTACAAGAA +>EAS1_93:8:13:325:483 +ATCATACAATGATAAAAAGATCAATTCAGCAAGAA +>EAS1_93:8:14:601:624 +AAAAGGTGATGTGTGTTCTCATCAACCTCATACAC +>EAS1_93:8:14:601:624 +CTGCGCTCTCATTCACTCCAGCTCCCTGTCAACCC +>EAS1_95:1:16:823:343 +AGACATAACCCCACGCTTCCTATGGAAAGGTTGTT +>EAS1_95:1:16:823:343 +TCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAA +>EAS1_95:1:196:533:921 +AAATGTGTTTATTACCAGAGGGATGGAGGGAAGAG +>EAS1_95:1:196:533:921 +CCTGAGAGATTCTGCAGCCCAGATCCAGATTGCTT +>EAS1_95:1:202:341:984 +GGAGGTCATCATACAATGATAAAAAGATCAATTCA +>EAS1_95:1:202:341:984 +TGAACTAACTATATGCTGTTTACAAGAAACTCATT +>EAS1_95:1:249:986:224 +CTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGAT +>EAS1_95:1:249:986:224 +TATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGA +>EAS1_95:1:261:504:780 +TCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTT +>EAS1_95:1:261:504:780 +TTTGTCTGAGAAAAACAGTCTAGATGAGAGAGAAC +>EAS1_95:1:301:54:240 +AACAGGGAGGTGCACTAATGCGCTCCACGCCCAAG +>EAS1_95:1:301:54:240 
+CCCTGCAATTAATATAATTGTGTCCATGTACACAC +>EAS1_95:1:77:589:741 +AGTAAAATAAAACAAAGGAGGTCATCATACAATGA +>EAS1_95:1:77:589:741 +TGGTTACACTAAAAGCCCATACTTTACTGCTACTC +>EAS1_95:2:142:353:398 +CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAA +>EAS1_95:2:142:353:398 +GGTAAAAAATTAACATTACAACAGGAACAAAACCT +>EAS1_95:2:162:503:769 +AGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTT +>EAS1_95:2:162:503:769 +AGGCTGCAACTGTGAGCCATCACAATGAACAACAG +>EAS1_95:2:198:691:595 +ACACACATGGTTTAGGGGTATAATACCTCTACATG +>EAS1_95:2:198:691:595 +CATGAAGCACTGAACTTCCACGTCTCATCTAGGGG +>EAS1_95:2:211:954:174 +AAAGAAAAAACAAAAACTATGCTAAGTATTGGTAA +>EAS1_95:2:211:954:174 +AGACCTAAGAGGGATGAGAAATTACCTAATTGGTA +>EAS1_95:2:228:915:631 +AAGATATGTAGTCATCAGACTATCTAAAGTCAACA +>EAS1_95:2:228:915:631 +ATTATTAAAGAAAAAAAAACCTGTCAAACACGAAT +>EAS1_95:2:278:918:892 +AGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCT +>EAS1_95:2:278:918:892 +CCATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAA +>EAS1_95:2:40:918:950 +AATATAATTGTGTCCATGTACACACGCTTTCCTTT +>EAS1_95:2:40:918:950 +GTTCTCTTGAAAGCTTGGGCTGTAATGATGCCCCT +>EAS1_95:3:268:523:511 +ACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGAT +>EAS1_95:3:268:523:511 +TGTCCATGTACACACGCTGTCCTATGTACTTATCA +>EAS1_95:3:303:970:243 +AACCTGTCAAACACGAATGTTATGCCCTGCTAAAC +>EAS1_95:3:303:970:243 +CAGACTTCATCAAGATATGTAGTCATCAGACTATC +>EAS1_95:3:308:956:873 +ATAAAAATAAGTGTGTCCATGTACACACGCTGTCC +>EAS1_95:3:308:956:873 +CTCATCTAGGGGAACAGGGAGGTGCACTAATGCGC +>EAS1_95:4:174:157:573 +CAGAAACCAAATGAGAGAAGGAGTAGCTATACTTA +>EAS1_95:4:174:157:573 +TAAAACAAATACTACTAGACCTAAGAGGGATGAGA +>EAS1_95:4:176:971:874 +TAAAATCAGAAGAGAAAAGCATACAGTCATCTATA +>EAS1_95:4:176:971:874 +TGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTG +>EAS1_95:4:184:17:636 +TACACACACATGGTTTAGGGGTATAATACCTCTAC +>EAS1_95:4:184:17:636 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTCACAGGT +>EAS1_95:4:224:592:744 +GATATGTAGTCATCAGACTATCTAAAGTCAACATG +>EAS1_95:4:224:592:744 +TATGAGATTATGTAAAGTAACTGAACCTATGAGTC +>EAS1_95:4:238:124:196 +TTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTT +>EAS1_95:4:61:631:567 +AAGAAGATATAACCATCCTACTAAATACATATGCA 
+>EAS1_95:4:61:631:567 +CATTAATAAAGACATGAGTTCAGGTAAAGGGGTGG +>EAS1_95:4:66:179:118 +CCATGTACACACGCTGTCCTATGTACTTATCATGA +>EAS1_95:4:66:179:118 +TGGGCTGTAATGATGCCCCTTGGCCATCACCCAGT +>EAS1_95:4:71:517:742 +AACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT +>EAS1_95:4:71:517:742 +AAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAG +>EAS1_95:5:257:654:116 +TAACAATAGACTAAACCAAGCAGAAGAAAGAGGTT +>EAS1_95:5:257:654:116 +TCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAA +>EAS1_95:5:263:511:936 +CAAATAGGTAAAAAATTAACATTACAACAGGAACA +>EAS1_95:5:263:511:936 +CAAGTCTTTCCTGACAAGCAAATGCTAAGATAATT +>EAS1_95:5:284:212:932 +CTTTAAATCAACAACAATAAAAAAAAACAAAGGAG +>EAS1_95:5:284:212:932 +TGATGATGGTTACGCTAAAAGTCCATGCTTTACTG +>EAS1_95:6:174:650:125 +AAGCATCATAAATGAAGGGGAAATAAAGTCAAGTC +>EAS1_95:6:174:650:125 +CAGGTTTTATAAAACAATTAATTGAGACTACAGAG +>EAS1_95:6:185:312:167 +CTGAAGAACTTTGATGCCCTCTTCTTCCAAAGATG +>EAS1_95:6:185:312:167 +TGGTCTGACAGGCTGCAACTGTGAGCCATCACAAT +>EAS1_95:6:194:696:490 +ACATTAATACTATGTTTCTTATCTGCACATTACTA +>EAS1_95:6:194:696:490 +TTCCACGTCTCATCTAGGGGAACAGGGAGGTGCAC +>EAS1_95:6:53:156:845 +ACACCCAACTAATATTTGTCTGAGCAAAACAGTCT +>EAS1_95:6:53:156:845 +TTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTCT +>EAS1_95:6:87:734:888 +ATACCATCCCTGTCTTACTTCCAGCTCCCCAGAGG +>EAS1_95:6:87:734:888 +TGCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTG +>EAS1_95:7:155:530:532 +AGCAAGAAGATATAACCATCCTACTAAATACATAT +>EAS1_95:7:155:530:532 +TGCGCTTGTACTTCTAAATCTATAACAAAATTAAA +>EAS1_95:7:280:607:113 +ATCCATGTAACAAATCTGCGCTTGTACTTCTAAAT +>EAS1_95:7:280:607:113 +GGAGTATAAATTGTTTTCCACTTTGGAAAACAATT +>EAS1_95:7:282:817:710 +TGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCC +>EAS1_95:7:282:817:710 +TTTGTAATGAAAACTATATTTATGCTATTCAGTTC +>EAS1_95:7:310:800:761 +AAAGCACACTTTAAATCAACAACAGTAAAATAAAA +>EAS1_95:7:310:800:761 +CAATATTCTGATGATGGTTACACTAAAAGCCCATA +>EAS1_95:7:46:522:426 +AAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGA +>EAS1_95:7:46:522:426 +GGTAAAAAATTAACATTACAACAGGAACAAAACCT +>EAS1_95:7:55:506:125 +CTTTACTGTCATAACTATGAAGAGACTACTGCCAG +>EAS1_95:7:55:506:125 +TCTACCACAGGGGGCTGCGCGGTTTCCCATCATGA 
+>EAS1_95:7:61:702:720 +ATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAA +>EAS1_95:7:61:702:720 +CTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTC +>EAS1_95:7:74:866:49 +CATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTC +>EAS1_95:7:74:866:49 +CCAACCTACTAAATACATATGCACCTAACACAAGA +>EAS1_97:2:128:629:484 +AAAAAAGTAAATAAAACACATAGCTAAAACTAAAA +>EAS1_97:2:128:629:484 +GATTCATAAAACAAATACTACTAGACCTAAGAGGG +>EAS1_97:2:193:420:78 +ATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTG +>EAS1_97:2:193:420:78 +TTGAGACTACAGAGCAACTAGGTAAAAAATTAACA +>EAS1_97:2:59:882:980 +AATATAATTGTGTCCATGTACACACGCTGTCCTAT +>EAS1_97:2:59:882:980 +GGGCTGTAATGATGCCCCTTGGCCATCACCCGGTC +>EAS1_97:2:96:419:327 +TACTAAATACATATGCACCTAACACAAGACTACCC +>EAS1_97:2:96:419:327 +TCTAAATCTATAACAAAATTAAAATTTAACAAAAG +>EAS1_97:2:9:203:653 +CACCCAACTAATATTTGTCTGAGCAAAACAGTCTA +>EAS1_97:2:9:203:653 +TCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTT +>EAS1_97:3:147:423:584 +GAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAG +>EAS1_97:3:147:423:584 +GCATACAGTCATCTATAAAGGAAATCCCATCAGAA +>EAS1_97:3:160:173:889 +TATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAA +>EAS1_97:3:160:173:889 +TCAAGATATGTAGTCATCAGACTATCTAAAGTCAA +>EAS1_97:3:277:144:848 +TAACAATAGACTAAACCAAGCAGAAGAAAGAGGTT +>EAS1_97:3:277:144:848 +TATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGG +>EAS1_97:3:73:292:429 +GAAAGCTTTCAACGCTTCTAGCCATTTCTTTTTGC +>EAS1_97:3:73:292:429 +TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG +>EAS1_97:4:261:267:597 +GGGTAATAAAGTCAAGTCTTTCCTGACAAGCAAAT +>EAS1_97:4:261:267:597 +TTGAGAATAAAGAGCAACTAGGTAAAAAATTAACA +>EAS1_97:4:274:287:423 +CTAAATCTATAAAAAAATTAAAATTTAACAAAAGT +>EAS1_97:4:274:287:423 +TTTTTTTTTTTTTTTTTCTCTTTTTTTTTTTTTTT +>EAS1_97:4:290:121:79 +ATTGGTAAAGATGTGGGGAAAAAAGTAAACTCTCA +>EAS1_97:4:290:121:79 +TGATGATGGTTACACTAAAAGCCCATACTTTACTG +>EAS1_97:4:77:29:126 +ACAGGGAATTTGTCAATGTCAGGGAAGGAGCATTT +>EAS1_97:4:77:29:126 +GCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCT +>EAS1_97:4:83:731:540 +CTAGGTAAAAAATTAACATTACAACAGGAACAAAA +>EAS1_97:4:83:731:540 +TAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAG +>EAS1_97:5:154:952:558 +AAAGACTACCCAGATTCATAAAACAAATACTACTA 
+>EAS1_97:5:154:952:558 +GCAAACAGAAACCAAATGAGAGAAGGAGTAGCTAT +>EAS1_97:5:219:174:684 +AAAAAAACTGTCAAACACGAATGTTATGCCCTGCT +>EAS1_97:5:219:174:684 +AGTCATCAGACTATCTAAAGTCAACATGAAGGAAA +>EAS1_97:5:28:538:148 +AAAAAAAAACCTGTCAAACACGAATGTTATGCCCT +>EAS1_97:5:28:538:148 +TCATCAAGATATGTAGTCATCAGACTATCTAAAGT +>EAS1_97:5:318:177:383 +TACTACCCTGCAATTAATATAATTGTGTCCATGTA +>EAS1_97:5:318:177:383 +TCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGC +>EAS1_97:5:84:927:843 +CCAGCATGGTTGTACTGGGCAATACATGAGATTAT +>EAS1_97:5:84:927:843 +TTACGTCCTATCTTCTTCTTAGGGAAGAACAGCTT +>EAS1_97:6:222:305:337 +TTTTTTTTTTTTTTTTTTCCCTTTTTTTTTTTTTT +>EAS1_97:6:308:667:658 +AAAGATCACTTCAGCAATAAGATATAACCATCCTA +>EAS1_97:6:308:667:658 +TAAAAACATGAACTAACTATATCCTTCTTACAATA +>EAS1_97:6:93:334:858 +CTGCCCCCAGCATGGTTGTACTTGGCAATACATGA +>EAS1_97:6:93:334:858 +GTACTTATCATGACTCTATCCCAAATTCCCAATTA +>EAS1_97:7:20:979:96 +GAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCA +>EAS1_97:7:264:642:506 +AAATATAGAAATTGAAACAGCTGTGTTTATTGTAT +>EAS1_97:7:264:642:506 +ACTTCATCCAAAGATGAAACGCGTAACTGCGCTCT +>EAS1_97:7:28:979:519 +AAAAAGTAAACTCTCAAATATTGCTAGTGGGAGTA +>EAS1_97:7:28:979:519 +CCCATACTTTACTGCTACTCAATATATCCATGTAA +>EAS1_97:7:63:727:203 +AAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGA +>EAS1_97:7:63:727:203 +AGTCTCTTATGAATTAACCCAGTCAGACAAAAATA +>EAS1_97:7:9:648:712 +AAGAAGTATGAGATTATGTAAAGTAACTGAACCTA +>EAS1_97:7:9:648:712 +TACATTGCAAGACAGACTTCATCAAGATATGTAGT +>EAS1_97:8:36:927:478 +AAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATC +>EAS1_97:8:36:927:478 +GACCCTACACGAATGCGTCTCTACCACAGGGGGCT +>EAS1_99:1:17:595:863 +AAGCTACTCAATATATCCATGTAACAAATCTGCGC +>EAS1_99:1:17:595:863 +ATAACCATCCTACTAAATACACATGCACCTAACTC +>EAS1_99:1:187:715:521 +AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT +>EAS1_99:1:187:715:521 +GCATACAGTCATCTATAAAGGAAATCCCATCAGAA +>EAS1_99:1:34:649:318 +AAGAATAACAATGGGCTTCACAGCGGAACCCTTAC +>EAS1_99:1:34:649:318 +ATGAAAGAGGCTCAAAGAATGCCAGGAAGATACAT +>EAS1_99:1:86:871:319 +GGCTTCTCAGCGGAAACCTTACAAGCCAGAAGAGA +>EAS1_99:1:86:871:319 +TGCTAAGATAATTCATCATCACTAAACCAGTCCTA 
+>EAS1_99:2:152:355:962 +CAGTCATCTATAAAGGAAATCCCATCAGAATAACA +>EAS1_99:2:152:355:962 +TATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT +>EAS1_99:2:162:257:203 +AAAAAGATCAATTCAGCAAGAAGATATAACCATCC +>EAS1_99:2:162:257:203 +ATATCCATGTAACAAATCTGCGCTTGTACTTCTAA +>EAS1_99:2:188:782:483 +CTAAAATCAGCAAGAGAAAAGCATACAGTCATCTA +>EAS1_99:2:188:782:483 +GAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAG +>EAS1_99:3:118:851:285 +CCCAATTACGTCCTATCTTCTTCTTAGGGAAGAAC +>EAS1_99:3:118:851:285 +TGGGCAATACATGAGATTATTAGGAAATGCTTTAC +>EAS1_99:3:135:543:760 +ATTTGCCTTCAGACCCTACACGAATGCGTCTCTAC +>EAS1_99:3:135:543:760 +TTCAAAAGGTGATGTGTGTTCTCATCAACCTCATA +>EAS1_99:3:187:791:153 +AATACATGAGATTATTAGGAAATGCTTTACTGTCA +>EAS1_99:3:187:791:153 +TACACGAATGCGTCTCTACCACAGGGGGCTGCGCG +>EAS1_99:3:21:423:169 +ACTAAAAGCCCATACTTTACTGCTACTCAATATAT +>EAS1_99:3:21:423:169 +GGAGTATAAATTGTTTTCCACTTTGGAAAACAATT +>EAS1_99:3:61:183:767 +GTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATC +>EAS1_99:3:61:183:767 +TTGCCAGATGAACCACACCTTAATACTATGTTTCT +>EAS1_99:5:147:479:41 +CTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTT +>EAS1_99:5:147:479:41 +TACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTA +>EAS1_99:5:191:885:623 +TGCAAGACAGACTTCATCAAGATATGTAGTCATCA +>EAS1_99:5:191:885:623 +TTAAAGAAAAAAAAACCTGTCAAACACGAATGTTA +>EAS1_99:6:135:354:66 +GATACATTGCAAGACAGACTTCATCAAGATATGTA +>EAS1_99:6:135:354:66 +TATGAGATTATGTAAAGTAACTGAACCTATGAGTC +>EAS1_99:6:177:562:806 +ACAGTGTAGATGAGAGAGACCTTCCCTGGAGGTCT +>EAS1_99:6:177:562:806 +TGATTTACTTGTTGTTGGTTTTCTGTTTCTTTTTT +>EAS1_99:6:181:392:500 +GGAAATCCCATCAGAATAACAATGGGCTTCTCAGC +>EAS1_99:6:181:392:500 +TTAGTCTTGCTAGAGATTTAGACATCTAAATGAAA +>EAS1_99:6:63:48:631 +CAATACATGAGATTATTAGGAAATGCTTTACTGTC +>EAS1_99:6:63:48:631 +TCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTA +>EAS1_99:7:126:361:250 +AAAAAATTAACATTACAACAGGAACAAAACCTCAT +>EAS1_99:7:126:361:250 +TCTACGCAAACAGAAACCAAATGAGAGAAGGAGTA +>EAS1_99:7:171:196:287 +ATAACAATGGGCTTCTCAGCAGAAACCTTACAAGC +>EAS1_99:7:171:196:287 +ATAATTCATCATCACTAAACCAGTCCTATAAGAAA +>EAS1_99:7:183:645:699 
+GTGGCCCTCCCCCATTCCCTGCCCCATCTCTTGTA +>EAS1_99:7:183:645:699 +TATCCCAAATTCCCAATTACGTCCTATCTTCTTCT +>EAS1_99:7:37:400:627 +ACATGAGATTATTAGGAAATGCTTTACTGTCATAA +>EAS1_99:7:37:400:627 +TCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTG +>EAS1_99:8:117:578:853 +AATGAACTTCTGTAATTGAAAAATTCATTTAAGAA +>EAS1_99:8:152:778:228 +ATCATAAATACACACAAAAGTACAAAACTCACAGG +>EAS1_99:8:152:778:228 +ATTTAAAAACATGAACTAACTATATGCTGTTTACA +>EAS1_99:8:187:199:369 +TGAAAGAGGCTCAAAGAATGCCAGGAAGATACATT +>EAS1_99:8:27:228:31 +AAATAAAACAAAGGAGGTCATCATACAATGATAAA +>EAS1_99:8:27:228:31 +GGTTACACTAAAAGCCCATACTTTACTGCTACTCA +>EAS1_99:8:99:756:130 +GAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCT +>EAS1_99:8:99:756:130 +GACCCTACACGAATGCGTCTCTACCACAGGGGGCT +>EAS218_1:2:10:686:1024 +ACAAGAAACTCATTAATAAAGACATGAGTTCAGGT +>EAS218_1:2:10:686:1024 +CATACAATGATAAAAAGATCAATTCAGCAAGAAGA +>EAS218_1:2:15:1763:1143 +AAACAAATACTACTAGACCTAAGAGGGATGAGAAA +>EAS218_1:2:15:1763:1143 +TGAGAGAAGGAGTAGCTATACTTATATCAGATAAA +>EAS218_1:2:18:1498:1475 +CTTGGGCTGTAATGATGCCCCTTGGCCATCACCCG +>EAS218_1:2:18:1498:1475 +GAAAGGTTGTTGGGAGATTTTTAATGATTCCTCAA +>EAS218_1:2:19:752:816 +CGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTG +>EAS218_1:2:19:752:816 +TTTGGTGGAAGACATAATCCCACGCTTCCTATGGA +>EAS218_1:2:26:211:481 +ACAAAATATAGTTGAAAGCTCTAACAATAGACTAA +>EAS218_1:2:26:211:481 +CACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAA +>EAS218_1:2:40:1291:1045 +CTAACACAAGACTACCCAGATTCATAAAACAAATA +>EAS218_1:2:40:1291:1045 +GTAAAGGGGTGGAAAAAGATGTTCTACGCAACAAG +>EAS218_1:2:64:1318:1711 +GGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTT +>EAS218_1:2:64:1318:1711 +TGAAAACTATATTTATGCTATTCAGTTCTAAATAT +>EAS218_1:4:14:1872:1521 +TCAAAGAATGCCAGGAAGATACATTGCAAGTCAGA +>EAS218_1:4:14:1872:1521 +TCATCAAAAACCTTACAAGCCAGAAGAGATTGGAT +>EAS218_1:4:15:856:340 +CACGCTGTCCTATGTACTTATCATGACTCTATCCC +>EAS218_1:4:15:856:340 +CCCCAGCATGGTTGCACTGGGCAATACATGAGATT +>EAS218_1:4:28:315:310 +AAACTGTTCTCTTGAAAGCTTGGGCTGTAATGATG +>EAS218_1:4:28:315:310 +CATGTACACACGCTGTCCTATGTACTTATCATGAC +>EAS218_1:4:37:1626:862 
+ACCCAACTAATATTTGTCTGAGCAAAACAGTCTAG +>EAS218_1:4:37:1626:862 +TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCT +>EAS218_1:4:61:1369:440 +AAAGACATGATTTCAGGTAAAGGGGTGGAAAAAGA +>EAS218_1:4:61:1369:440 +CAGGTTTTATAAAACAATTAATTGAGACTACAGAG +>EAS218_1:4:62:561:531 +AGCTATACTTATATCAGATAAAGCACACTTTAAAT +>EAS218_1:4:62:561:531 +TACTAGACCTAAGAGGGATGAGAAATTACCTAATT +>EAS218_1:4:71:832:743 +ACCCAGTCCCTGCCCCATCTCTTGTAATCTCTCTC +>EAS218_1:4:71:832:743 +CTATGTACTTATCATGACTCTATCCCAAATTCCCA +>EAS218_1:4:73:42:1038 +AAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGT +>EAS218_1:4:73:42:1038 +TCTGTCTTGATTTACTTGTTGTTGGTTTTCTGTTT +>EAS218_1:4:75:555:1591 +TGAACTTCTGTAATTGAAAAATTCATTTAAGAAAT +>EAS218_1:6:49:905:27 +CCACAGGGGGCTGCGCGGTTTCCCATCCTGAAGCA +>EAS218_1:6:49:905:27 +GAAGAGACTATTGCCAGTTGAACCACACATTAATA +>EAS218_1:6:66:1282:1215 +GTGATGTGTGTTCTCATCAACCTCATACACACACA +>EAS218_1:6:66:1282:1215 +TTCAGACCCTACACGAATGCGTCTCTACCACAGGG +>EAS218_1:6:77:1529:522 +AAAACCTGTCAAACACGAATGTTATGCCCTGCTAA +>EAS218_1:6:77:1529:522 +AAAGTTCAATACTCACCATCATAAATACACACAAA +>EAS218_1:6:88:1413:14 +AATGAAAGAGGCTCAAAGAATGCCAGGAAGATACA +>EAS218_1:8:13:1729:1844 +ATGAACTTCTGTAATTGAAAAATTCATTTAAGAAA +>EAS218_1:8:16:1081:1894 +AAGGTGATGTGTGTTCTCATCAACCTCATACACAC +>EAS218_1:8:16:1081:1894 +AGATGAAACGCGTAACTGGGCTCTCATTCACTCCA +>EAS218_1:8:26:785:882 +CAGTTTCTGCCCCAAGCATGGTTGTACTGGGCAAT +>EAS218_1:8:26:785:882 +TACTTATCATGACTCTATCCCAAATTCCCAATTAC +>EAS218_1:8:61:1797:113 +CAGATAGCTTGTGGTCTGACAGGCTGCAACTGTGA +>EAS218_1:8:61:1797:113 +GGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGA +>EAS218_1:8:70:445:1289 +CTAATTTTTGGACTTCTTAAAGAAAAAAAAACCTG +>EAS218_1:8:70:445:1289 +GAATTGTAAAAGTCAAAATTAAAGTTCAATACTCA +>EAS218_1:8:82:1540:77 +ATTGAGACTACAGAGCAACTAGGTAAAAAATTAAC +>EAS218_1:8:82:1540:77 +GAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGC +>EAS218_1:8:90:706:1276 +AATTAACATTACAACAGGAACAAAACCTCATATAT +>EAS218_1:8:90:706:1276 +GTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAG +>EAS218_4:1:34:1614:558 +CAACATGAAGGAAAAAAATTCTAAAATCAGCAAGA +>EAS218_4:1:34:1614:558 
+GTCAAACACGAATGTTATGCCCTGCTAAACTAAGC +>EAS218_4:1:48:9:409 +CAGCTCCCTGTCACCCAATGGACCTGTGATATCTG +>EAS218_4:1:48:9:409 +GTTTAGTGCCTTTGTTCACATAGACCCCCTTGCAA +>EAS218_4:1:84:1505:1037 +GTCAACATGAAGGAAAAAAATTCTAAAATCAGCAA +>EAS218_4:1:84:1505:1037 +TGCCCTGCTAAACTAAGCATCATAAATGAAGGGGA +>EAS218_4:1:9:206:901 +AGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGA +>EAS218_4:1:9:206:901 +CTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTC +>EAS218_4:3:12:630:707 +ATTTAAAAACATGAACTAACTATATGCTGTTTACA +>EAS218_4:3:12:630:707 +CACAAAAGTACAAAACTCACAGGTTTTATAAAACA +>EAS218_4:3:39:1671:1928 +AGCCCATACTTTACTGCTACTCAATATATCCATGT +>EAS218_4:3:39:1671:1928 +CAAATATTGCTAGTGGGAGTATAAATTGTTTTCCA +>EAS218_4:3:41:1281:1785 +ACTATCTAAAGTCAACATGAAGGAAAAAAATTCTA +>EAS218_4:3:41:1281:1785 +GGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTAT +>EAS218_4:3:65:85:1547 +AAATCAGCAAGAGAAAAGCATACAGTCATCTATAA +>EAS218_4:3:65:85:1547 +GTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAA +>EAS218_4:5:41:118:1246 +ACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAA +>EAS218_4:5:41:118:1246 +CTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTG +>EAS218_4:5:63:875:1339 +CCCCAGATACCATCCCTGTCTTACTTCCAGCTCCC +>EAS218_4:5:63:875:1339 +GGGAACAGGGAGGTGCACTAATGCGCTCCACGCCC +>EAS218_4:7:71:31:1973 +AAAGAGGCTCAAAGAATGCCAGGAAGATACATTGC +>EAS218_4:7:72:1288:1211 +ATTGGTACAATGTACAATATTCTGATGATGGTTAC +>EAS218_4:7:72:1288:1211 +GATAAAGCACACTTTAAATCAACAACAGTAAAATA +>EAS218_4:7:85:923:726 +ACAAAATATAGTTGAAAGCTCTAACAATAGACTAA +>EAS218_4:7:85:923:726 +GTAAAGTAACTGAACCTATGAGTCACAGGTATTCC +>EAS218_4:7:87:964:826 +CACAGGGGGCTGCGCGGTTTCCCATCATGAAGCAC +>EAS218_4:7:87:964:826 +TGAAGAGACTATTGCCAGATGAACCACACATTAAT +>EAS218_4:7:89:1487:520 +CACGAATGCGTCTCTACCACAGGGGGCTGCGCGGT +>EAS218_4:7:89:1487:520 +TATGAAGAGACTATTGCCAGATGAACCACACATTA +>EAS218_4:7:90:1873:89 +GAGATTCTGCAGCCCAGATCCAGATTGCTTGTGGT +>EAS218_4:7:90:1873:89 +GCATTTTGTCAGTTACCAAATGTGTTTATTACCAG +>EAS219_1:1:22:490:2011 +ATAACAATGGGCTTCTCAGCGGAAACCTTACAAGC +>EAS219_1:1:22:490:2011 +GCTAGAGATTTAGACATCTAAATGAAAGAGGCTCA +>EAS219_1:1:37:1004:1136 
+CCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTT +>EAS219_1:1:37:1004:1136 +GTCACCCAATGGACCTGTGATATCTGGATTCTGGG +>EAS219_1:1:44:1466:425 +GTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGG +>EAS219_1:1:44:1466:425 +TTATCTGCACATTACTACCCTGCAATTAATATAAT +>EAS219_1:1:50:257:341 +AAATTAACATTACAACAGGAACAAAACCTCATATA +>EAS219_1:1:50:257:341 +TGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTA +>EAS219_1:1:5:497:687 +AAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTC +>EAS219_1:1:5:497:687 +TTGCCTTCAGACCCTACACGAATGCGTCTCTACCA +>EAS219_1:1:60:1420:660 +AATGCTAAGATAATTCATCATCACTAAACCAGTCC +>EAS219_1:1:60:1420:660 +GTAAAAAATTAACATTACAACAGGAACAAAACCTC +>EAS219_1:1:63:28:1549 +AAAAAGTAAACTCTCAAATATTGCTAGTGGGAGTA +>EAS219_1:1:63:28:1549 +TACAATATTCTGATGATGGTTACACTAAAAGCCCA +>EAS219_1:1:67:191:668 +ACTATGAAGAGACTATTGCCAGATGAACCACACCT +>EAS219_1:1:67:191:668 +CCAATTACGTCCTATCTTCTTCTTAGGGAAGAACA +>EAS219_1:3:11:706:1030 +ATCTCTTGTAATCTCTCTCATCTTTGCTGCATCCC +>EAS219_1:3:11:706:1030 +ATGTCTATTTTTGTCTTGACACCCAACTAATATTT +>EAS219_1:3:33:1168:1762 +AGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAG +>EAS219_1:3:33:1168:1762 +GTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCA +>EAS219_1:3:4:1620:413 +CATCACAATGAACAACAGGAAGAAAAGGTCTTTCA +>EAS219_1:3:4:1620:413 +TCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCC +>EAS219_1:3:62:603:1552 +AATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGT +>EAS219_1:3:62:603:1552 +GAACCACACATTAATACTATGTTTCTTATCTGCAC +>EAS219_1:3:88:465:1877 +AAAGCACACTTTAAATCAACAACAGTAAAATAAAA +>EAS219_1:3:88:465:1877 +TAAGAGGGATGAGAAATTACCTAATTGGTACAATG +>EAS219_1:3:90:219:528 +ACGAATGTTATGCCCTGCTAAACTAAGCATCATAA +>EAS219_1:3:90:219:528 +GTACAAAACTCACAGGTTTTATAAAACAATTAATT +>EAS219_1:5:5:259:250 +GTCTCTTATGAATTAACCCAGTCAGACAAAAATAA +>EAS219_1:5:5:259:250 +TATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT +>EAS219_1:5:6:1067:91 +CTTGACACCCAACTAATATTTGTCTGAGCAAAACA +>EAS219_1:5:6:1067:91 +CTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGAT +>EAS219_1:7:16:1343:1621 +AAATTCTAAAATCAGCAAGAGAAAAGCATACAGTC +>EAS219_1:7:16:1343:1621 +AAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTA +>EAS219_1:7:18:571:1110 
+GAGGTGCACTAATGCGCTCCACGCCCAAGCCCTTC +>EAS219_1:7:18:571:1110 +TAATATAATTGTGTCCATGTACACACGCTGTCCTA +>EAS219_1:7:20:1444:328 +AAAAGATGTTCTACGCAAACAGAAACCAAATGAGA +>EAS219_1:7:20:1444:328 +TACTAAATACATATGCACCTAACACAAGACTACCC +>EAS219_1:7:35:392:2042 +ATAAATCTATAAAAAAATTAAAATTTAACAAAAGT +>EAS219_1:7:35:392:2042 +TAACACAAGACTACCCAGATTCATAAAACNAATAC +>EAS219_1:7:50:1339:1154 +CATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCC +>EAS219_1:7:50:1339:1154 +GTCTTGACACCCAACTAATATTTGTCTGAGCAAAA +>EAS219_1:7:62:1076:540 +CCCCCACTTAAGAGATATAGATTGGCAGAACAGAT +>EAS219_1:7:62:1076:540 +TAAATCAACAACAGTAAAATAAAACAAAGGAGGTC +>EAS219_1:7:94:1655:1921 +AAAAGCATACAGTCATCTATAAAGGAAATCCCATC +>EAS219_1:7:94:1655:1921 +TTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAA +>EAS219_FC30151:1:18:1418:237 +CCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGG +>EAS219_FC30151:1:18:1418:237 +CTGGGAAATTCTTCATCCTGGACCCTGAGAGATTC +>EAS219_FC30151:1:53:140:421 +AACCAAATGAGAGAAGGAGTAGCTATACTTATATC +>EAS219_FC30151:1:53:140:421 +GATTCATAAAACAAATACTACTAGACCTAAGAGGG +>EAS219_FC30151:1:54:436:1452 +AAGACAAGTCTCTTATGAATTAACCCAGTCAGACA +>EAS219_FC30151:1:54:436:1452 +AGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCT +>EAS219_FC30151:1:55:8:1412 +ATATAATTGTGTCCATGTACACACGCTGTCCTATG +>EAS219_FC30151:1:55:8:1412 +GCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCAT +>EAS219_FC30151:1:76:34:691 +ATATATCAATATTAACTTTGAATAAAAAGGGATTA +>EAS219_FC30151:1:88:1454:418 +CCAGGAAGATACATTGCAAGACAGACTTCATCAAG +>EAS219_FC30151:1:88:1454:418 +GAAGAGATTGGATCTAATTTTTGGACTTCTTAAAG +>EAS219_FC30151:3:13:674:1717 +AGAAAAGCATGCAGTCATCTATAAAGGAAATCCCA +>EAS219_FC30151:3:13:674:1717 +TAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAG +>EAS219_FC30151:3:40:1128:1940 +CCCCTTACAACAACCTTGAGAACCCCAGGGAATTT +>EAS219_FC30151:3:40:1128:1940 +CCGAGTCACGGGGTTGCCAGCACAGGGGCTTAACC +>EAS219_FC30151:3:55:74:1040 +CTACCACAGGGGGCTGCGCGGTTTCCCATCATGAA +>EAS219_FC30151:3:55:74:1040 +GGAAATGCTTTACTGTCATAACTATGAAGAGACTA +>EAS219_FC30151:3:73:1458:1337 +AAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGAC +>EAS219_FC30151:3:73:1458:1337 
+AGGTAAAAAATTAACATTACAACAGGAACAAAACC +>EAS219_FC30151:3:81:1723:1820 +ATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTT +>EAS219_FC30151:3:81:1723:1820 +CATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTG +>EAS219_FC30151:3:90:1906:1528 +CACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAA +>EAS219_FC30151:3:90:1906:1528 +TTACAAAATATAGTTGAAAGCTCTAACAATAGACT +>EAS219_FC30151:3:9:1595:1826 +ACTAATATTTGTCTGAGCAAAACAGTCTAGATGAG +>EAS219_FC30151:3:9:1595:1826 +ATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCCTC +>EAS219_FC30151:5:29:817:854 +AGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTG +>EAS219_FC30151:5:29:817:854 +GTTCTCAAGGTTGTTGCAAGGGGGTTTATGTGAAC +>EAS219_FC30151:5:54:1351:910 +ACTAAAAGCCCATACTTTACTGCTACTCAATATAT +>EAS219_FC30151:5:54:1351:910 +ACTCTCAAATATTGCTAGTGGGAGTATAAATTGTT +>EAS219_FC30151:5:63:424:1643 +GACCCTACACGAATGCGTCTCTACCACAGGGGGCT +>EAS219_FC30151:5:63:424:1643 +GGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACC +>EAS219_FC30151:5:6:1243:981 +ATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAA +>EAS219_FC30151:5:6:1243:981 +TTACCTAGTTGCTCTGTAGTCTCAATTAATTGTTT +>EAS219_FC30151:5:70:348:972 +GAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTC +>EAS219_FC30151:5:70:348:972 +TCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTT +>EAS219_FC30151:5:72:1426:1883 +ACATAATCCCACGCTTCCTATGGAAAGGTTGTTGG +>EAS219_FC30151:5:72:1426:1883 +CATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGC +>EAS219_FC30151:7:11:1261:1200 +AAAAAAAACCTGTCAAACACGAATGTTATGCCCTG +>EAS219_FC30151:7:11:1261:1200 +TTGCAAGACAGACTTCATCAAGTTATGTAGTCATC +>EAS219_FC30151:7:51:1429:1043 +TATTTGTAATGAAAACTATATTTATGCTATTCAGT +>EAS219_FC30151:7:87:1289:83 +ACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGA +>EAS219_FC30151:7:87:1289:83 +ATGCCCTGCTAAACTAAGCATCATAAATGAAGGGG +>EAS219_FC30151:7:94:1440:2016 +AACCTGTCAAACACGAATGTTATGCCCTGCTAAAC +>EAS219_FC30151:7:94:1440:2016 +CACAAAAGTACAAAACTCACAGGTTTTATAAAACA +>EAS220_1:2:11:1274:1230 +TGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCT +>EAS220_1:2:11:1274:1230 +TTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT +>EAS220_1:2:43:656:1866 +TGAACTTCTGTAATTGAAAAATTCATTTAAGAAAT +>EAS220_1:2:47:591:698 +CAAATCTGCGCTTGTACTTCTAAATCTATAACAAA 
+>EAS220_1:2:47:591:698 +TCCTACTAAATACATATGCACCTAACACAAGACTA +>EAS220_1:2:50:513:882 +AAAACAAATACTACTAGACCTAAGAGGGATGAGAA +>EAS220_1:2:50:513:882 +GGAGTAGCTATACTTATATCAGATAAAGCACACTT +>EAS220_1:2:52:1779:1664 +CCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTG +>EAS220_1:2:52:1779:1664 +TGTTAAAATGTCTATTTTTGTCTTGACACCCAACT +>EAS220_1:2:54:91:1232 +AAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAG +>EAS220_1:2:54:91:1232 +AAAGCTCTAACAATAGACTAAACCAAGCAGAAGAA +>EAS220_1:2:62:1109:804 +TAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCTA +>EAS220_1:2:62:1109:804 +TGTCAGTTACCAAATGTGTTTATTACCAGAGGGAT +>EAS220_1:2:63:267:545 +ATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTT +>EAS220_1:2:63:267:545 +CTTGATTTACTTGTTGTTGGTTTTCTGTTTCTTTG +>EAS220_1:2:72:1809:1398 +AATACATGAGATTATTAGGAAATGCTTTACTGTCA +>EAS220_1:2:72:1809:1398 +CTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATC +>EAS220_1:4:100:20:1199 +AAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCA +>EAS220_1:4:100:20:1199 +CAGTCATCTATAAAGGAAATCCCATCAGAATAACA +>EAS220_1:4:14:1665:1772 +GGGCTGTAATGATGCCCCTTGGCCATCACCCGGTC +>EAS220_1:4:14:1665:1772 +TATAATGGTGTCCATGTACACACGCTGTCCTATGT +>EAS220_1:4:46:1566:668 +CTACTAAATACATATGCACCTAACACAAGACTACC +>EAS220_1:4:46:1566:668 +TTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAA +>EAS220_1:4:69:88:1154 +ATAACTATGAAGAGACTATTGCCAGATGAACCACA +>EAS220_1:4:69:88:1154 +CACGAATGCGTCTCTACCACAGGCGGCTGCGCGGT +>EAS220_1:4:6:1178:1105 +GATAATTCATCATCACTAAACCAGTCCTATAAGAA +>EAS220_1:4:6:1178:1105 +GGAACAAAACCTCATATATCAATATTAACTTTGAA +>EAS220_1:4:70:766:2016 +AAAAAAATTCTAAAATCAGCAAGAGAAAAGCATAC +>EAS220_1:4:70:766:2016 +ATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTG +>EAS220_1:6:24:105:1046 +AGATTCATAAAACAAATACTACTAGACCTAAGAGG +>EAS220_1:6:24:105:1046 +CATAGCTAAAACTAAAAAAGCAAAAACAAAAACTA +>EAS220_1:6:7:1547:1933 +AATATTTGACTGAGCAAAACAGTCTAGATGAGAGA +>EAS220_1:6:7:1547:1933 +CTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTG +>EAS220_1:8:18:1757:95 +ATGAGTCGCAGGTATTCCTGAGGAAAAAGAAAAAG +>EAS220_1:8:18:1757:95 +CTTCATCAAGATATGTAGTCATCAGACTATCTAAA +>EAS220_1:8:33:672:473 +ATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAAT 
+>EAS220_1:8:33:672:473 +TCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCT +>EAS220_1:8:38:1576:1923 +CACAGGGGGCTGCGCGGTTTCCCATCATGAAGCAC +>EAS220_1:8:38:1576:1923 +CTGTCATAACTATGAAGAGACTATTGCCAGATGAA +>EAS220_1:8:45:178:1321 +AGGTTTTATAAAACAATTAATTGAGACTACAGAGC +>EAS220_1:8:45:178:1321 +CATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCT +>EAS220_1:8:46:1528:799 +ATGATAAAAAGATCAATTCAGCAAGAAGATATAAC +>EAS220_1:8:46:1528:799 +CATGTAACAAATCTGCGCTTGTACTTCTAAATCTA +>EAS220_1:8:46:485:482 +AGAGATTCTGCAGCCCAGATCCAGATTGCTTGTGG +>EAS220_1:8:46:485:482 +ATTACCAGAGGGATGAAGGGAAGAGGGACGCTGAA +>EAS220_1:8:5:996:2000 +AGTCAACATGAAGGAAAAAAATTCTAAAATCAGCA +>EAS220_1:8:5:996:2000 +CACGAATGTTATGCCCTGCTAAACTAAGCATCATA +>EAS220_1:8:66:1046:167 +ACAATGTACAATATTCTGATGATGGTTACACTAAA +>EAS220_1:8:66:1046:167 +ACACTTTAAATCAACAACAGTAAAATAAAACAAAG +>EAS220_1:8:83:1456:1854 +AAGATCAATTCAGCAAGAAGATATAACCATCCTAC +>EAS220_1:8:83:1456:1854 +AAGCCCATACTTTACTGCTACTCAATATATCCATG +>EAS221_1:2:23:127:880 +CGGAAACCTTACAAGCCAGAAGAGATTGGATCTAA +>EAS221_1:2:23:127:880 +TAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT +>EAS221_1:2:24:1037:84 +TGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGC +>EAS221_1:2:24:1037:84 +TTCAGTTCTAAATATAGAAATTGAAACAGCTGTGT +>EAS221_1:2:29:1486:672 +AATTGAAACAGCTGTGTTTAGTGCCTTTGTTCACA +>EAS221_1:2:3:542:428 +AAGACATGAGTTCAGGTACAGGGGTGGAAAAAGAT +>EAS221_1:2:3:542:428 +AGAGCAACTAGGTAAAAAATTAACATTACAACAGG +>EAS221_1:2:3:945:2005 +AACCAAGCAGAAGAAAGAGGCTCAGAACTTGAAGA +>EAS221_1:2:3:945:2005 +GAAAAACTATTTGAGGAAGTAATTGGGGAAAACCT +>EAS221_1:2:52:1144:509 +AAAAGGGATTAAATTCCCCCACTTAAGAGATATAG +>EAS221_1:2:52:1144:509 +TGTAAAAGTCAAAATTAAAGTTCAATACTCACCAT +>EAS221_1:2:73:955:728 +AATTCATCATCACTAAACCAGTCCTATAAGAAATG +>EAS221_1:2:73:955:728 +TACAACAGGAACAAAACCTCATATATCAATATTAA +>EAS221_1:2:8:327:522 +AACAGGAACAAAACCTCATATATCAATATTAACTT +>EAS221_1:2:8:327:522 +TTCTACGCAAACAGAAACCAAATGAGAGAAGGAGT +>EAS221_1:2:90:986:1224 +CTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAAC +>EAS221_1:2:91:856:504 +CTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGG +>EAS221_1:2:91:856:504 
+GTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTT +>EAS221_1:4:36:1402:1709 +AGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTC +>EAS221_1:4:36:1402:1709 +TTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATT +>EAS221_1:4:3:248:1491 +TTAAAATTTAACAAAAGTAAATAAAACACATAGCT +>EAS221_1:4:41:519:609 +AACAAAAACTATGCTAAGTATTGGTAAAGATGTGG +>EAS221_1:4:41:519:609 +TACCTAATTGGTACAATGGACAATATTCTGATGAT +>EAS221_1:4:4:1732:88 +GCTGTAATGATGCCCCTTGGCCATCACCCGGTCCC +>EAS221_1:4:4:1732:88 +TGTACACACGCTGTCCTATGTACTTATCATGACTC +>EAS221_1:4:68:64:783 +AAGACATAATCCCACGCTTCCTATGGAAAGGTTGT +>EAS221_1:4:68:64:783 +TCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGT +>EAS221_1:4:87:1375:1303 +AGGAGCATTTTGTCAGTTACCAAATGTGTTTATTA +>EAS221_1:4:87:1375:1303 +GAGAGATTCTGCAGCCCAGATCCAGATTGCTTGTG +>EAS221_1:6:38:1071:155 +ATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATC +>EAS221_1:6:38:1071:155 +TAAAATGTCTATTTTTGTCTTGACACCCAACTAAT +>EAS221_1:6:4:1131:104 +ACTTTGGAAAACAATTTGGTAATTTCGTTTTTTTT +>EAS221_1:6:4:1131:104 +ATGTAACAAATCTGCGCTTGTACTTCTAAATCTAT +>EAS221_1:6:57:1342:1166 +AAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGA +>EAS221_1:6:57:1342:1166 +CAGAGCAACTAGGTAAAAAATTAACATTACAACAG +>EAS221_1:6:60:1037:1146 +AAAAGCATACAGTCATCTATAAAGGAAATCCCATC +>EAS221_1:6:60:1037:1146 +GTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAAT +>EAS221_1:6:69:735:1915 +AATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAG +>EAS221_1:6:69:735:1915 +ACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCATTT +>EAS221_1:6:89:1164:573 +AAAAAACCTGTCAAACACGAATGTTATGCCCTGCT +>EAS221_1:6:89:1164:573 +AGACTTCATCAAGAGATGTAGTCATCAGACTATCT +>EAS221_1:6:92:1807:1185 +AGCATGGTTGTACTGGGCAATACATGAGATTATTA +>EAS221_1:6:92:1807:1185 +CTCTATCCCAAATTCCCAATTACGTCCTATCTTCT +>EAS221_1:6:96:491:1891 +AGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGG +>EAS221_1:6:96:491:1891 +GTCAACATGAAGGAAAAAAATTCTAAAATCAGCAA +>EAS221_1:8:15:881:1932 +CACTTAAGAGATATAGATTGGCAGAACAGATTTAA +>EAS221_1:8:15:881:1932 +CACTTTAAATCAACAACAGTAAAATAAAACAAAGG +>EAS221_1:8:4:679:110 +AATGTTCCCCAGATACCATCCCTGTCTTACTTCCA +>EAS221_1:8:4:679:110 +TCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGT +>EAS221_1:8:58:369:244 
+CTGTAATGATGCCCCTTGGCCATCACCCAGTCCCT +>EAS221_1:8:58:369:244 +TTGGGAGATTTTTAATGATTCCTCAATGTTAAAAT +>EAS221_1:8:60:1020:1259 +CTATGAAGAGACTATTGCCAGATGAACCACACATT +>EAS221_1:8:60:1020:1259 +TAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTC +>EAS221_1:8:65:1928:1125 +CATCACAATGAACAACAGGAAGAAAAGGTCTTTCA +>EAS221_1:8:65:1928:1125 +GGCATTTGCCTTCAGACCCTACACGAATACGTCTC +>EAS221_1:8:67:1797:1931 +GGGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTG +>EAS221_1:8:67:1797:1931 +TGGTCTGACAGGCTGCAACTGTGAGCCATCACAAT +>EAS221_1:8:70:1349:1788 +ATTATATCAGATAAAGCACACTTTAAATCAACAAC +>EAS221_1:8:70:1349:1788 +TACTAGACCTAAGAGGGATGAGAAATTACCTAATT +>EAS221_1:8:73:108:1621 +GAACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTC +>EAS221_1:8:73:108:1621 +GTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACA +>EAS221_1:8:77:781:676 +TCATGAAGCACTGAACTTCCACGTATCATCTAGGG +>EAS221_1:8:77:781:676 +TTGCCAGATGAACCACACATTAATACTATGTTTCT +>EAS221_1:8:78:1478:1446 +GGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTT +>EAS221_1:8:78:1478:1446 +TGTGGTCTGACAGGCTGCAACTGTGAGCCATCACA +>EAS221_1:8:84:1013:1074 +GCAAGGGGGTCTATGTGAACAAAGGCACTAAACAC +>EAS221_1:8:84:1013:1074 +TTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAA +>EAS221_1:8:8:1351:1986 +CCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATG +>EAS221_1:8:8:1351:1986 +TCTTACTTCCAGATCCCCAGAGGGAAAGCTTTCAA +>EAS221_3:2:100:1147:124 +AAAAAAGAATTTTAAAAATGAACAGAGCTTTCAAG +>EAS221_3:2:100:1147:124 +AATGCCAGGAAGATACATTGCAAGACAGACTTCAT +>EAS221_3:2:22:1623:709 +GAAGACAAGTCTCTTATGAATTAACCCAGTCAGAC +>EAS221_3:2:22:1623:709 +GGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGAC +>EAS221_3:2:2:491:1886 +CTCTTTAGTCTTGCTAGAGATTTAGACATCTAAAT +>EAS221_3:2:59:1576:946 +AACTGTGAGCCATCACAATGAACAACAGGAAGAAA +>EAS221_3:2:59:1576:946 +CAACGCTTCTAGCCATTTCTTTTGGCATTTGCCTT +>EAS221_3:2:60:590:1760 +AAAGTAACTGAACCTATGAGTCACAGGTATTCCTG +>EAS221_3:2:60:590:1760 +TCATCAAGATATGTAGTCATCAGACTATCTAAAGT +>EAS221_3:2:67:1467:1447 +AAAGTTCAATACTCACCATCATAAATACACACAAA +>EAS221_3:2:67:1467:1447 +ATAGATTGGCAGAACAGATTTAAAAACATGAACTA +>EAS221_3:2:67:1864:477 +AATGATGCCCCTTGGCCATCACCCAGTCCCTGCCC +>EAS221_3:2:67:1864:477 
+TAAAATGTCTATTTTTGTCTTGACACCCAACTAAT +>EAS221_3:2:76:1729:813 +TAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTT +>EAS221_3:2:76:1729:813 +TCTGCGCTTGTACTTCTAAATCTATAAAAAAATTA +>EAS221_3:4:12:276:1797 +ACTATTGCCAGATGAACCACACATTAATACTATGT +>EAS221_3:4:12:276:1797 +TCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTC +>EAS221_3:4:21:132:1423 +GCCAGATGAACCACACATTAATACTATGTTTCTTA +>EAS221_3:4:21:132:1423 +TCAATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCC +>EAS221_3:4:29:1061:574 +ATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGG +>EAS221_3:4:29:1061:574 +GGTCTGACAGGCTGCAACTGTGAGCCATCACAATG +>EAS221_3:4:30:1452:1563 +ATGAATTAACCAAGTCAGACAAAAATAAAGAAAAA +>EAS221_3:4:30:1452:1563 +GATTTAGACATCTAAATGAAAGAGGCTCAAAGAAT +>EAS221_3:4:41:1308:619 +CATTGCAAGACAGACTTCATCAAGATATGTAGTCA +>EAS221_3:4:41:1308:619 +GAAGTATGAGATTATGTAAAGTAACTGAACCTATG +>EAS221_3:4:57:1675:720 +TATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAA +>EAS221_3:4:57:1675:720 +TCATATATCAATATTAACTTTGAATAAAAAGGGAT +>EAS221_3:4:66:584:407 +GCATTTGCCTTCAGACCCTACACGAATGCGTCTCT +>EAS221_3:4:66:584:407 +GGGCAATACATGAGATTATTAGGAAATGCTTTACT +>EAS221_3:4:78:1314:1275 +AGGAAATCCCATCAGAATAACAATGGGCTTCTCAG +>EAS221_3:4:78:1314:1275 +GAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAG +>EAS221_3:4:81:687:1379 +CCTAAGAGGGATGAGAAATTACCTAATTGGTACAA +>EAS221_3:4:81:687:1379 +TAAATAAAACACATAGCTAAAACTAAAAAAGCAAA +>EAS221_3:4:90:247:212 +ACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCT +>EAS221_3:4:90:247:212 +TGACAGGCTGCAACTGTGAGCCATCACAATGAACA +>EAS221_3:6:20:492:850 +AGTATGAAAACAATGTTCCCCAGATGCCGTCCCGG +>EAS221_3:6:20:492:850 +CCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTT +>EAS221_3:6:26:227:1053 +ATTCTTCATCCTGGACCCTGAGAGATTCTGCAGCC +>EAS221_3:6:26:227:1053 +GGTTTAGGGGTATAATACCTCTACATGGCTGATTA +>EAS221_3:6:51:1486:1131 +ATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAA +>EAS221_3:6:51:1486:1131 +TCAGCAGAAACCTTACAAGCCAGAAGAGATTGGAT +>EAS221_3:6:70:843:706 +AATGATTCCTCAATGTTAAAATGTCTATTTTTGTC +>EAS221_3:6:70:843:706 +ATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCC +>EAS221_3:8:33:1240:846 +ACCTTACAAGCCAGAAGAGATTGGATCTAATTTTT +>EAS221_3:8:33:1240:846 
+ATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAA +>EAS221_3:8:34:956:1309 +AACTATGAAGAGACTATTGCCAGATGAACCACACA +>EAS221_3:8:34:956:1309 +AGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTC +>EAS221_3:8:50:1203:1094 +AAATATAGTTGAAAGCTCTAACAATAGACTAAACC +>EAS221_3:8:50:1203:1094 +ACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAG +>EAS221_3:8:55:932:613 +TAGTCATCAGACTATCTAAAGTCAACATGAAGGAA +>EAS221_3:8:55:932:613 +TGTCAAACACGAATGTTATGCCCTGCTAAACTAAG +>EAS221_3:8:63:1265:820 +CTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTC +>EAS221_3:8:63:1265:820 +TGTCTTGACACCCAACTAATATTTGTCTGAGCAAA +>EAS221_3:8:65:463:703 +GAAACCTTACAAGCCAGAAGAGATTGGATCTAATT +>EAS221_3:8:65:463:703 +TGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTT +>EAS221_3:8:74:770:1712 +ACATTACTACCCTGCAATTAATATAATTGTGTCCA +>EAS221_3:8:74:770:1712 +GAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCAC +>EAS221_3:8:7:1864:1569 +AAAAACCTGTCAAACACGAATGTTATGCCCTGCTA +>EAS221_3:8:7:1864:1569 +AGATATGTAGTCATCAGACTATCTAAAGTCAACAT +>EAS51_62:1:38:250:647 +AATAATAAAATGATAAAAAGATCAATTCAGCAAGA +>EAS51_62:1:38:250:647 +ACTATATGCTGTTTACAAGAAACTCATTAATAAAT +>EAS51_62:2:133:8:379 +ATAAGATAATTCATCATCACTAAACCAGTCCTATA +>EAS51_62:2:133:8:379 +GGAAATCCCATCAGAATAACAATGGGCTTCTCAGC +>EAS51_62:2:258:266:101 +ACGCTGTCCTATGTACTTATCATGACTCTATCCCA +>EAS51_62:2:258:266:101 +CCATCACCCAGTCCCTGCCCCATCTCTTGTAATCT +>EAS51_62:2:260:147:818 +AAAATTTGGTAATTTAGTTTTTTTTTTTTTCTTTT +>EAS51_62:2:260:147:818 +ATCCATGTAACAAATCTGCGCTTTTACTTCTAAAT +>EAS51_62:3:103:443:166 +ACCTGTCAAACACGAATGTTATGCCCTGCTAAACT +>EAS51_62:3:103:443:166 +TACACACAAAAGTACAAAACTCACAGGTTTTATAA +>EAS51_62:3:169:292:652 +ATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCC +>EAS51_62:3:169:292:652 +GCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGA +>EAS51_62:3:200:263:280 +AGTAAAATAAAACAAAGGAGGTCATCATACAATGA +>EAS51_62:3:200:263:280 +TTGGTACAATGTACAATATTCTGATGATGGTTACA +>EAS51_62:3:263:74:407 +AAGCTTTCAACGCTTCTAGCCATTTCTTTTGGCAT +>EAS51_62:3:263:74:407 +CTGCAACTGTGAGCCATCACAATGAACAACAGGAA +>EAS51_62:3:314:386:190 +AGACCCCCTTGCAACAACCTTGAGAACCCCAGGGA +>EAS51_62:3:314:386:190 
+CACTCCAGCTCCCTGTCACCCAATGGACCTGTGAT +>EAS51_62:3:50:312:219 +ACTGCTACTCAATATATCCATGTAACAAATCTGCG +>EAS51_62:3:50:312:219 +TCCTACTAAATACATATGCACCTAACACAAGACTA +>EAS51_62:3:55:340:837 +TCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAAT +>EAS51_62:3:55:340:837 +TTAATATAATTGTGTCCATGTACACACGCTGTCCT +>EAS51_62:3:68:996:104 +AGAGGGATGAGAAATTACCTAATTGGTACAATGTA +>EAS51_62:3:68:996:104 +TACTTATATCAGATAAAGCACACTTTAAATCAACA +>EAS51_62:4:156:857:494 +CTCATACACACACATGGTTTAGGGGTATAATACCT +>EAS51_62:4:156:857:494 +GTTTCCCATCATGAAGCACTGAACTTCCACGTCTC +>EAS51_62:4:187:907:145 +TTTCTTCTCTCTCTTTTTTTTTTTTTTTATTGCAT +>EAS51_62:4:282:962:46 +GAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAG +>EAS51_62:4:282:962:46 +TACTTTACTGCTACTCAATATATCCATGTAACAAA +>EAS51_62:4:308:614:911 +AAAAACAATTTGGTAATTTAGTTTTTTTTTTTTTC +>EAS51_62:4:308:614:911 +TGCGCTTGTACTTCTAAATCTATAACAAAATTAAA +>EAS51_62:5:119:38:945 +ATTCTAAAATCAGCAAGAGAAAAGCATACAGTCAT +>EAS51_62:5:119:38:945 +TCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGA +>EAS51_62:5:131:779:345 +GTCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAA +>EAS51_62:5:131:779:345 +TGGAAGACATAATCCCACGCTTCCTATGGAAAGGT +>EAS51_62:5:154:669:853 +GTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCA +>EAS51_62:5:154:669:853 +TGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGA +>EAS51_62:5:192:716:235 +ATACACACACATGGTTTAGGGGTATAATACCTCTA +>EAS51_62:5:192:716:235 +GACCCTACACGAATGCGTCTCTACCACAGGGGGCT +>EAS51_62:5:236:498:526 +ACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGT +>EAS51_62:5:236:498:526 +CAGCACATTACTACCCTGCAATTAATATAATTGTG +>EAS51_62:5:290:319:736 +CGAGTCACGGGGTTGCCAGCACAGGGGCTTAACCT +>EAS51_62:5:290:319:736 +GTTCTCAAGGTTGTTGCAAGGGGGTCTATGTGAAC +>EAS51_62:5:295:882:282 +AATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAG +>EAS51_62:5:295:882:282 +CAGAAGAGATTGGATCTAATTTTTGGACTTCTTAA +>EAS51_62:5:86:697:941 +AAAAAAATCCCGGAAGATACATTGCAAGACAGACT +>EAS51_62:5:86:697:941 +GTATGAGATTATGTAAAGTAACTGAACCTATGAGT +>EAS51_62:6:12:484:836 +AAACACATAGCTAAAACTAAAAAAGCAAAAACAAA +>EAS51_62:6:12:484:836 +AAATACTACTAGACCTAAGAGGGATGAGAAATTAC +>EAS51_62:6:148:170:895 
+AAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGT +>EAS51_62:6:148:170:895 +AAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGA +>EAS51_62:6:50:542:881 +CCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGA +>EAS51_62:6:50:542:881 +TCCCAATTACGTCCTATCTTCTTCTTAGGTAAGAA +>EAS51_62:7:144:28:475 +AGGAAATGCTTTACTGTCATAACTATGAAGAGACT +>EAS51_62:7:144:28:475 +CAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGT +>EAS51_62:7:157:784:936 +GCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTCT +>EAS51_62:7:157:784:936 +TGATTTACTTGTTGTTGGTTTTCTGTTTCTTTGTT +>EAS51_62:7:162:195:761 +AACATGAACTAACTATATGCTGTTTACAAGAAACT +>EAS51_62:7:162:195:761 +TCACAGGTTTTATAAAACAATTAATTGAGACTACA +>EAS51_62:7:178:286:414 +CACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAG +>EAS51_62:7:178:286:414 +TTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACG +>EAS51_62:7:196:511:896 +ATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTT +>EAS51_62:7:196:511:896 +GAAAAGCATACAGTCATCTATAAAGGAAATCCCAT +>EAS51_62:7:248:17:435 +ATAACCATCCTACTAAATACATATGCACCTAACAC +>EAS51_62:7:248:17:435 +CATGAGTTCAGGAAAAGGGGTGGAAAAAGATGTTC +>EAS51_62:7:312:236:655 +GAGAAATTACCTAATTGGTACAATGTACAATATTC +>EAS51_62:7:312:236:655 +TGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTA +>EAS51_62:7:96:836:737 +ATCAACCTCATACACACACATGGTTTAGGGGTATA +>EAS51_62:7:96:836:737 +TCCCATCATGAAGCACTGAACTTCCACGTCTCATC +>EAS51_62:8:52:967:804 +AACTAAGCATCATAAATGAAGGGGAAATAAAGTCA +>EAS51_62:8:52:967:804 +TTACCTAGTTGCTCTGTAGTCTCAATTAATTGTTT +>EAS51_64:2:326:153:231 +ATTGTTTTCAACTTTGGAAAACAATTTGGTAATTT +>EAS51_64:2:326:153:231 +TGCTACTCAATATATCCATGTAACAAATCTGCGCT +>EAS51_64:3:143:310:958 +CTGCACATTACTACCCTGCAATTAATATAATTGTG +>EAS51_64:3:143:310:958 +GTCTCATCTAGGGGAACAGGGAGGTGCACTAATGC +>EAS51_64:3:190:727:308 +ACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTG +>EAS51_64:3:190:727:308 +GGTGCAGAGCCGAGTCACGGGGTTGCCAGCACAGG +>EAS51_64:3:255:45:399 +AAAAACTATGCTAAGTATTGGTAAAGATGTGGGGA +>EAS51_64:3:255:45:399 +GGTACAATGTACAATATTCTGATGATGGTTACACT +>EAS51_64:3:285:417:147 +AGTCAAAATTAAAGTTCAATACTCACCATCATAAA +>EAS51_64:3:285:417:147 +TTGGATCTAATTTTTGGACTTCTTAAAGAAAAAAA +>EAS51_64:3:309:303:278 
+CTATGAAGAGACTATTGCCAGATGAACCACACATT +>EAS51_64:3:309:303:278 +TCAATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCC +>EAS51_64:3:67:782:132 +ATATTTGTCTGAGCAAAACAGTCTAGATGAGAGAG +>EAS51_64:3:67:782:132 +TCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTT +>EAS51_64:3:7:268:263 +TCGTACAGAAGTTTAATGGAGCCTTGGGACCTTAC +>EAS51_64:3:7:268:263 +TTGCGTTATTTGAGTTGGTGGAAGACATAATCCCA +>EAS51_64:3:80:885:513 +GAAATTCTTCATCCTGGACCCTGAGAGATTCTGCA +>EAS51_64:3:80:885:513 +GCATTTTGTCAGTTACCAAATGTGTTTATTACCAG +>EAS51_64:3:90:435:691 +GGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGT +>EAS51_64:3:90:435:691 +TCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAGT +>EAS51_64:4:102:467:897 +AGCATGGTTGTACAGGGCAATACATGAGATTATTA +>EAS51_64:4:102:467:897 +GCTTTCAACGCTTCTAGCCATTTCTTTTGTCTTTT +>EAS51_64:4:116:738:142 +AAAGTTCAATACTCACCATCATAAATACACACAAA +>EAS51_64:4:116:738:142 +TGTCAAACACGAATGTTATGCCCTGCTAAACTAAG +>EAS51_64:4:163:31:455 +CTTACTTCCAGCTCCCCAGAGGGACAGCTNNCAAC +>EAS51_64:4:163:31:455 +GGGAGGTGCACTAATGCGCTCCACGCCCAAGCCCT +>EAS51_64:4:179:389:585 +TGGCCACTTTTTATCGCATTTCCCTTTAGAACCTA +>EAS51_64:4:179:389:585 +TGTGAAATGAATGAGATTATTAGGAAATGCTTTAC +>EAS51_64:4:181:476:394 +AACAATATTAACTTTGAATAAAAAGGGATTAAATT +>EAS51_64:4:181:476:394 +TCATCACTAAACCAGTCCTATAAGAAATGCTCAAA +>EAS51_64:4:189:467:475 +CTACATGGCTGATTATGAAAACAATGTTCCCCAGA +>EAS51_64:4:189:467:475 +TATCTGGATTCTGGGAAATTCTTCATCCTGGACCC +>EAS51_64:4:189:571:366 +AACAAATACTACTAGACCTAAGAGGGATGAGAAAT +>EAS51_64:4:189:571:366 +TCTACGCAAACAGAAACCAAATGAGAGAAGGAGTA +>EAS51_64:4:318:345:156 +GGACCCTGAGAGATTCTGCAGCCCAGATCCAGATT +>EAS51_64:4:318:345:156 +TTATGAAAACAATGTTCCCCAGATACCATCCCTGT +>EAS51_64:4:57:786:414 +CTCATTCACTCCAGCTCCCTGTCACCCAATGGACC +>EAS51_64:4:57:786:414 +TGCAACAACCTTGAGAACCCCAGGGAATTTGTCAA +>EAS51_64:5:177:24:389 +AGCTTTCAAGAAGTATGAGATTATGTAAAGTAACT +>EAS51_64:5:177:24:389 +CAAGACAGACTTCATCAAGATATGTAGTCATCAGA +>EAS51_64:5:202:39:380 +CAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGT +>EAS51_64:5:202:39:380 +CTTCATCCTGGACCCTGAGAGATTCTGCAGCCCAG +>EAS51_64:5:290:247:509 
+AGCCCATACTTTACTGCTACTCAATATATCCATGT +>EAS51_64:5:290:247:509 +TCTCAAATATTGCTAGTGGGAGTATAAATTGTTTT +>EAS51_64:6:118:41:489 +ACAATTAATTGAGACTACAGAGCAACTAGGTAAAA +>EAS51_64:6:118:41:489 +CCCTGCTAAACTAAGCATCATAAATGAAGGGGAAA +>EAS51_64:6:124:128:489 +CTCTGTCTTGATTTACTTGTTGTTGGTTTTCTGTT +>EAS51_64:6:124:128:489 +GTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGT +>EAS51_64:6:143:763:480 +AAACGCGTAACTGCGCTCTCATTCACTCCAGCTCC +>EAS51_64:6:143:763:480 +CTGAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA +>EAS51_64:6:195:348:703 +CAGTTACCAAATGTGTTTATTACCAGAGGGATGGA +>EAS51_64:6:195:348:703 +TAATGAAAACTATATTTATGCTATTCAGTTCTAAA +>EAS51_64:6:206:994:556 +ATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTC +>EAS51_64:6:206:994:556 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +>EAS51_64:6:210:809:735 +AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT +>EAS51_64:6:210:809:735 +GAGGTTCAGAACTTGAAGACAAGTCTCTTATGAAT +>EAS51_64:6:213:54:878 +CTCTGTCTTGATTTACTTGTTGTTGGTTTTTTGTT +>EAS51_64:6:300:622:86 +GGTTACACTAAAAGCCCATACTTTACTGCTACTCA +>EAS51_64:6:300:622:86 +TCATACAATGATAAAAAGATCAATTCAGCAAGAAG +>EAS51_64:6:54:695:952 +ACAGTAAAATAAAACAAAGGAGGTCATCATACAAT +>EAS51_64:6:54:695:952 +GGTTACACTAAAAGCCCATACTTTACTGCTACTCA +>EAS51_64:7:104:965:517 +AGAGGCTCAAAGAATGCCAGGAAGATACATTGCAA +>EAS51_64:7:104:965:517 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +>EAS51_64:7:140:752:822 +CATCACTAAACCAGTCCTATAAGAAATGCTCAAAA +>EAS51_64:7:140:752:822 +GCTTCTCAGCGGAAACCTTACAAGCCAGAAGAGAT +>EAS51_64:7:152:918:824 +TACTATGTTTCTTATCTGCACATTACTACCCTGCA +>EAS51_64:7:152:918:824 +TTTCCCATCATGAAGCACTGAACTTCCACGTCTCA +>EAS51_64:7:92:493:891 +AAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTG +>EAS51_64:7:92:493:891 +AGTCAACATGAAGGAAAAAAATTCTAAAATCAGCA +>EAS51_66:1:282:274:50 +ATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAA +>EAS51_66:1:282:274:50 +CCCCTCTAAGCCGTTCTATTTGTAATGAAAACTAT +>EAS51_66:1:289:207:323 +CCATACTTTACTGCTACTCAATATATCCATGTAAC +>EAS51_66:1:289:207:323 +CTAGTGGGAGTATAAATTGATTTCCACTTTGGAAA +>EAS51_66:1:64:182:741 +AAAAAAACAAATTAAACTCTAACAAAAGTAAATAA +>EAS51_66:3:102:511:946 
+ATGTAAAAGTGACTGTTATTGTCTTGACACCCAAC +>EAS51_66:3:102:511:946 +CCCAGTCCCTGCCCCATCTCGGGTAATCTCTCTCC +>EAS51_66:3:155:375:623 +AAGGAGTAGCTATACTTATATCAGATAAAGCACAC +>EAS51_66:3:155:375:623 +CAATATTAACTTTGAATAAAAAGGGATTAAATTCC +>EAS51_66:3:166:532:438 +AACAAATACTACTAGACCTAAGAGGGATGAGAAAT +>EAS51_66:3:166:532:438 +AACTAAAAAAGCAAAAACAAAAACTATGCTAAGTA +>EAS51_66:3:233:191:520 +TGAACTTCTGTAATTGAAAAATTCATTTAAGAAAT +>EAS51_66:3:246:711:981 +AAAAAAACCTGTCAAACACGAATGTTATGCCCTGC +>EAS51_66:3:246:711:981 +AGACTTCATCAAGATATGTAGTCATCAGACTATCT +>EAS51_66:3:263:689:572 +AAAGAAAAAAAAACCTGTCAAACACGAATGTTATG +>EAS51_66:3:263:689:572 +AAGATATGTAGTCATCAGACTATCTAAAGTCAACA +>EAS51_66:3:29:381:169 +ATCTGGATTCTGGGAAATTCTTCATCCTGGACCCT +>EAS51_66:3:29:381:169 +CATCAACCTCATACACACACATGGTTTAGGGGTAT +>EAS51_66:3:39:59:738 +GAGATTATTAGGAAATGCTTTACTGTCATAATTAT +>EAS51_66:3:39:59:738 +GTCCTATGTTCTTCTTAGGGAAGAACAGCTTAGGT +>EAS51_66:4:188:460:1000 +GTGTCCATGTACACACGCTGTCCTATGTACTTATC +>EAS51_66:4:188:460:1000 +TCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGC +>EAS51_66:4:191:40:536 +ATAAAAAAAGACTACCCAGATTCATAAAACAAATA +>EAS51_66:4:191:40:536 +CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAA +>EAS51_66:4:209:92:210 +GAGATTATTAGGAAATGCTTTACTGTCATAACTAT +>EAS51_66:4:209:92:210 +TTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTT +>EAS51_66:4:240:264:231 +CAACAGATCAAGAAGGAGGGGCAATGGACGAGTTA +>EAS51_66:4:240:264:231 +TGTAATGAAAACTATATTTATGCTATTCAGTTCTA +>EAS51_66:4:277:482:316 +CACTAATGCGCTCCACGCCCAAGCCCTTCTCACAG +>EAS51_66:4:277:482:316 +TGTCCTATGTACTTATCATGACTCTATCCCAAATT +>EAS51_66:4:310:287:420 +AAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGAT +>EAS51_66:4:310:287:420 +TACAGAGCAACTAGGTAAAAAATTAACATTACAAC +>EAS51_66:4:322:350:374 +ACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAA +>EAS51_66:4:322:350:374 +CATTGCAAGACAGACTTCATCAAGATATGTAGTCA +>EAS51_66:5:210:674:911 +TCATAAATACACACAAAAGTACAAAACTCACAGGT +>EAS51_66:5:210:674:911 +TGGCAGAACAGATTTAAAAACATGAACTAACTATA +>EAS51_66:5:269:280:716 +TGATATCTGGATTCTGGGAAATTCTTCATCCTGGA +>EAS51_66:5:269:280:716 
+TTTGTCAATGTCAGGGAAGGAGCATTTTTTCAGTT +>EAS51_66:5:273:545:1001 +AACAAAGGAGGTCATCATACAATGATAAAAAGATC +>EAS51_66:5:273:545:1001 +AGATTTAAAAACATGAACTAACTATATGCTGTTTA +>EAS51_66:5:285:395:450 +GTCATCTATAAAGGAAATCCCATCAGAATAACAAT +>EAS51_66:5:285:395:450 +TATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT +>EAS51_66:5:308:400:602 +ATAGACCCCCTTGCAACAACCTTGAGAACCCCAGG +>EAS51_66:5:308:400:602 +CCTGTCACCCAATGGACCTGTGATATCTGGATTCT +>EAS51_66:6:284:442:747 +AAAGAGGCTCAAAGAATGCCAGGAAGATACATTGC +>EAS51_66:6:310:747:415 +TGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGT +>EAS51_66:6:310:747:415 +TGTAGTCATCAGACTATCTAAAGTCAACATGAAGG +>EAS51_66:7:174:987:334 +ACACCCAAGCCCTTCTCACAGTTTCTGCCCCCAGC +>EAS51_66:7:174:987:334 +GTCCATGTACACACGCTGTCCTATGTACTTATCAT +>EAS51_66:7:4:234:610 +AAAAAACCTGTCAAACACGAATGTTATGCCCTCCT +>EAS51_66:7:4:234:610 +AAAAATCAACATCACAAATACACACAAAAGTACAA +>EAS51_66:7:84:411:336 +GCTTGTACTTCTAAATCTATAAAAAAATTAAAATT +>EAS51_66:8:36:688:722 +ATGTCTATTTTTGTCTTGACACCCAACTAATATTT +>EAS51_66:8:36:688:722 +GTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTT +>EAS51_66:8:43:972:506 +AGAAACCTTACAAGCCAGAAGAGATTGGATCTAAT +>EAS51_66:8:43:972:506 +TAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT +>EAS51_66:8:66:655:769 +TCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCT +>EAS51_66:8:66:655:769 +TTTGTCAGTTACCAAATGTGTTTATTACCAGAGGG +>EAS51_66:8:9:80:353 +AATTAATATAATTGTGTCCATGTACACACGCTGTC +>EAS51_66:8:9:80:353 +CCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGCT +>EAS51_78:7:113:43:634 +ATTTGTCTGAGAAAAACAGTCTAGATGAGAGAGAA +>EAS51_78:7:113:43:634 +CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTTTT +>EAS51_78:7:147:64:416 +AAACAATGTCCCCCAGATACCATCCCTGTCTTACT +>EAS51_78:7:147:64:416 +CTCATCTAGGGGAACAGGGAGGTGCACTAATGCGC +>EAS51_78:7:164:727:977 +GAAATGCTCAAAAGAATTGTAAAAGTCAAAATTAA +>EAS51_78:7:164:727:977 +TACAAGCCAGAAGAGATTGGATCTAATTTTTCGAC +>EAS51_78:7:186:199:927 +CTACGCGAATGCGTCTCTACCACAGGGGGCTGCGC +>EAS51_78:7:186:199:927 +TGGCATTTGCCTTCAGACCCTACACGAATGCGTCT +>EAS51_78:7:215:516:299 +AAGCTATGCTAAGTATTGGTAAAGATGTGGGGAAA +>EAS51_78:7:215:516:299 
+AATTACCTAATTGGTACAATGTACAATATTCTGAT +>EAS51_78:7:270:448:491 +GTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTC +>EAS51_78:7:270:448:491 +TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAAC +>EAS51_78:7:303:402:142 +AGCATTTTGTCAGTTACCAAATGTGTTTATTACCA +>EAS51_78:7:303:402:142 +TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGT +>EAS51_78:7:316:961:576 +TGTGATATCTGGATTCTGGGAAATTCTTCATCCCG +>EAS51_78:7:316:961:576 +TTACGGGTGTAATCTCTCTACATGGCTAATTATGA +>EAS54_61:1:115:868:887 +CATACACACACATGGTTTAGGGGTATAATACCTCT +>EAS54_61:1:115:868:887 +TCTCATTCACTCCAGCTCCCTGTCACCCAATGGAC +>EAS54_61:2:168:61:867 +GATGTTCTACGCAAACAGAAACCAAATGAGAGAAG +>EAS54_61:2:168:61:867 +TCATAAAACAAATACTACTAGACCTAAGAGGGATG +>EAS54_61:2:66:757:918 +CCATCCTACTAAATACATATGCACCTAACACAAGA +>EAS54_61:2:66:757:918 +GGGGTGGAAAAAGATGTTCTACGCAAACAGAAACC +>EAS54_61:3:150:933:810 +AGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATT +>EAS54_61:3:150:933:810 +CAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGG +>EAS54_61:3:155:758:710 +ATCAGATAAAGCACACTTTAAATCAACAACAGTAA +>EAS54_61:3:155:758:710 +TTTGAATAAAAAGGGATTAAATTCCCCCACTTAAG +>EAS54_61:3:20:762:748 +CACAATGAACAACAGGAAGAAAAGGTCTTTCAAAA +>EAS54_61:3:20:762:748 +TTCTTTTGGCATTTGCCTTCAGACCCTACACGAAT +>EAS54_61:4:143:69:578 +ATTGGGAGCCCCTCTAAGCCGTTCTATTTGTAATG +>EAS54_61:4:83:452:970 +AATGAAAACTATATTTATGCTATTCAGTTCTAAAT +>EAS54_61:4:83:452:970 +AGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTG +>EAS54_61:4:86:660:932 +AATACATATGCACCTAACACAAGACTACCCAGATT +>EAS54_61:4:86:660:932 +ATATAAAAAAATTAAAATTTAACAAAAGTAAATAA +>EAS54_61:6:126:541:194 +AGTACGACCAGCTCCCCAGAGGGAAAGCTTTCAAC +>EAS54_61:6:126:541:194 +CAGCCCAGATCCAGATTGCTTGTGGTCTGACAGGC +>EAS54_61:6:25:949:33 +AAAGTAACTGAACCTATGAGTCACAGGTATTCCTG +>EAS54_61:6:25:949:33 +GATATGTAGTCATCAGACTATCTAAAGTCAACATG +>EAS54_61:7:114:506:971 +ACTAAATACATATGCACCTAACACAAGACTACCCA +>EAS54_61:7:114:506:971 +GGGTGGAAAAAGATGTTCTACGCAAACAGAAACCA +>EAS54_61:7:64:37:257 +CCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTT +>EAS54_61:7:64:37:257 +TTTGATTTGGTGGAAGACATAATCCCACGCTTCCT +>EAS54_61:8:165:441:708 
+CCATCATAAATACACACAAAAGTACAAAACTCACA +>EAS54_61:8:165:441:708 +CTTAAAGAAAAAAAAACCTGTCAAACACGAATGTT +>EAS54_61:8:4:173:814 +CTGCTACTCAATATATCCATGTAACAAATCTGCGC +>EAS54_61:8:4:173:814 +GATAAAAAGATCAATTCAGCAAGAAGATATAACCA +>EAS54_65:2:127:288:655 +ACTAGGTAAAAAATTAACATTACAACAGGAACAAA +>EAS54_65:2:127:288:655 +TGTTCTACGCAAACAGAAACCAAATGAGAGAAGGA +>EAS54_65:2:182:924:833 +TTTTTTTTTTTTTATTTGCGCTTTTTTTTTTTTTT +>EAS54_65:2:264:157:150 +GGAAAAATGGACAAGATTCTGATGAGGGTTACACT +>EAS54_65:2:264:157:150 +TAAAGCACACTTTAAATCAACAACAGTAAAATAAA +>EAS54_65:2:94:356:809 +AAATCTATAACAAAATTAAAATTTAACAAAAGTAA +>EAS54_65:2:94:356:809 +CTAAATACATATGCACCTAACACAAGACTACCCAG +>EAS54_65:3:102:884:63 +GTCTTGACACCCAACTAATATTTGTCTGAGCAAAA +>EAS54_65:3:102:884:63 +TGTCTTCCTCTGTCTTGATTTCCTTGTTGTTGGTT +>EAS54_65:3:155:541:234 +CTAAATACATATGCACCTAACACAAGACTACCCAG +>EAS54_65:3:155:541:234 +TGCGCTTGTACTTCTAAATCTATAAAAAAATTAAA +>EAS54_65:3:214:946:229 +AAATGAACAGAGCTTTCAAGAAGTATGATATTATG +>EAS54_65:3:214:946:229 +ACAAAGAATGCCAGGAAGATACATTGCAAGACAGA +>EAS54_65:3:273:901:459 +CCAGCATGGTTGTACTGGGCAATACATGAGATTAT +>EAS54_65:3:273:901:459 +TGTCCTATGTACTTATCATGACTCTATCCCAAATT +>EAS54_65:3:290:558:349 +ACCATCCCTGTCTTACTTCCAGCTCCCCAGCGGGA +>EAS54_65:3:290:558:349 +TCTCAGCTAGGGGAACAGGGAGGTGCACTAATGCG +>EAS54_65:3:320:20:250 +AAATAAAACACATAGCTAAAACTAAAAAAGCAAAA +>EAS54_65:3:320:20:250 +TTTTTTTTTTTTTTTTTTTTTTTGCATGCCAGAAA +>EAS54_65:3:321:311:983 +ATTTATGCTATTCAGTTCTAAATATAGAAATTGAA +>EAS54_65:3:326:652:890 +TTCTGTAATTGAAAAATTCATTTAAGAAATTACAA +>EAS54_65:4:137:319:642 +CTTGTTGTTGGTTTTCTGTTTCTTTTTTTGATTTT +>EAS54_65:4:174:753:617 +ATATATCCATGTAACAAATCTGCGCTTGTACTTCT +>EAS54_65:4:174:753:617 +GATATAACCATCCTACTAAATACATATGCACCTAA +>EAS54_65:4:192:714:341 +AAATTAAAATTTAACAAAAGTAAATAAAACACATA +>EAS54_65:4:192:714:341 +ACACAAGACTACCCAGATTCATAAAACAAATACTA +>EAS54_65:4:193:38:987 +AGGGAAGAACAGCTTAGGTATCAATTTTGTGTTCT +>EAS54_65:4:193:38:987 +TGAGATTATTAGGAAATGCTTTACTGTCATAACTA +>EAS54_65:4:246:313:499 
+ACAATGAACAACAGGAAGAAAAGGTCTTTCAAAAG +>EAS54_65:4:246:313:499 +CTTTAAACGCTTCTAGCCATTTCTTTTGGCATTTG +>EAS54_65:4:325:795:213 +AGACTACAGAGCAACTAGGTAAAAAATTAACATTA +>EAS54_65:4:325:795:213 +GGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATG +>EAS54_65:4:61:346:384 +CAACTAAGAAGAAACCTTACAAGCCAGAAGAGATT +>EAS54_65:4:61:346:384 +CTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGAT +>EAS54_65:4:91:267:655 +CAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGT +>EAS54_65:4:91:267:655 +TGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGT +>EAS54_65:6:115:538:276 +CAAATGTGTTTATTACCAGAGGGATGGAGGGAAGA +>EAS54_65:6:115:538:276 +TATTTGTAATGAAAACTATATTTATGCTATTCAGT +>EAS54_65:6:164:797:930 +AGCTAGAGACCCATTTGGAGCCCCTCTAAGCCGTT +>EAS54_65:6:164:797:930 +GTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGT +>EAS54_65:6:18:376:416 +GCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGA +>EAS54_65:6:18:376:416 +TTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGTTT +>EAS54_65:6:277:590:364 +CTCTACATGGCTGATTATGAAAACAATGTTCCCCA +>EAS54_65:6:277:590:364 +CTGGGAAATTCTTCATCCTGGACCCTGAGAGATTC +>EAS54_65:6:326:71:741 +TCTCGTTTTTTTTTCTTTCTTTTCTCTTTTTTTTT +>EAS54_65:6:49:183:435 +CCATCATGAAGCACTGAACTTCCACGTCTCATCTA +>EAS54_65:6:49:183:435 +GACTATTGCCAGATGAACCACACATTAATACTATG +>EAS54_65:6:67:56:806 +TATAAAGGAAATCCCATCAGAATAACAATGGGCTT +>EAS54_65:6:67:56:806 +TCCTGACAAGCAAATGCTAAGATAATTCATCATCA +>EAS54_65:7:117:452:744 +AATATTAACTTTGAATAAAAAGGGATTAAATTCCC +>EAS54_65:7:117:452:744 +ACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAA +>EAS54_65:7:155:629:357 +AGAAGAGATTGGATCTAATTTTTGGACTTCTTAAA +>EAS54_65:7:155:629:357 +AGAGGCTCAAAGAATGCCAGGAAGATACATTGCAA +>EAS54_65:7:159:253:353 +ATAAAACAATTAATTGAGACTACAGAGCAACTAGG +>EAS54_65:7:159:253:353 +GAAGGGGAAATAAAGTCAAGTCTTTCCTGACAGGC +>EAS54_65:7:56:57:985 +TTCTGTCTTCTCTCCTGTCTTCTTTTCTCTTCTTT +>EAS54_65:7:56:57:985 +TTTTTTCTCTTTTCTCTTTTTTTTTTTTTTTTTTT +>EAS54_65:7:68:825:405 +AAAACCTCATATATCAATATTAACTTTGAATAAAA +>EAS54_65:7:68:825:405 +AAACCAAATGAGAGAAGGAGTAGCTATACTTATAT +>EAS54_65:8:10:975:766 +AATAACACAAGACTACCCAGATTCATAAAACAAAT +>EAS54_65:8:10:975:766 +TTAATAAAGACATGAGTTCAGGTAAAGGGGTGAAA 
+>EAS54_65:8:140:924:923 +GAACAACAGGAAGAAAAGGTCTTTCAAAAGGTGAT +>EAS54_65:8:140:924:923 +TTTTAGCCATTTCTTTTGGCATTTGCCTTCAGACC +>EAS54_65:8:147:687:428 +ATATGCACCTAACACAAGACTACCCAGATTCATAA +>EAS54_65:8:147:687:428 +ATGTTCTACGCAAACAGAAACCAAATGAGAGAAGG +>EAS54_65:8:178:187:610 +AAATACACACAAAAGTACAAAACTCACAGGTTTTA +>EAS54_65:8:178:187:610 +TTGGCAGAACAGATTTAAAAACATGAACTAACTAT +>EAS54_65:8:240:719:799 +AGATTGGCAGAACAGATTTAAAAACATGAACTAAC +>EAS54_65:8:240:719:799 +TTAAAGTTCAATACTCACCATCATAAATACACACA +>EAS54_65:8:305:819:245 +AAATTCATTTAAGAAATTACAAAATATAGTTGAAA +>EAS54_65:8:76:493:708 +TTTATGCTATTCAGTTCTAAATATAGAAATTGAAA +>EAS54_67:1:138:186:274 +GGCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGG +>EAS54_67:1:138:186:274 +TAATTGTGTCCATGTACACACGCTGTCCTATGTAC +>EAS54_67:1:159:222:274 +GAACCACACATTAATACTATGTTTCTTATCTGCAC +>EAS54_67:1:159:222:274 +GTCTGGGGAAAGTCTCAGGGAGCCGTCCGTGTCCT +>EAS54_67:1:15:381:715 +GACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTT +>EAS54_67:1:15:381:715 +GGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTAT +>EAS54_67:1:88:54:900 +ATCAACAACAGAAAAATAAAACAAAGGAGGTCATC +>EAS54_67:1:88:54:900 +TGATGATGGTTACACTAAAAGCCCATACTTCACTG +>EAS54_67:2:22:471:500 +GTAAATAAAACACATAGCTAAAACTAAAAAAGCAA +>EAS54_67:2:22:471:500 +TACTACTAGACCTAAGAGGGATGAGAAATTACCTA +>EAS54_67:3:114:736:433 +AACAAAACCTCATATATCAATATTAACTTTGAATA +>EAS54_67:3:114:736:433 +ATGTTCTACGCAAACAGAAACCAAGTGAGAGAAGG +>EAS54_67:3:172:196:746 +AAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCT +>EAS54_67:3:172:196:746 +GCATACAGTCATCTATAAAGGAAATCCCATCAGAA +>EAS54_67:3:175:730:949 +TTATGCTATTCAGTTCTAAATATAGAAATTGAAAC +>EAS54_67:3:197:261:624 +GACTATCTAAAGTCAACATGAAGGAAAAAAATTCT +>EAS54_67:3:197:261:624 +GCCCTGCTAAACTAAGCATCATAAATGAAGGGGAA +>EAS54_67:3:47:471:858 +ACACCCAACTAATATTTGTCTGAGCAAAACAGTCT +>EAS54_67:3:47:471:858 +CATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTG +>EAS54_67:4:142:943:582 +TTCAAATGAACTTCTGTAATTGAAAAATTCATTTA +>EAS54_67:4:145:607:216 +AACGCGTAACTGCGCTCTCATTCACTCCAGCTCCC +>EAS54_67:4:145:607:216 +TGAAAAACAGGAAGAAAAGGTCTTTCAAAAGGTGA +>EAS54_67:4:7:526:343 
+TCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCT +>EAS54_67:4:7:526:343 +TGAAAACAGTGTTCCCCAGATACCATCCCTGTCTT +>EAS54_67:5:117:33:262 +AATTAACATTACAACAGGAACAAAACCTCATATAT +>EAS54_67:5:117:33:262 +ACAAGCAAATGCTAAGATAATTCATCATCACTAAA +>EAS54_67:5:124:241:608 +CTGAACTTCCACGTCTCATCTAGGGGAACAGGGAG +>EAS54_67:5:124:241:608 +GGGTATAATACCTCTACATGGCTGATTATGAAAAC +>EAS54_67:5:127:828:697 +ATGCCAGGAAGATACATTGCAAGACAGACTTCATC +>EAS54_67:5:127:828:697 +TAAAGAAAAAAAAACCTGTCAAACACGAATGTTAT +>EAS54_67:5:149:639:910 +CAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGA +>EAS54_67:5:149:639:910 +TCACTAAACCAGTCCTATAAGAAATGCTCAAAAGA +>EAS54_67:5:71:408:741 +AGTCATCTATAAAGGAAATCCCATCAGAATAACAA +>EAS54_67:5:71:408:741 +TCCTGACAAGCAAATGCTAAGATAATTCATCATCA +>EAS54_67:6:107:395:312 +CAAAATATAGTTGAAAGCTCTAACAATAGACTAAA +>EAS54_67:6:107:395:312 +CAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGT +>EAS54_67:6:109:953:668 +CAATATATCCATGTAACAAATCTGCGCTTGTACTT +>EAS54_67:6:109:953:668 +CCACTTTGGAAAACAATTTGGTAATTTCGTTTTTT +>EAS54_67:6:198:503:669 +CAATGATAAAAAGATCAATTCAGCAAGAAGATATA +>EAS54_67:6:198:503:669 +CAGATTTAAAAACATGAACTAACTATATGCTGTTT +>EAS54_67:6:43:859:229 +TTCAAATGAACTTCTGTAATTGAAAAATTCATTTA +>EAS54_67:6:46:285:790 +AACGCGTAACTGCGCTCTCATTCACTCCAGCTCCC +>EAS54_67:6:46:285:790 +TCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAAG +>EAS54_67:7:101:752:996 +AACCTTACAAGCCAGAAGAGATTGGATCTAATTTT +>EAS54_67:7:101:752:996 +AAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATT +>EAS54_67:7:197:399:319 +CAAAAAACAAATACTACTAGACCTAAGAGGGATGA +>EAS54_67:7:197:399:319 +TAGAAACCAAATGAGAGAAGGAGTAGCTATACTTA +>EAS54_67:8:19:855:491 +TGGCATTTGCCTTCAGACCCTACACGAATGCGTCT +>EAS54_67:8:19:855:491 +TGTGTGTTCTCATCAACCTCATACACACACATGGT +>EAS54_67:8:46:900:610 +GATATCTGGATTCTGGGAAATTCTTCATCCTGGAC +>EAS54_67:8:46:900:610 +TACATGGCTGATTATGAAAACAATGTTCCCCAGAT +>EAS54_71:2:125:628:79 +GCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGA +>EAS54_71:2:125:628:79 +TTTATGCTATTCAGTTCTAAATATAGAAATTGAAA +>EAS54_71:2:204:264:413 +CAATGAACAACAGAAAGAAAAGTTCTTTCAAAAGG +>EAS54_71:2:204:264:413 
+TGCCCTCTTCTTCCAAAGATGAAACGCGTAACTG +>EAS54_71:2:85:686:696 +AATCAGCAAGAGAAAAGCATACAGTCATCTATAAA +>EAS54_71:2:85:686:696 +TAAACTAAGCATCATAAATGAAGTGGAAATAAAG +>EAS54_71:3:186:989:869 +ACACACATGGTTTAGGGGTATAATACCTCTACATG +>EAS54_71:3:186:989:869 +GGGAAATTCTTCATCCTGGACCCTGAGAGATTCT +>EAS54_71:3:254:32:275 +GATGGAGGGAAGAGGGACGCTGAAGAACTTTGAT +>EAS54_71:3:254:32:275 +TGCAACTGTGAGCCATCACAATGAACAACAGGAAG +>EAS54_71:3:257:288:731 +AAGAAGATATAACCATCCTACTAAATACATATGCA +>EAS54_71:3:257:288:731 +TGCTGTTTACAAGAAACTCATTAATAAAGACATG +>EAS54_71:3:267:821:860 +GCATACAGTCATCTATAAAGGAAATCCCATCAGA +>EAS54_71:3:267:821:860 +TCTTTAGTCTTGCTAGAGATTTAGACATCTAAATG +>EAS54_71:3:78:855:352 +AAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTT +>EAS54_71:3:78:855:352 +AACAATAGACTAAACCAAGCAGAAGAAAGAGGTTC +>EAS54_71:4:127:725:381 +AATTACAAAATATAGTTGAAAGCTCTAACAATAGA +>EAS54_71:4:127:725:381 +TGAACCTATGAGTCACAGGTATTCCTGAGGAAAA +>EAS54_71:4:13:981:659 +CGGGACAATGGACGAGGTAAACCGCACATTGACAA +>EAS54_71:4:13:981:659 +TGTAGCCCCTCTAAGGCGTTCTATTTGTAATGAA +>EAS54_71:4:14:88:306 +AAAGAATGCCAGGAAGATACATTGCAAGACAGAC +>EAS54_71:4:14:88:306 +AGAAGAGATTAGATCTAATTTTTGGACTTCTTAAA +>EAS54_71:4:165:397:25 +GCAACTGTGAGCCATCACAATGAACAACAGGAAGA +>EAS54_71:4:165:397:25 +TTCAACGCTTCTAGCCATTTCTTTTGGCATTTGC +>EAS54_71:4:169:256:888 +AGGTTCAGAACTTGAAGACAAGTCTCTTATGAATT +>EAS54_71:4:169:256:888 +ATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT +>EAS54_71:4:169:862:829 +AAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCA +>EAS54_71:4:169:862:829 +GCCATTTCTTTTGGCATTTGCCTTCAGACCCTAC +>EAS54_71:4:206:741:810 +ACTAACTATATGCTGTTTACAAGAAACTCATTAA +>EAS54_71:4:206:741:810 +CAAAAGTACAAAACTCACAGGTTTTATAAAACAAT +>EAS54_71:4:209:159:130 +CTTATCATGACTCTATCCCAAATTCCCAATTACGT +>EAS54_71:4:209:159:130 +GCCCCCAGCATGGTTGTACTGGGCAATACATGAG +>EAS54_71:4:233:97:262 +ACCACACATTAATACTATGTTTCTTATCTGCCCA +>EAS54_71:4:233:97:262 +GTATCAATTTGGTGTTCTGTGTAAAGTCTCAGGGA +>EAS54_71:4:252:428:683 +TGTCTTGATTTACTTGTTGTTGGTTTTCTGTTTCT +>EAS54_71:4:284:269:882 +TTTCTTTTCTCTTTTTTTTTTTTTTGTTTTTGCA 
+>EAS54_71:4:328:669:662 +GGAAGGAGCATTTTGTCAGTTACCAAATGTGTTT +>EAS54_71:4:328:669:662 +TCTTCATCCTGTACCCTGAGAGATTCTGCAGCCCA +>EAS54_71:4:72:63:435 +CCTTGCAACAACCTTGAGAACCCCAGGGAATTTG +>EAS54_71:4:72:63:435 +TGATATCTGGATTCTGGGAAATTCTTCATCCTGGA +>EAS54_71:4:73:182:444 +AACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCG +>EAS54_71:4:73:182:444 +CTTGATTTACTTGTTGTTGGTTTTCTGTTTCTTT +>EAS54_71:5:153:543:671 +GCCCCATCTCTTGTAATCTCTCTCCTTTTTGCTG +>EAS54_71:5:153:543:671 +TAAAATGTCTATTTTTGTCTTGACACCCAACTAAT +>EAS54_71:5:16:434:204 +AGATGAGAGAGAACTTCCCTGGAGGTCTGATGGC +>EAS54_71:5:16:434:204 +CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGT +>EAS54_71:5:81:685:141 +ACTGAACCTATGAGTCACAGGTATTCCTGAGGAA +>EAS54_71:5:81:685:141 +AGATATGTAGTCATCAGACTATCTAAAGTCAACAT +>EAS54_71:6:172:896:83 +AGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGT +>EAS54_71:6:172:896:83 +CATTTGCCTTCAGACCCTACACGAATGCGTCTCTA +>EAS54_71:6:215:133:909 +TGTGTGTTCTCATCAACCTCATACACACACATGG +>EAS54_71:6:215:133:909 +TTGCCTTCAGACCCTACACGAATGCGTCTCTACCA +>EAS54_71:6:224:932:942 +CTCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGG +>EAS54_71:6:224:932:942 +GTCCATGTACACACGCTGTCCTATGTACTTATCA +>EAS54_71:6:228:354:203 +AATGGACCTGTGATATCTGGATTCTGGGAAATTC +>EAS54_71:6:228:354:203 +TCAACCTCATACACACACATGGTTTAGGGGTATAA +>EAS54_71:6:264:705:89 +AAACATATGCACCTAACACAAGACTACCCAGATTC +>EAS54_71:6:264:705:89 +AAGGGGTGGAAAAAGATGTTCTACGCAAACAGAA +>EAS54_71:6:324:515:230 +AAAACAGTCTAGATGAGAGAGAACTTCCCTGGAG +>EAS54_71:6:324:515:230 +CCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGG +>EAS54_71:6:82:932:400 +GACACCCAACTAATATTTGTCTGAGCAAAACAGTC +>EAS54_71:6:82:932:400 +GTAATCTCTCTCCTCTTCGCTGCATCCCTGTCTT +>EAS54_71:7:130:260:553 +AGCAAGAGAAAAGCATACAGTCATCTATAAAGGAA +>EAS54_71:7:130:260:553 +GTGAGAAGTTTGGAAAAACTATTTGAGGAAGCAC +>EAS54_71:7:194:867:616 +ATCCATGTAACAAATCTGCGCTTGTACTTCTATT +>EAS54_71:7:194:867:616 +TTTTCCACTTTGGAAAACAATTTGGTAATTTCGTT +>EAS54_71:7:212:329:348 +AACCACACATTAATACTATGTTTCTTATCTGCAC +>EAS54_71:7:212:329:348 +CCCATCATGAAGCACTGAACTTCCACGTCTCATCT +>EAS54_71:7:250:698:842 
+AAAAAGTACAAAACTCACAGGTTTTATAAAACAA +>EAS54_71:7:250:698:842 +AAGAAAAAAAAACCTGTCAAACACGAATGTTATGC +>EAS54_71:7:80:760:490 +CATGGCTGATTATGAAAACAATGTTCCCCAGATAC +>EAS54_71:7:80:760:490 +CTGGACCCTGAGAGATTCTGCAGCCCAGCTCCAG +>EAS54_71:7:97:743:602 +AAGCAAATGCTAAGATAATTCATCATCACTAAACC +>EAS54_71:7:97:743:602 +ATTACAACAGGAACAAAACCTCATATATCAATAT +>EAS54_71:8:105:854:975 +ATTTAACAAAAGTAAATAAAACACATAGCTAAAAC +>EAS54_71:8:105:854:975 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTG +>EAS54_71:8:113:856:319 +AAATCAACAACAGTAAAATAAAACAAAGGAGGT +>EAS54_71:8:113:856:319 +CCCACTTAAGAGATATAGATTGGCAGAACAGATTT +>EAS54_71:8:215:830:609 +AAGACATCTAAATGAAAGAGGCTCAAAGAATGC +>EAS54_71:8:234:21:950 +TTTTTTTTTTTTCTCCTCTCTTTTTTTTTTTTT +>EAS54_71:8:321:642:388 +TACCAAATGTGTTTATTACCAGAGGGATGGAGG +>EAS54_71:8:321:642:388 +TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGT +>EAS54_71:8:38:856:336 +AATGGACCTGTGATATCTGGATTCTGGGAAATT +>EAS54_71:8:38:856:336 +CACACATGGTTTAGGGGTATAATACCTCTACATGG +>EAS54_73:3:203:419:243 +GGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTAT +>EAS54_73:3:203:419:243 +TTGAAAGCTCTAACAATAGACTAAACCAAGCAGAA +>EAS54_73:3:239:796:221 +ATAACTATGAAGAGACTATTGCCAGCTGACCCCCC +>EAS54_73:3:239:796:221 +GGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGT +>EAS54_73:3:23:502:103 +AGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTT +>EAS54_73:3:23:502:103 +GACACCCAACTAATATTTGTCTGAGCAAAACAGTC +>EAS54_73:3:29:833:612 +AAGCTCTAACAATAGACTAAACCAAGCAGAAGAAA +>EAS54_73:3:29:833:612 +CAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGT +>EAS54_73:3:313:827:992 +AAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATCTG +>EAS54_73:3:313:827:992 +TGTTTCTTTGTTTGATTTGGTGGAAGACATAATCC +>EAS54_73:3:37:761:635 +CCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCT +>EAS54_73:3:37:761:635 +TGTGAGCCATCACAATGAACAACAGGAAGAAAAGG +>EAS54_73:3:4:854:140 +CCTGACAAGCAAATGCTAAGATAATTCATCATCAC +>EAS54_73:3:4:854:140 +GTCATCTATAAAGGAAATCCCATCAGAATAACAAT +>EAS54_73:3:88:24:744 +GTCCTGTGATATCTGGATTCTGGGAAATTCTTCAT +>EAS54_73:3:88:24:744 +TGTCAATGTCAGGGAAGGAGCATTTTTGAAGTTTA +>EAS54_73:5:145:635:390 +TAAACCAGTCCTATAAGAAATGCTCAAAAGAATTG 
+>EAS54_73:5:145:635:390 +TTGAATAAAAAGGGATTAAATTCCCCCACTTAAGA +>EAS54_73:5:169:714:644 +CCTAATTGGTACAATGTACAATATTCTGATGATGG +>EAS54_73:5:169:714:644 +GAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAG +>EAS54_73:5:220:733:736 +CCATCCTACTAAATACATATGCACCTAACACAAGA +>EAS54_73:5:220:733:736 +TTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAA +>EAS54_73:5:231:339:551 +CTGAGAGATTCTGCAGCCCAGATCCAGATTGCTTG +>EAS54_73:5:231:339:551 +TGTCAGTTACCAAATGTGTTTATTACCAGAGGGAT +>EAS54_73:5:255:796:239 +AAGGGATTAAATTCCCCCACTTAAGAGATAGAGAT +>EAS54_73:5:255:796:239 +ATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGT +>EAS54_73:5:263:557:988 +AATGATAAAAAGATCAATTCAGCAAGAAGATATAA +>EAS54_73:5:263:557:988 +CTGCTACTCAATATATCCATGTAACAAATCTGCGC +>EAS54_73:5:271:874:367 +AAAAAACCTGTCAAACACGAATGTTATGCCCTGCT +>EAS54_73:5:271:874:367 +ATATGTAGTCATCAGACTATCTAAAGTCAACATTA +>EAS54_73:5:3:233:911 +GTCTCATCTAGGGGAACAGGGAGGTGCACTAATGC +>EAS54_73:5:3:233:911 +TGGCTGATTATGAAAACAATGTTCCCCAGATACCA +>EAS54_73:5:44:498:945 +ATAGGGATGGAGGGAAGAGGGCCGCTGAAGAACTT +>EAS54_73:5:44:498:945 +CCTATAAGCCGTTCTATTTGTAATGAAAACTATAT +>EAS54_73:5:53:61:31 +AAGAAACTCATTAATAAAGACATGAGTTCAGATAA +>EAS54_73:5:53:61:31 +CAATTCAGCAAGAAGATATAACCATCCTACTAAAT +>EAS54_73:7:134:243:630 +ACATTACTACCCTGCAATTAATATAATTGTGTCCA +>EAS54_73:7:134:243:630 +TCATCTAGGGGAACAGGGAGGCGCACTAATGAGCT +>EAS54_73:7:200:65:291 +CAATACTCACCATCATAAATACACACAAAAGTACA +>EAS54_73:7:200:65:291 +CTAACTATATGCTGTTTACAAGAAACTCATTAATA +>EAS54_73:7:223:440:667 +AATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATT +>EAS54_73:7:223:440:667 +TTCAGAACTTGAAGACAAGTCTCTTATGAATTAAC +>EAS54_73:7:254:572:431 +AAGAGATATAGATTGGCAGAACAGATTTAAAAACA +>EAS54_73:7:254:572:431 +ATCAGATAAAGCACACTTTAAATCAACAACAGTAA +>EAS54_73:7:63:854:610 +AATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGT +>EAS54_73:7:63:854:610 +GACTATTGCCAGATGAACCACACATTAATACTATG +>EAS54_73:7:97:892:419 +AATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTA +>EAS54_73:7:97:892:419 +GCAACTAGGTAAAAAATTAACATTACAACAGGAAC +>EAS54_81:2:128:394:455 +GTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTC +>EAS54_81:2:128:394:455 
+TTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTT +>EAS54_81:2:27:856:401 +ACCTCTACATGGCTGATTATGAAAACAATGTTCCC +>EAS54_81:2:27:856:401 +TCATCTAGGGGAACAGGGAGGTGCACTAATGCGCT +>EAS54_81:2:280:512:316 +GGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTG +>EAS54_81:2:280:512:316 +TTACTGTCATAACTATGAAGAGACTATTGCCAGCT +>EAS54_81:2:285:367:932 +ATAGACCCCCTTGCAACAACCTTGAGAACCCCAGG +>EAS54_81:2:285:367:932 +GCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGT +>EAS54_81:2:317:72:221 +AAAAAAATTCTAAAATCAGCAAGAGAAAAGCATAC +>EAS54_81:2:317:72:221 +ATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTC +>EAS54_81:2:31:98:804 +CACGAATGCGTCTCTACCACAGGGGGCTGCGCGGC +>EAS54_81:2:31:98:804 +CTTTACTGTCATAACTATGAAGAGACTATTGCCAG +>EAS54_81:2:49:330:699 +AAAGTTCAATACTCACCATCATAAATACACACAAA +>EAS54_81:2:49:330:699 +TTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAA +>EAS54_81:2:5:491:391 +CCCTGCTCACAGTTTCTGCCCCCAGCATGGTTGTA +>EAS54_81:2:5:491:391 +TTGGCATTTGCCTTCAGACCCTACACGAATGCGTC +>EAS54_81:6:11:801:386 +AAAGGGATTAAATTCCCCCACTTAAGAGATATAGA +>EAS54_81:6:11:801:386 +CACTATAAATCAACAACAGTAAAATAAAACAAAGG +>EAS54_81:6:122:589:134 +AAAACCTGTCAAACACGAATGTTATGCCCTGCTAA +>EAS54_81:6:122:589:134 +ACAGACTTCATCAAGATATGTAGTCATCAGACTAT +>EAS54_81:6:199:511:426 +AATTCTTCATCCTGGACCCTGAGAGATTCTGCAGC +>EAS54_81:6:199:511:426 +GGGGTATAATACCTCTACATGGCTGATTATGAAAA +>EAS54_81:6:204:779:181 +AACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATG +>EAS54_81:6:204:779:181 +CTTTTGGCATTTGCCTTCAGACCCTACACGAATGC +>EAS54_81:6:265:251:147 +AAAAGTACAAAACTCACAGGTTTTATAAAACAATT +>EAS54_81:6:265:251:147 +TGTTATGCCCTGCTAAACTAAGCATCATAAATGAA +>EAS54_81:6:273:424:207 +AAGAGATATAGATTGGCAGAACAGATTTAAAAACA +>EAS54_81:6:273:424:207 +TAAATCAACAACAGTAAAATAAAACAAAGGAGGTC +>EAS54_81:6:35:186:412 +ATAACCATCCTACTAAATACATATGCACCTAACAC +>EAS54_81:6:35:186:412 +CATGTAACAAATCTGCGCTTGTACTTCTAAATCTA +>EAS54_81:6:75:917:886 +ACTGGGCAATACATGAGATTATTAGGAAATGCTTT +>EAS54_81:6:75:917:886 +TTATCATGACTCTATCCCAAATTCCCAATTACGTC +>EAS54_81:7:124:253:889 +CTAAGCATCATAAATGAAGGGGAAATAAAGTCAAG +>EAS54_81:7:124:253:889 
+TCAGCAAGAGAAAAGCATACAGTCATCTATAAAGG +>EAS54_81:7:166:979:531 +ATCCCACGCTTCCTATGGAAAGGTTGTTGGGAGAT +>EAS54_81:7:166:979:531 +TGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCCC +>EAS54_81:7:226:869:36 +ATATATAAAGGAAATCCCATCAGAATAACAATGGG +>EAS54_81:7:226:869:36 +TGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTG +>EAS54_81:7:246:205:734 +CTCCAGGGAAGTTATCTCTCATCTAGANNNNNTTG +>EAS54_81:7:246:205:734 +CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGT +>EAS54_81:7:293:355:321 +GCTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTC +>EAS54_81:7:293:355:321 +TTACCAAATGTGTTTATTACCAGAGGGATGGAGGG +>EAS54_81:7:324:472:791 +AAAGCCAATACTTTACTGCTACTCAATATATCCAT +>EAS54_81:7:324:472:791 +TGATAAAAAGATCAATTCAGCAAGAAGATATAACC +>EAS54_81:7:325:150:465 +AACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATG +>EAS54_81:7:325:150:465 +TGATGCCCTCTTCTTCCAAAGATGAAACGCGTAAC +>EAS54_81:7:74:596:137 +CTCTATCCCAAATTCCCAATTACGTCCTATCTTCT +>EAS54_81:7:74:596:137 +GGTCCCTGCCCCATCGCTTGTAATCTCTCGCCTTT +>EAS54_81:8:130:912:658 +TACACACACATGGTTTAGGGGTATAATACCTCTAC +>EAS54_81:8:130:912:658 +TCCCATCATGAAGCACTGAACTTCCACGTCTCATC +>EAS54_81:8:142:858:903 +ATGGTTGTACTGGGCAATACATGAGATTATTAGGA +>EAS54_81:8:142:858:903 +CTATCCCAAATTCCCAATTACGTCCTATCTTCTTC +>EAS54_81:8:14:360:580 +ACCCTACACGAATGCGTCTCTACCACAGGGGGCGG +>EAS54_81:8:14:360:580 +ATGAGATTATTAGGAAATGCTTTACTGTCATAACT +>EAS54_81:8:159:71:155 +AAAGGTTGTTGGGAGATTTTTAATGATTCCTCGAT +>EAS54_81:8:159:71:155 +GTCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAA +>EAS54_81:8:177:800:714 +CTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGAT +>EAS54_81:8:177:800:714 +TTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTG +>EAS54_81:8:271:180:509 +AATACTCACCATCATAAATACACACAAAAGTACAA +>EAS54_81:8:271:180:509 +ATATAGATTGGCAGAACAGATTTAAAAACATGAAC +>EAS54_81:8:40:925:442 +GAGGTTCAGAACTTGAAGACAAGTCTCTTATGAAT +>EAS54_81:8:40:925:442 +TTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCT +>EAS54_81:8:41:530:663 +AGAACAGATTTAAAAACATGAACTAACTATATGCT +>EAS54_81:8:41:530:663 +ATACTCACCATCATAAATACACACAAAATTACAAA +>EAS54_81:8:63:930:152 +ACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATTC +>EAS54_81:8:63:930:152 
+ATCCCACGCTTCCTATGGAAAGGTTGTTGGGAGAT +>EAS54_81:8:78:735:536 +TTTTTTTTTTTTTCATTTCTCTTTTTTTTTTTTTT +>EAS56_53:1:124:243:35 +GCATATCCAGATTGCTGGTGGTCTGACAGGCAGCA +>EAS56_53:1:124:243:35 +TGTGTTTATTACCAGAGGGATGGAGGGAAGAGCGA +>EAS56_53:1:154:118:488 +AAAAGCATACAGTCATCTATAAAGGAAATCCCATC +>EAS56_53:1:154:118:488 +AAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGA +>EAS56_53:1:23:403:981 +TACTGTCATAACTATGAAGAGACTATTGCCAGATG +>EAS56_53:1:23:403:981 +TCTTCATAGGGAAGAACAGCTTAGGTATCAATTTG +>EAS56_53:1:47:303:887 +ACATTACTACCCTGCCATTAATATACTTGTGTCCA +>EAS56_53:1:47:303:887 +CACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGA +>EAS56_53:1:92:875:345 +AAATGCTCAAAAGAATTGTAAAAGTCAAAATTAAA +>EAS56_53:1:92:875:345 +CGGAAACCTTACAAGCCAGAAGAGATTGGATCTAA +>EAS56_53:2:170:265:818 +GAGGGGAAGCTTTCAACGCTTCTAGCACTTTCTTT +>EAS56_53:2:170:265:818 +TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG +>EAS56_53:2:59:286:290 +AAAGGAAATCCCATCAGAATAACAATGGGCTTCTC +>EAS56_53:2:59:286:290 +TCAAGTCTTTCCTGACAAGCAAATGCTAAGATAAT +>EAS56_53:3:101:809:776 +GTACTTCTAAATCTATAAAAAAATTAAAATTTAAC +>EAS56_53:3:101:809:776 +TATGCACCTAACACAAGACTACCCAGATTCATAAA +>EAS56_53:3:107:738:484 +GGTCATCATACAATGATAAAAAGATCAATTCAGCA +>EAS56_53:3:107:738:484 +TGAACTAACTATATGCTGTTTACAAGAAACTCATT +>EAS56_53:3:126:558:408 +TTCTATTTGTAATGAAAACTATATTTATGCTATTC +>EAS56_53:3:126:558:408 +TTTATTACCAGAGGGATGGAGGGAAGAGGGACGCT +>EAS56_53:3:134:126:465 +AAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAA +>EAS56_53:3:134:126:465 +AAATCAGCAAGAGAAAAGCATACAGTCATCTATAA +>EAS56_53:4:130:568:978 +TAAATATAGAAATTGAAACAGCTGTGTTTAGTGAC +>EAS56_53:4:130:568:978 +TGAAACGCGAAACTGCACTCTCATTCACTCCAGCT +>EAS56_53:4:153:977:200 +TCATCAACCTCATACACACACATGGTTTAGGGGTA +>EAS56_53:4:153:977:200 +TGTCACCCAATGGACCTGTGATATCTGGATTCTGG +>EAS56_53:4:154:762:630 +AGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTT +>EAS56_53:4:154:762:630 +CCTTCAGACCCTACACGAATGCGTCTCTACCACAG +>EAS56_53:4:168:528:288 +CAGGCTGCAACTGTGAGCCATCACAATGAACAACA +>EAS56_53:4:168:528:288 +GCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCC +>EAS56_53:4:45:707:147 
+AAAAATTCTAAAATCAGCAAGAGAAAAGCATACAG +>EAS56_53:4:45:707:147 +ATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAG +>EAS56_53:6:180:695:621 +ATACAGTCATCTATAAAGGAAATCCCATCAGAATA +>EAS56_53:6:180:695:621 +TACTGAAAAGCAAATGCTAAGATAATTCATCATCA +>EAS56_53:7:22:22:934 +ATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAA +>EAS56_53:7:22:22:934 +CTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTG +>EAS56_53:8:122:430:882 +CCTACTAAATACATATGCACCTAACACAAGACTAC +>EAS56_53:8:122:430:882 +CTATAAAAAAATTAAAATTTAACAAAAGTAAATAA +>EAS56_53:8:179:549:753 +TACTACCCTGCAATTAATATAATTGTGTCCATGTA +>EAS56_53:8:179:549:753 +TGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTC +>EAS56_53:8:28:701:724 +TGGACCCTGAGAGATTCTGCAGCCCAGCTCCAGAT +>EAS56_53:8:28:701:724 +TTTTGTCAGTTACCAAATGTGTTTATTACCAGAGG +>EAS56_57:1:122:38:103 +ATAAAACAATTAATTGAGACTACAGAGCAACTAGG +>EAS56_57:1:122:38:103 +GAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTAC +>EAS56_57:1:125:884:276 +TTGCAAGACAGACTTCATCAAGATATGTAGTCATC +>EAS56_57:1:125:884:276 +TTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAAC +>EAS56_57:1:189:130:136 +ATTTAGACATCTAAATGAAAGAGGCTCAAAGAATG +>EAS56_57:1:189:130:136 +GGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGA +>EAS56_57:1:189:503:110 +ATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGA +>EAS56_57:1:189:503:110 +CTAACAATAGACTAAACCAAGCAGAAGAAAGAGTT +>EAS56_57:1:228:182:717 +GGTCTGACAGGCTGCAACTGTGAGCCATCCCCATG +>EAS56_57:1:228:182:717 +TCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAA +>EAS56_57:1:278:440:902 +AAGCACTGAACTTCCACGTCTCATCTAGGGGAACA +>EAS56_57:1:278:440:902 +ATACTATGTTTCTTATCTGCACATTACTACCCTGC +>EAS56_57:1:288:384:444 +TAACTTTGAATAAAAAGGGATTAAATTCCCCCACT +>EAS56_57:1:288:384:444 +TCAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAA +>EAS56_57:2:158:909:321 +ATACAGTCATCTATAAAGGAAATCCCATCAGAATA +>EAS56_57:2:158:909:321 +TTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCT +>EAS56_57:2:178:192:499 +GTGAGCCATCACAATGAACAACAGGAAGAAAAGGT +>EAS56_57:2:178:192:499 +TCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCC +>EAS56_57:2:206:873:186 +ACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCA +>EAS56_57:2:206:873:186 +GTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTG +>EAS56_57:2:236:841:20 
+AAAGGAAATCCCATCAGAATAACAATGGGCTTCTC +>EAS56_57:2:236:841:20 +GCTAAGATAATTCATCATCACTAAACCAGTCCTAT +>EAS56_57:2:237:855:581 +CTAAACGCCCATACTTTACTGCTACTCAATATATC +>EAS56_57:2:237:855:581 +TACAATGATAAAAAGATCAATTCAGCAAGAAGATA +>EAS56_57:2:23:268:529 +TGAAAGAGGCTCAAAGAATGCCAGGAAGATACATT +>EAS56_57:2:259:42:969 +GCTGTAATGATGCCCCTTGGCCATCACCCGGTCCC +>EAS56_57:2:259:42:969 +GGAAAGGTTGTTGGGAGATTTTTAATGATTCCTCA +>EAS56_57:2:262:297:601 +TGGACCTGTGATATCTGGATTCTGGGAAATTCTTC +>EAS56_57:2:262:297:601 +TGTTCTCATCAACCTCATACACACACATGGTTTAG +>EAS56_57:2:284:597:682 +AAAAAAAAACCTGTCAAACACGAATGTTATGCCCT +>EAS56_57:2:284:597:682 +TTCATCAAGATATGTAGTCATCAGACTATCTAAAG +>EAS56_57:2:44:153:969 +AAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAA +>EAS56_57:2:44:153:969 +AAAAGCATACAGTCATCTATAAAGGAAATCCCATC +>EAS56_57:3:112:729:591 +ATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTT +>EAS56_57:3:112:729:591 +GAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCT +>EAS56_57:3:119:761:239 +CGTCTCTACCACAGGGGGCTGCGCGGTTTCCCATC +>EAS56_57:3:119:761:239 +TGAAGAGACTATTGCCAGATGAACCACACATTAAT +>EAS56_57:3:285:489:327 +AATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGA +>EAS56_57:3:285:489:327 +CTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAA +>EAS56_57:3:319:174:811 +CACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATG +>EAS56_57:3:319:174:811 +TTATCTGCACATTTCTACCCTGCAATTAATATAAT +>EAS56_57:3:41:739:907 +CAGAAGAGATTGGATCTAATTTTTGGACTTCTTAA +>EAS56_57:3:41:739:907 +GAATGCCAGGAAGATACATTGCAAGACAGACTTCA +>EAS56_57:3:81:786:340 +TACTATGTTTCTTATCTGCACATTACTACCCTGCA +>EAS56_57:3:81:786:340 +TCCACGTCTCATCTAGGGGAACAGAGAGGTGCACT +>EAS56_57:4:233:478:792 +GCCTTCAGACCCTACACGAATGCGTCTCTACCACA +>EAS56_57:4:233:478:792 +GTGTTCTCATCAACCTCATACACACACATGGTTTA +>EAS56_57:4:262:965:756 +AGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCC +>EAS56_57:4:262:965:756 +TTAATATAATTGTGTCCATGTACACACGCTGTCCT +>EAS56_57:4:71:707:568 +CTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTG +>EAS56_57:4:71:707:568 +GTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGAT +>EAS56_57:4:98:862:154 +AAAGATCAATTCAGCAAGAAGATATAACCATCCTA +>EAS56_57:4:98:862:154 
+TGCTACTCAATATATCCATGTAACAAATCTGCGCT +>EAS56_57:5:105:521:563 +TATGTACTTATCATGACTCTATCCCAAATTCCCAA +>EAS56_57:5:105:521:563 +TGTAATGCTGCCCCTTGGCCATCCCCCGGTCCCTG +>EAS56_57:5:136:389:320 +TCCTATGTACTTATCATGACTCTATCCCAAATTCC +>EAS56_57:5:136:389:320 +TTCTGCCCCCAGCATGGTTGTACTGGGCAATACAT +>EAS56_57:5:145:383:182 +AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT +>EAS56_57:5:145:383:182 +TTGAAGACAAGTCTCTTATGAATTAACCCAGTCAG +>EAS56_57:5:207:926:427 +GGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTG +>EAS56_57:5:207:926:427 +TAGGAAATGCTTTACTGTCATAACTATGAAGAGAC +>EAS56_57:5:214:644:390 +AAAAACATGAACTAACTATATGCTGTTTACAAGAA +>EAS56_57:5:214:644:390 +AAATAAAACAAAGGAGGTCATGATACAATGATAAA +>EAS56_57:5:24:284:360 +AGTCATCAGACTATCTAAAGTCAACATGAAGGAAA +>EAS56_57:5:24:284:360 +CTGTCAAACACGAATGTTATGCCCTGCTAAACTAA +>EAS56_57:5:266:133:789 +AAAAAATTAACATTACAACAGGAACAAAACCTCAT +>EAS56_57:5:266:133:789 +GTGGAAAAAGATGTTCTACGCAAACAGAAACCAAA +>EAS56_57:5:303:542:924 +AATAAAACAAAGGAGGTCATCATACAATGATAAAA +>EAS56_57:5:303:542:924 +CAATGTACAATATTCTGATGATGGTTACACTAAAA +>EAS56_57:5:309:109:987 +AACAAATACTACTAGACCTAAGAGGGATGAGAAAT +>EAS56_57:5:309:109:987 +GAGAGAAGGAGTAGCTATACTTATATCAGATAAAG +>EAS56_57:5:30:788:376 +ACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT +>EAS56_57:5:30:788:376 +TGTTTCTTATCTGCACATTACTACCCTGCAATTAA +>EAS56_57:5:324:728:956 +ATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGT +>EAS56_57:5:324:728:956 +TGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCT +>EAS56_57:5:53:544:889 +AGGCTCAAAGAATGCCAGGAAGATACATTGCAAGA +>EAS56_57:5:53:544:889 +GCTTCTCAGCGGAAACCTTACAAGCCAGAAGAGAT +>EAS56_57:5:71:994:576 +AAGATAATTCATCATCACTAAACCAGTCCTATAAG +>EAS56_57:5:71:994:576 +TAGGTAAAAAATTAACATTACAACAGGAACAAAAC +>EAS56_57:6:145:144:796 +ATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGTC +>EAS56_57:6:145:144:796 +GGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGAC +>EAS56_57:6:157:643:175 +GGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGA +>EAS56_57:6:157:643:175 +TTCTATTTGTAATGAAAACTATATTTATGCTATTC +>EAS56_57:6:175:289:351 +CATCCTACTAAATACATATGCACCTAACACAAGAC +>EAS56_57:6:175:289:351 
+TGCGCTTGTACTTCTAAATCTATAAAAAAATTAAA +>EAS56_57:6:190:289:82 +AGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCAC +>EAS56_57:6:190:289:82 +CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA +>EAS56_57:6:21:553:57 +AAATACTACTAGACCTAAGAGGGATGAGAAATTAC +>EAS56_57:6:21:553:57 +AACAAAAGTAAATAAAACACATAGCTAAAACTAAA +>EAS56_57:6:234:787:12 +AAGCTTGGGCTGTAATGATGCCCCTTGGCCATCAC +>EAS56_57:6:234:787:12 +ACACGCTGGCCTATGTACTTATAATGACTCTATCC +>EAS56_57:6:325:759:288 +GCTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCT +>EAS56_57:6:325:759:288 +GGAGCATTTTGTCAGTTACCAAATGTGTTTATTAC +>EAS56_57:6:44:280:641 +AACCCCCTTGCAACAACCTTGAGAACCCCAGGGAA +>EAS56_57:6:44:280:641 +TCATTCACTCCAGCTCCCTGTCACCCAATGGACCT +>EAS56_57:6:4:223:776 +AGTAACTGAACCTATGAGTCACAGGTATTCCTGAG +>EAS56_57:6:4:223:776 +TGTAGTCATCAGACTATCTAAAGTCAACATGAAGG +>EAS56_57:7:159:125:297 +GCAAGCTAGAGTCCCATTTGGAGCCACTCTAAGAC +>EAS56_57:7:159:125:297 +GGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTA +>EAS56_57:7:247:522:670 +CTATCCCAAATTCCCAATTACGTCCTATCTTCTTC +>EAS56_57:7:247:522:670 +TACATGAGATTATTAGGAAATGCTTTACTGTCATA +>EAS56_57:7:273:562:954 +AAAGTTCAATACTCACCATCATAAATACACACAAA +>EAS56_57:7:273:562:954 +TTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAA +>EAS56_57:7:287:258:321 +TAATACTATGTTTCTTATCTGCACATTACTACCCT +>EAS56_57:7:287:258:321 +TGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCAT +>EAS56_57:7:33:954:724 +CCTAAGAGGGATGAGAAATTACCTAATTGGTACAA +>EAS56_57:7:33:954:724 +TCAGATAAAGCACACTTTAAATCAACAACAGTAAA +>EAS56_57:7:57:826:977 +ATTGGATCTAATTTTTGGACTTCTTAAAGAAAAAA +>EAS56_57:7:57:826:977 +TGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTT +>EAS56_57:7:76:786:458 +GGAGCATTTTGTCAGTTACCAAATGTGTTTATTAC +>EAS56_57:7:76:786:458 +TCTGGGAAATTCTTCATCCTGGACCCTGAGAGATT +>EAS56_57:8:72:44:435 +AAAAGCAAAAACAAAAACTATGCTAAGTATTGGTA +>EAS56_57:8:72:44:435 +ATTGGTACAATGTACAATATTCTGATGATGGTTAA +>EAS56_59:1:126:526:276 +GAACTTCTGTAATTGAAAAATTCATTTAAGAAATT +>EAS56_59:1:128:584:952 +ATCATACAATGATAAAAAGATCAATTCAGCAAGAA +>EAS56_59:1:128:584:952 +GCCCATACTTTACTGCTACTCAATATATCCATGTA +>EAS56_59:1:219:294:861 +CTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCC 
+>EAS56_59:1:219:294:861 +TATAATTGTGTCCATGTACACACGCTGTCCTCTGT +>EAS56_59:1:248:122:558 +AATGTACAATATTCTGATGATGGTTACACTAAAAG +>EAS56_59:1:248:122:558 +GGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGA +>EAS56_59:1:278:906:933 +AATATAATTGTGTCCATGTACACACGCTGTCCTAT +>EAS56_59:1:278:906:933 +TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG +>EAS56_59:1:82:670:302 +AGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACG +>EAS56_59:1:82:670:302 +TCCTACTAAATACATATGCACCTAACACAAGACTA +>EAS56_59:1:93:490:901 +AGAAAAGCATACAGTCATCTATAAAGGAAATCCCA +>EAS56_59:1:93:490:901 +GTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGA +>EAS56_59:2:104:402:732 +AAATCAGCAAGAGAAAAGCATACAGTCATCTATAA +>EAS56_59:2:104:402:732 +AATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACA +>EAS56_59:2:162:272:415 +ACATGAACTAACTATATGCTGTTTACAAGAAACTC +>EAS56_59:2:162:272:415 +ATAAAAAGATCAATTCAGCAAGAAGATATAACCAT +>EAS56_59:2:177:266:842 +ACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGT +>EAS56_59:2:177:266:842 +GGCATTTGCCTTCAGACCCTACACGAATGCGTCTC +>EAS56_59:2:177:552:234 +ACGCTGTCCTATGTACTTATCATGACTCTATCCCA +>EAS56_59:2:177:552:234 +GCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCC +>EAS56_59:2:201:768:529 +AGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTT +>EAS56_59:2:201:768:529 +CAGACTATCTAAAGTCAACATGAAGGAAAAAAATT +>EAS56_59:2:239:1001:406 +AGCATACAGTCATCTATAAAGGAAATCCCATCAGA +>EAS56_59:2:239:1001:406 +CTTTCCTGACAAGCAAATGCTAAGATAATTCATCA +>EAS56_59:2:60:677:921 +CATCAGACTATCTAAAGTCAACATGAAGGAAAAAA +>EAS56_59:2:60:677:921 +GTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAAT +>EAS56_59:3:149:953:349 +AGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTG +>EAS56_59:3:149:953:349 +TTCTTTTGGCATTTGCCTTCAGACCCTACACGAAT +>EAS56_59:3:166:626:836 +AGTACAAAACTCACAGGTTTTATAAAACAATTAAT +>EAS56_59:3:166:626:836 +CTGTCAAACACGAATGTTATGCCCTGCTAAACTAA +>EAS56_59:3:182:1002:639 +AAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAG +>EAS56_59:3:182:1002:639 +AACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGA +>EAS56_59:3:316:25:230 +GATGCCCCTTGGCCATCACCCGGTCCCTGCCCCAT +>EAS56_59:3:316:25:230 +TGTCCTATGTACTTATCATGACTCTATCCCAAATT +>EAS56_59:4:119:651:88 +GATTGGATCTAATTTTTGGACTTCTTAAAGAAAAA 
+>EAS56_59:4:119:651:88 +GCTCAAAGAATGCCAGGAAGATACATTGCAAGACA +>EAS56_59:4:262:928:237 +TGAGTTCAGGTAAAGGTGTGGAAAAAGATGTTCTA +>EAS56_59:4:262:928:237 +TTGAGACTACAGAGCAACTAGGTAAAAAATTAACA +>EAS56_59:4:267:394:437 +AAACATCATAAATACACACAAAAGTACAAAACTCA +>EAS56_59:4:267:394:437 +GGACTTCTTAAAGAAAAAAAAACCTGTCAAACACG +>EAS56_59:4:278:524:521 +CACATTAATACTATGTTTCTTATCTGCACATTACT +>EAS56_59:4:278:524:521 +CCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAA +>EAS56_59:4:329:577:757 +AACGCGTAACTGCGCTCTCATTCACTCCAGCTCCC +>EAS56_59:4:329:577:757 +TCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAAG +>EAS56_59:5:113:694:725 +CTGTAATGATGCCCCTTGGCCATCACCCGGTCCCT +>EAS56_59:5:113:694:725 +GTACACACGCTGTCCTATGTACTTATCATGACTCT +>EAS56_59:5:125:137:58 +AACTATATGCTGTTTACAAGAAACTCATTAATAAA +>EAS56_59:5:125:137:58 +GGTTTTATAAAACAATTAATTGAGACTACAGAGCA +>EAS56_59:5:181:713:140 +AGGGGAAATAAAGTCAAGTATTTCCTGACAAGCAA +>EAS56_59:5:181:713:140 +CTACAGAGCAACAAGGTAAAAAATTAACATTACAA +>EAS56_59:5:198:929:684 +AAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGA +>EAS56_59:5:198:929:684 +GAAATCCCATCAGAATAACAATGGGCTTCTCAGCA +>EAS56_59:5:232:336:46 +ATTTTTTTTTTTTCTTTTCTCTTGTTTCTTTTTTT +>EAS56_59:5:325:544:349 +AAAATTAAAGTTCAATACTCACCATCATAAATACA +>EAS56_59:5:325:544:349 +CAAGCCAGAAGAGATTGGATCTAATTTTTGGACTT +>EAS56_59:5:90:629:652 +AGCCCATACTTTACTGCTACTCAATATATCCATGT +>EAS56_59:5:90:629:652 +ATATTGCTAGTGGGAGTATAAATTGTTTTCCACTT +>EAS56_59:6:187:925:547 +GGCTGATTATGAAAACAATGTTCCCAAGATACCAT +>EAS56_59:6:187:925:547 +TGAACTTCCACGTCTCATCTAGGGGAACAGGGAGG +>EAS56_59:6:199:327:965 +ATCTGGATTCTGGGAAATTCTTCATCCTGGACCCT +>EAS56_59:6:199:327:965 +NCAACAACCTTGAGAACCCCAGGGAATTTGTCAAT +>EAS56_59:6:227:657:95 +GTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGA +>EAS56_59:6:227:657:95 +GTCATCTATAAAGGAAATCCCATCAGAATAACAAT +>EAS56_59:6:286:753:854 +TCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTC +>EAS56_59:6:286:753:854 +TTATCATGACTCTATCCCAAATTCCCAATTACGTC +>EAS56_59:6:312:837:406 +AGGTGCACTAATGCGCTCCACGCCCAAGCCCTTCT +>EAS56_59:6:312:837:406 +CTGCACATTACTACCCTGCAATTAATATAATTGTG +>EAS56_59:6:3:186:68 
+AAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATT +>EAS56_59:6:3:186:68 +TTACAAGCCAGAAGAGATTGGATCTAATTTTTGTA +>EAS56_59:6:89:457:591 +ATGCCCTGCTAAACTAAGCATCATAAATGAAGGGG +>EAS56_59:6:89:457:591 +CAGGTTTTATAAAACAATTAATTGAGACTACATAG +>EAS56_59:7:260:985:520 +TCCCTGTCACCCAATGGACCTGTGATATCTGGATT +>EAS56_59:7:260:985:520 +TGCAACAACCTTGAGAACCCCAGGGAATTTGTCAA +>EAS56_59:7:318:679:883 +GATTTAGACATCTAAATGAAAGAGGCTCAAAGAAT +>EAS56_59:7:319:246:304 +CTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGA +>EAS56_59:7:319:246:304 +TGTCACCCAATGGACCTGTGATATCTGGATTCTGG +>EAS56_59:7:82:902:868 +CTGTCACCCAATGGACCTGTGATATCTGGATTCTG +>EAS56_59:7:82:902:868 +TTGCAACAACCTTGAGAACCCCAGGGAATTTGTCA +>EAS56_59:8:49:182:192 +ACACAAAAGTACAAAACTCACAGGTTTTATAAAAC +>EAS56_59:8:49:182:192 +GTTATGCCCTGCTAAACTGAGCATCATAAATGAAG +>EAS56_59:8:80:542:549 +AGTAAACTCTCAAATATTGCTAGTGGGAGTATAAA +>EAS56_59:8:80:542:549 +CTAAAAGCCCATACTTTACTGCTACTCAATATATC +>EAS56_61:1:119:880:781 +ACAAATCTGCGCTTGTACTTCTAAATCTATAACAA +>EAS56_61:1:119:880:781 +ACATATGCACCTAACACAAGACTACCCAGATTCAT +>EAS56_61:1:210:880:606 +GGAGCATTTTGTCAGTTACCAAATGTGTTTATTAT +>EAS56_61:1:210:880:606 +TCCTGGACCCTGAGAGATTCTGCAGCCCAGCTCCA +>EAS56_61:1:303:184:14 +CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCA +>EAS56_61:1:303:184:14 +CAATGGACCTGTGATATCTGGATTCTGGGAAATTC +>EAS56_61:2:152:860:286 +AGACTATTGCCAGATGAACCACACATTAATACTAT +>EAS56_61:2:152:860:286 +TTAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCA +>EAS56_61:3:140:522:212 +CGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGA +>EAS56_61:3:140:522:212 +GACAGGCTGCAACTGTGAGCCATCACAATGAACAA +>EAS56_61:3:165:665:220 +ACAATTAATTGAGACTACAGAGCAACTAGGTAAAA +>EAS56_61:3:165:665:220 +GGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATG +>EAS56_61:3:208:118:673 +AAGAGGCTCAAAGAATGCCAGGAAGATACATTGCA +>EAS56_61:3:208:118:673 +GAACAGAGCTTTCAAGAAGTATGAGATTATGTAAA +>EAS56_61:3:260:827:289 +AAACCTCATATATCAATATTAACTTTGAATAAAAA +>EAS56_61:3:260:827:289 +TGTTCTACGCAAACAGAAACCAAATGAGAGAAGGA +>EAS56_61:3:45:758:616 +ATAAATTGTTTTCCACTTTGGAAAACAATTTGGTA +>EAS56_61:3:45:758:616 
+CATACTTTACTGCTACTCAATATATCCATGTAACA +>EAS56_61:3:5:45:441 +TTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTT +>EAS56_61:4:262:456:74 +TACCAAATGTGTTTATTACCAGAGGGATGGAGGGA +>EAS56_61:4:262:456:74 +TGGGAAATTCTTCATCCTGGACCCTGAGAGATTCT +>EAS56_61:5:194:470:416 +AAACTATTTGAGGAAGTAATTGGGGAAAACCTCTT +>EAS56_61:5:194:470:416 +TCAGAACTTGAAGACAAGTCTCTTATGAATTAACC +>EAS56_61:5:209:824:866 +ATCATCACTAAACCAGTCCTATAAGAAATGCTCAA +>EAS56_61:5:209:824:866 +CAGCAACAAAACCTCATATATCAATATTAACTTTG +>EAS56_61:5:263:314:696 +AACTCATTAATAAAGACATGAGTTCAGGTAAAGGG +>EAS56_61:5:263:314:696 +AAGATCAATTCAGCAAGAAGATATAACCATCCTAC +>EAS56_61:5:272:240:950 +CAGCAGAGCTTGGATCTAATTTTTGGACTTCTTCA +>EAS56_61:5:272:240:950 +TCAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAA +>EAS56_61:6:10:106:737 +ACAAGAAACTCATTAATAAAGACATGAGTTCAGGT +>EAS56_61:6:10:106:737 +ACAATGATAAAAAGATCAATTCAGCAAGAAGATAT +>EAS56_61:6:160:272:398 +AATGTTCCCCAGATACCATCCCTGTCTTACTTCCA +>EAS56_61:6:160:272:398 +GTGCACTAATGCGCTCCACGCCCAAGCCCTTCTCA +>EAS56_61:6:226:370:91 +AGAATAACAATGGGCTTCTCAGCGGAAACCTTACA +>EAS56_61:6:226:370:91 +AGGCTCAAAGAATGCCAGGAAGATACATTGCAAGA +>EAS56_61:6:227:259:597 +AATATAGAAATTGAAACAGCTGTGTTTAGTGCCTT +>EAS56_61:6:256:67:461 +TCATGTTTGTGTCTTTCTATGCATTTTTTTTTTTT +>EAS56_61:6:256:67:461 +TTGTTTTTTCTTCTTTTCTCTTTTTTTTTTTTTTT +>EAS56_61:6:283:963:234 +AAAAAGATGTTCTACGCAAACAGAAACCAAATGAG +>EAS56_61:6:283:963:234 +ACATATGCACCTAACACAAGACTACCCAGATTCAT +>EAS56_61:6:307:208:477 +AAAGTCAAAATTAAAGTTCAATACTCACCATCATA +>EAS56_61:6:307:208:477 +ACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAA +>EAS56_61:7:280:133:495 +AGATGAACCACACATTAATACTATGTTTCTTATCT +>EAS56_61:7:280:133:495 +CCATCATGAAGCACTGAACTTCCACGTCTCATCTA +>EAS56_61:7:41:745:603 +CATTGCAAGACAGACTTCATCAAGATATGTAGTCA +>EAS56_61:7:41:745:603 +TAATTTTTGGACTTCTTAAAGAAAAAAAAACCTGT +>EAS56_61:7:7:682:201 +CATACAGTCATCTATAAAGGAAATCCCATCAGAAT +>EAS56_61:7:7:682:201 +GGAAAACCTCTTTAGTCTTGCTAGAGATTTAGACA +>EAS56_61:8:60:358:494 +GGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACA +>EAS56_61:8:60:358:494 
+TACCCAGATTCATAAAACAAATACTACTAGACCTA +>EAS56_61:8:7:171:402 +GTGATATCTGGATTCTGGGAAATTCTTCATCCTGG +>EAS56_61:8:7:171:402 +TCTACATGGCTGATTATGAAAACAATGTTCCCCAG +>EAS56_63:1:119:446:185 +TGGTCTGACAGGCTGCAACTGTGAGCCATCACAAT +>EAS56_63:1:119:446:185 +TTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAG +>EAS56_63:1:145:71:26 +CTAGGGGAACAGGGAGGTGCACTAATGCGCTCCAC +>EAS56_63:1:145:71:26 +TTTCTTATCTGCACATTACTACCCTGCAATTAATA +>EAS56_63:2:119:161:322 +ATCAGAATAACAATGGGCTTCTCAGCAGAAACCTT +>EAS56_63:2:119:161:322 +CTTTAGTCTTGCTAGAGATTTAGACATCTAAATGA +>EAS56_63:2:33:357:858 +AAAATTAAAGTTCAATACTCACCATCATAAATACA +>EAS56_63:2:33:357:858 +AGGGATTAAATTCCCCCACTTAAGAGATATAGATT +>EAS56_63:2:74:656:272 +AACAAAGGAGGTCATCATACAATGATAAAAAGATC +>EAS56_63:2:74:656:272 +TGTACAATATTCTGATGATGGTTACACTAAAAGCC +>EAS56_63:3:40:594:752 +ATACACACACATGGTTTAGGGGTATAATACCTCTA +>EAS56_63:3:40:594:752 +CTGCGCGGTTTCCCATCATGAAGCACTGAACTTCC +>EAS56_63:3:41:468:459 +AAATTTAACAAAAGTAAATAAAACACATAGCTAAA +>EAS56_63:3:41:468:459 +TTTTTTTTTTTTTTTTTTTCTTTTTTTTTTTTTTT +>EAS56_63:3:93:1002:845 +AATTCCCAATTACGTCCTATCTTCTTCTTAGGGAA +>EAS56_63:3:93:1002:845 +GGGCAATACATGAGATTATTAGGAAATGCTTTACT +>EAS56_63:4:141:9:811 +TTTCTTTTCTCCTTTTTTTTTTTTTTTTTCTACAT +>EAS56_63:4:184:659:377 +AAAAAGATGTTCTACGCAAACAGAAACCAAATGAG +>EAS56_63:4:184:659:377 +CAAAACTACCCAGATTCATAAAACAAATACTACTA +>EAS56_63:4:38:28:122 +AAATATAGTTGAAAGCTCTAACAATAGACTAAACC +>EAS56_63:4:38:28:122 +GTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTG +>EAS56_63:5:117:570:971 +ACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAG +>EAS56_63:5:117:570:971 +ACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGA +>EAS56_63:5:123:998:248 +ATCACAATGAACAACAGGAAGAAAAGGTCTTTCAA +>EAS56_63:5:123:998:248 +TTTCTTTTGGCATTTGCCTTCAGACCCTACACGAA +>EAS56_63:5:36:678:316 +ATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAA +>EAS56_63:5:36:678:316 +TTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTA +>EAS56_63:5:96:788:614 +TAGGGGTATAATACCTCTACATGGCTGATTATGAA +>EAS56_63:5:96:788:614 +TTCCACGTCTCATCTAGGGGAACAGGGAGGTGCAC +>EAS56_63:6:102:816:260 +AAATTACCTAATTGGTACAATGTACAATATTCTGA 
+>EAS56_63:6:102:816:260 +TCAGATAAAGCACACTTTAAATCAACAACAGTAAA +>EAS56_63:6:42:920:522 +AATTAATATAATTGTGTCCATGTACACACGCTGTT +>EAS56_63:6:42:920:522 +CTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCC +>EAS56_63:6:91:360:585 +AAGATGAAACGCGTAACTGCGCTCTCATTCACTCC +>EAS56_63:6:91:360:585 +GACATCACAATGAACAACAGGAAGAAAAGGTCTTT +>EAS56_63:7:109:22:383 +ATGTACAATATTCTGATGATGGTTACACTAAAAGC +>EAS56_63:7:109:22:383 +CAACAACAGTAAAATAAAACAAAGGAGGTCATCAT +>EAS56_63:7:137:139:248 +GTCAAACACGAATGTTATGCCCTGCTAAACTAAGC +>EAS56_63:7:137:139:248 +TATCTAAAGTCAACATGAAGGAAAAAAATTCTAAA +>EAS56_63:7:166:42:147 +AATGCGCTCCACGCCCAAGCCCTTCTAACAGTTTC +>EAS56_63:7:166:42:147 +CTGCACATTACTACCCTGCAATTAATATAATTGTG +>EAS56_63:7:185:213:330 +CAATGTCAGGGAAGGAGCATTTTGTCAGTTACCAA +>EAS56_63:7:185:213:330 +TCTGGGAAATTCTTCATCCTGGACCCTGAGAGATT +>EAS56_63:7:190:95:706 +TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG +>EAS56_63:7:190:95:706 +TTGTGTCCATGTACACACGCTGTCCTATGTACTTA +>EAS56_63:7:34:334:825 +CATTTAAGAAATTACAAAATATAGTTGAAAGCTCT +>EAS56_63:8:138:186:459 +CATTGCAAGACAGACTTCATCAAGATATGTAGTCA +>EAS56_63:8:138:186:459 +GCCAGAAGAGATTGGAGCTAATTTTTGGACTTCTT +>EAS56_63:8:150:508:757 +ATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGT +>EAS56_63:8:150:508:757 +CTTGACACCCAACTAATATTTGTCTGAGCAAAACA +>EAS56_63:8:4:571:820 +AAGAGATATAGATTGGCAGAACAGATTTAAAAACA +>EAS56_63:8:4:571:820 +CAACAACAGTAAAATAAAACAAAGGAGGTCATCAT +>EAS56_63:8:62:125:888 +CGGAAACCTTACAAGCCAGAAGAGATTGGATCTAA +>EAS56_63:8:62:125:888 +TGCCAGGAAGATACATTGCAAGACAGACTTCATCA +>EAS56_65:1:163:846:223 +CCAGCTCCCTGTCACCCAATGGACCTGTGATATCT +>EAS56_65:1:163:846:223 +GCCTTTGTTCACATAGACCCCCTTGCAACAACCTT +>EAS56_65:1:178:305:843 +ATGTTTCTTATCTGCACATTACTACCCTGCAATTA +>EAS56_65:1:178:305:843 +CCACGTCTCATCTAGGGGAACAGGGAGGTGCACTA +>EAS56_65:1:23:536:229 +AAAGCATACAGTCATCTATAAAGGAAATCCCATCA +>EAS56_65:1:23:536:229 +AAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCA +>EAS56_65:1:53:272:944 +CAACCCCCTTGCAACAACCTTGCGAACCCCAGGGA +>EAS56_65:1:53:272:944 +TGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAA +>EAS56_65:2:224:579:433 
+ATAACAATGGGCTTCTCAGCAGAAACCTTACAAGC +>EAS56_65:2:224:579:433 +TTCATCATCACTAAACCAGTCCTATAAGAAATGCT +>EAS56_65:2:56:155:49 +ATCCTACTAAATACATATGCACCTAACACAAGACT +>EAS56_65:2:56:155:49 +ATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCT +>EAS56_65:3:168:741:680 +AGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCC +>EAS56_65:3:168:741:680 +TGGTCTGACAGGCTGCAACTGTGAGCCATCACAAT +>EAS56_65:3:47:64:359 +TTTTTTTTTTTCTCTCCTCTTTTTTTTTTTTTTTT +>EAS56_65:4:124:367:72 +AGACTACCCAGATTCATAAAACAAATACTACTAGA +>EAS56_65:4:124:367:72 +CATAGCTAAAACTAAAAAAGCAAAAACAAAAACTA +>EAS56_65:4:126:966:514 +AGAAGAAGTAGCTATACTTATATCAGATAAAGCAC +>EAS56_65:4:126:966:514 +TAAAAAGGGATTAAATTCCCCCACTTAAGAGATAT +>EAS56_65:4:150:94:843 +AAAGGGATTAAATTCCCCCACTTAAGAGATATAGA +>EAS56_65:4:150:94:843 +CAGATACATCCCACTTTAAATCAACCACAGTAAAA +>EAS56_65:4:296:78:421 +TCTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTT +>EAS56_65:4:296:78:421 +TGTTGGTGTTCGTTTTTTCTCCTGTTTCTTTTTCT +>EAS56_65:5:121:380:656 +AATGTGTTTATTACCAGAGGGATGGAGGGAAGAGG +>EAS56_65:5:121:380:656 +GCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTG +>EAS56_65:5:131:742:561 +TCAAAAGGTGATGTGTGTTCTCATCAACCTCATAC +>EAS56_65:5:131:742:561 +TGCCTTCAGACCCTACACGAATGCGTCTCTACCAC +>EAS56_65:5:211:84:84 +CTATTTGAGGAAGTAATTGGGGAAAACCTCTTTAG +>EAS56_65:5:211:84:84 +GCAAGAGAAAAGCATACAGTCATCTATAAAGGAAA +>EAS56_65:5:262:53:888 +AAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGAT +>EAS56_65:5:262:53:888 +TTGAGACTACAGAGCAACTAGGTAAAAAATTAACA +>EAS56_65:5:278:848:765 +GTACACACGCTGTCCTATGTACTTATCATGACTCT +>EAS56_65:5:278:848:765 +TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG +>EAS56_65:5:299:336:613 +ATCCTACTAAATACATATGCACCTAACACAAGACT +>EAS56_65:5:299:336:613 +TACTCAATATATCCATGTAACAAATCTGCGCTTGT +>EAS56_65:5:30:92:753 +TATAATACCTCTACATGGCTGATTATGAAAACAAT +>EAS56_65:5:30:92:753 +TGGATTCTGGGAAATTCTTCATCCTGGACCCTGAG +>EAS56_65:5:312:985:871 +ATAAAACACATAGCTAAAACTAAAAAAGCAAAAAC +>EAS56_65:5:312:985:871 +TAAGAGGGATGAGAAATTACCTAATTGGTACAATG +>EAS56_65:5:37:611:267 +AATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACA +>EAS56_65:5:37:611:267 +TATAAAGGAAATCCCATAAGAATAACAATGGGCTT 
+>EAS56_65:5:75:637:650 +CTGATTATGAAAACAATGTTCCCCAGATACCATCC +>EAS56_65:5:75:637:650 +GTCTCATCTAGGGGAACAGGGAGGTGCACTAATGC +>EAS56_65:6:197:759:975 +AAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATA +>EAS56_65:6:197:759:975 +AATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTC +>EAS56_65:6:37:610:260 +CAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGT +>EAS56_65:6:37:610:260 +CCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTC +>EAS56_65:6:46:173:214 +AACAATGGGCTTCTCAGCAGAAACCTTACAAGCCA +>EAS56_65:6:46:173:214 +CTAGAGATTTAGACATCTAAATGAAAGAGGCTCAA +>EAS56_65:6:66:257:524 +ATACATGAGATTATTAGGAAATGCTTTACTGTCAT +>EAS56_65:6:66:257:524 +GCCTTCAGACCCTACACGAATGCGTCTCTACCACC +>EAS56_65:6:67:800:450 +TCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGA +>EAS56_65:6:67:800:450 +TTACAAAATATAGTTGAAAGCTCTAACAATAGACT +>EAS56_65:6:82:822:767 +AACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAA +>EAS56_65:6:82:822:767 +TTAGGAAATGCTTTACTGTCATAACTATGAAGAGA +>EAS56_65:7:118:775:467 +AACAGTAAAATAAAACAAAGGAGGTCATCATACAA +>EAS56_65:7:118:775:467 +TGTACAATATTCTGATGATGGTTACACTAAAAGCC +>EAS56_65:7:122:398:994 +GGGATTAAATTCCCCCACTTAAGAGATATAGATTG +>EAS56_65:7:122:398:994 +TAAAAGTCAAAATTAAAGTTCAATACTCACCATCA +>EAS56_65:7:219:40:833 +CCCATACTTTACTGCTACTCAATATATCCATGTAA +>EAS56_65:7:219:40:833 +GGAGGTCATCATACAATGATAAAAAGATCAATTCA +>EAS56_65:7:288:552:440 +AGAGGGAACGCTTTCAACTCTTCTAGCCATTTCTT +>EAS56_65:7:288:552:440 +TGTGGTCTGACAGGCTGCAACTGTGAGCCTTCCAT +>EAS56_65:7:67:692:110 +ATTGCCAGATGAACCACACATTAATACTATGTTTC +>EAS56_65:7:67:692:110 +GTATCAATTTGGTGTTCTGTGTAAAGTCTCAGGGA +>EAS56_65:8:117:156:84 +GGTTCAGAACTTGAAGACAAGTCTCTTATGAATTA +>EAS56_65:8:117:156:84 +TGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAG +>EAS56_65:8:206:563:262 +ATTACGTCCTATCTTCTTCTTAGGGAAGAACAGCT +>EAS56_65:8:206:563:262 +ATTAGGAAATGCTTTACTGTCATAACTATGAAGAG +>EAS56_65:8:218:173:667 +CCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC +>EAS56_65:8:218:173:667 +TAATGATTCCTCAATGTTAAAATGTCTATTTTTGT +>EAS56_65:8:24:415:944 +GTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGC +>EAS56_65:8:24:415:944 +TAGGTAAAAAATTAACATTACAACAGGAACAAAAC +>EAS56_65:8:275:851:240 
+CCCCAGAGGGAAAGCTTTCAACGTTTCTAGCCATT +>EAS56_65:8:275:851:240 +GTGGTCTGACAGGCTGCAACTGTGAGCCATCACAA +>EAS56_65:8:317:83:500 +TTTTTTTTTTTTCTTTTCTCCTTTTTTTTTTGTTT +>EAS56_65:8:64:507:478 +TAATTGAAAAATTCATTTAAGAAATTACAAAATAT diff --git a/tests/pysam_data/ex1.fq b/tests/pysam_data/faidx_ex1.fq similarity index 100% rename from tests/pysam_data/ex1.fq rename to tests/pysam_data/faidx_ex1.fq diff --git a/tests/samtools_test.py b/tests/samtools_test.py index f247373..f48d23e 100644 --- a/tests/samtools_test.py +++ b/tests/samtools_test.py @@ -103,9 +103,9 @@ class BinaryTest(unittest.TestCase): ), "fixmate": ( - ("ex1.fixmate", "fixmate ex1.bam ex1.fixmate"), - ("pysam_ex1.fixmate", - (pysam.fixmate, "pysam_ex1.bam pysam_ex1.fixmate")), + ("ex1.fixmate.bam", "fixmate ex1.bam ex1.fixmate.bam"), + ("pysam_ex1.fixmate.bam", + (pysam.fixmate, "pysam_ex1.bam pysam_ex1.fixmate.bam")), ), "flagstat": ( @@ -114,22 +114,24 @@ class BinaryTest(unittest.TestCase): ), "calmd": ( - ("ex1.calmd", "calmd ex1.bam ex1.fa > ex1.calmd"), - ("pysam_ex1.calmd", (pysam.calmd, "pysam_ex1.bam ex1.fa")), + ("ex1.calmd.bam", "calmd ex1.bam ex1.fa > ex1.calmd.bam"), + ("pysam_ex1.calmd.bam", (pysam.calmd, "pysam_ex1.bam ex1.fa")), ), "merge": ( ("ex1.merge", "merge -f ex1.merge ex1.bam ex1.bam"), - # -f option does not work - following command will cause the subsequent - # command to fail + # -f option does not work - following command will + # cause the subsequent command to fail ("pysam_ex1.merge", (pysam.merge, "pysam_ex1.merge pysam_ex1.bam pysam_ex1.bam")), ), "rmdup": ( - ("ex1.rmdup", "rmdup ex1.bam ex1.rmdup"), - ("pysam_ex1.rmdup", - (pysam.rmdup, "pysam_ex1.bam pysam_ex1.rmdup")), + # use -s option, otherwise the following error in samtools 1.2: + # Samtools-htslib-API: bam_get_library() not yet implemented + ("ex1.rmdup.bam", "rmdup -s ex1.bam ex1.rmdup.bam"), + ("pysam_ex1.rmdup.bam", + (pysam.rmdup, "pysam_ex1.bam -s pysam_ex1.rmdup.bam")), ), "reheader": ( @@ -138,8 +140,9 @@ class 
BinaryTest(unittest.TestCase): ), "cat": ( - ("ex1.cat", "cat ex1.bam ex1.bam > ex1.cat"), - ("pysam_ex1.cat", (pysam.cat, "ex1.bam ex1.bam")), + ("ex1.cat.bam", "cat -o ex1.cat.bam ex1.bam ex1.bam"), + ("pysam_ex1.cat.bam", + (pysam.cat, " -o pysam_ex1.cat.bam ex1.bam ex1.bam")), ), "targetcut": ( @@ -184,26 +187,29 @@ class BinaryTest(unittest.TestCase): # the samtools commands are executed. # The first three (faidx, import, index) need to be in that order, # the rest is arbitrary. - order = ('faidx', 'import', 'index', - # 'pileup1', 'pileup2', deprecated - # 'glfview', deprecated - 'view', 'view2', + order = ('faidx', + 'import', + 'index', + 'view', + 'view2', 'sort', 'mpileup', 'depth', 'idxstats', - # 'fixmate', + 'fixmate', 'flagstat', - # 'calmd', + 'calmd', 'merge', - # 'rmdup', + 'rmdup', 'reheader', 'cat', 'bedcov', 'targetcut', 'phase', - # 'bamshuf', 'bam2fq', + # Segmentation fault: + # 'bamshuf', + # File not binary identical # 'pad2unpad', ) @@ -213,6 +219,7 @@ class BinaryTest(unittest.TestCase): For setup, all commands will be run before the first test is executed. Individual tests will then just compare the output files. 
+ ''' if BinaryTest.first_time: @@ -234,12 +241,12 @@ class BinaryTest(unittest.TestCase): savedir = os.getcwd() os.chdir(WORKDIR) for label in self.order: - # print ("command=", label) + sys.stdout.write("preparing test {}".format(label)) command = self.commands[label] # build samtools command and target and run samtools_target, samtools_command = command[0] runSamtools(" ".join((SAMTOOLS, samtools_command))) - + sys.stdout.write(" samtools ok") # get pysam command and run try: pysam_target, pysam_command = command[1] @@ -248,14 +255,18 @@ class BinaryTest(unittest.TestCase): (label, command, msg)) pysam_method, pysam_options = pysam_command - + try: - output = pysam_method(*pysam_options.split(" "), raw=True) + output = pysam_method(*pysam_options.split(" "), + raw=True, + catch_stdout=True) except pysam.SamtoolsError as msg: raise pysam.SamtoolsError( "error while executing %s: options=%s: msg=%s" % (label, pysam_options, msg)) + sys.stdout.write(" pysam ok\n") + if ">" in samtools_command: with open(pysam_target, "wb") as outfile: if type(output) == list: @@ -309,14 +320,17 @@ class BinaryTest(unittest.TestCase): def testMpileup(self): self.checkCommand("mpileup") + def testCalmd(self): + self.checkCommand("calmd") + def testDepth(self): self.checkCommand("depth") def testIdxstats(self): self.checkCommand("idxstats") - # def testFixmate(self): - # self.checkCommand("fixmate") + def testFixmate(self): + self.checkCommand("fixmate") def testFlagstat(self): self.checkCommand("flagstat") @@ -324,8 +338,8 @@ class BinaryTest(unittest.TestCase): def testMerge(self): self.checkCommand("merge") - # def testRmdup(self): - # self.checkCommand("rmdup") + def testRmdup(self): + self.checkCommand("rmdup") def testReheader(self): self.checkCommand("reheader") @@ -345,32 +359,21 @@ class BinaryTest(unittest.TestCase): def testBedcov(self): self.checkCommand("bedcov") + def testView(self): + self.checkCommand("view") + # def testBamshuf(self): - # self.checkCommand("bamshuf") 
+ # self.checkCommand("bamshuf") # def testPad2Unpad(self): - # self.checkCommand("pad2unpad") - - # def testPileup1( self ): - # self.checkCommand( "pileup1" ) - - # def testPileup2( self ): - # self.checkCommand( "pileup2" ) - - # deprecated - # def testGLFView( self ): - # self.checkCommand( "glfview" ) - - def testView(self): - self.checkCommand("view") + # self.checkCommand("pad2unpad") def testEmptyIndex(self): self.assertRaises(IOError, pysam.index, "exdoesntexist.bam") def __del__(self): if os.path.exists(WORKDIR): - pass - # shutil.rmtree( WORKDIR ) + shutil.rmtree(WORKDIR) class StdoutTest(unittest.TestCase): diff --git a/tests/tabix_test.py b/tests/tabix_test.py index 1ad48ba..961f89a 100644 --- a/tests/tabix_test.py +++ b/tests/tabix_test.py @@ -13,6 +13,8 @@ import pysam import unittest import glob import re +import copy +from TestUtils import checkURL DATADIR = 'tabix_data' @@ -493,6 +495,16 @@ class TestParser(unittest.TestCase): os.unlink(tmpfilename) + def testCopy(self): + a = self.tabix.fetch(parser=pysam.asTuple()).next() + b = copy.copy(a) + self.assertEqual(a, b) + + a = self.tabix.fetch(parser=pysam.asGTF()).next() + b = copy.copy(a) + self.assertEqual(a, b) + + class TestIterators(unittest.TestCase): @@ -924,8 +936,6 @@ for vcf_file in vcf_files: n = "VCFFromVCFTest_%s" % os.path.basename(vcf_file[:-4]) globals()[n] = type(n, (TestVCFFromVCF,), dict(filename=vcf_file,)) -############################################################################ - class TestRemoteFileHTTP(unittest.TestCase): @@ -933,16 +943,28 @@ class TestRemoteFileHTTP(unittest.TestCase): region = "chr1:1-1000" local = os.path.join(DATADIR, "example.gtf.gz") + def setUp(self): + self.remote_file = pysam.TabixFile(self.url, "r") + self.local_file = pysam.TabixFile(self.local, "r") + def testFetchAll(self): - remote_file = pysam.TabixFile(self.url, "r") - remote_result = list(remote_file.fetch()) - local_file = pysam.TabixFile(self.local, "r") - local_result = 
list(local_file.fetch()) + if not checkURL(self.url): + return + + remote_result = list(self.remote_file.fetch()) + local_result = list(self.local_file.fetch()) self.assertEqual(len(remote_result), len(local_result)) for x, y in zip(remote_result, local_result): self.assertEqual(x, y) + def testHeader(self): + self.assertEqual(list(self.local_file.header), []) + self.assertRaises(AttributeError, + getattr, + self.remote_file, + "header") + class TestIndexArgument(unittest.TestCase): @@ -1019,5 +1041,17 @@ class TestMultipleIterators(unittest.TestCase): self.assertEqual(str(a), str(b)) +class TestContextManager(unittest.TestCase): + + filename = os.path.join(DATADIR, "example.gtf.gz") + + def testManager(self): + + with pysam.TabixFile(self.filename) as tabixfile: + tabixfile.fetch() + self.assertEqual(tabixfile.closed, True) + + + if __name__ == "__main__": unittest.main() -- 2.30.2