From: Charles Plessy Date: Thu, 13 Jan 2011 09:09:25 +0000 (+0900) Subject: Imported Upstream version 0.1.1 X-Git-Tag: archive/raspbian/0.22.0+ds-1+rpi1~1^2^2~470 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=fe45439c5a3ae7f5910085fc73cc7e6c2237af3a;p=python-pysam.git Imported Upstream version 0.1.1 --- diff --git a/MANIFEST.in b/MANIFEST.in index cd0beee..c0ca312 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -11,4 +11,13 @@ include THANKS include pysam/csamtools.pxd include pysam/pysam_util.h include samtools/*.h +include tests/00README.txt +include tests/Makefile +include tests/ex1.fa +include tests/ex1.sam.gz +include tests/ex3.sam +include tests/ex4.sam +include tests/example.py +include tests/pysam_test.py +include tests/segfault_tests.py diff --git a/PKG-INFO b/PKG-INFO index 7126e3b..174c1a9 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: pysam -Version: 0.1 +Version: 0.1.1 Summary: pysam Home-page: http://code.google.com/p/pysam/ Author: Andreas Heger diff --git a/pysam/csamtools.pyx b/pysam/csamtools.pyx index b469e78..a0bda89 100644 --- a/pysam/csamtools.pyx +++ b/pysam/csamtools.pyx @@ -46,6 +46,9 @@ cdef makeAlignedRead( bam1_t * src): '''enter src into AlignedRead.''' cdef AlignedRead dest dest = AlignedRead() + # destroy dummy delegate created in constructor + # to prevent memory leak. + bam_destroy1(dest._delegate) dest._delegate = bam_dup1(src) return dest @@ -160,8 +163,7 @@ cdef class Samfile: so to open a :term:`BAM` file for reading:: f=Samfile('ex1.bam','rb') - - + For writing, the header of a :term:`TAM` file/:term:`BAM` file can be constituted from several sources: @@ -285,17 +287,13 @@ cdef class Samfile: if self.index == NULL: raise IOError("could not open index `%s` " % filename ) - def _getTarget( self, tid ): + def getrname( self, tid ): '''(tid ) - convert numerical :term:`tid` into target name.''' + convert numerical :term:`tid` into :ref:`reference` name.''' if not 0 <= tid < self.samfile.header.n_targets: raise ValueError( "tid out of range 0<=tid<%i" % self.samfile.header.n_targets ) return self.samfile.header.target_name[tid] - def getNumReferences( self ): - """return the number of :term:`reference` sequences.""" - return self.samfile.header.n_targets - def _parseRegion( self, reference = None, start = None, end = None, region = None ): '''parse region information. @@ -305,6 +303,8 @@ cdef class Samfile: returns a tuple of region, tid, start and end. Region is a valid samtools :term:`region` or None if the region extends over the whole file. + + Note that regions are 1-based, while start,end are python coordinates. ''' cdef int rtid @@ -316,7 +316,7 @@ cdef class Samfile: # translate to a region if reference: if start != None and end != None: - region = "%s:%i-%i" % (reference, start, end) + region = "%s:%i-%i" % (reference, start+1, end) else: region = reference @@ -331,7 +331,9 @@ cdef class Samfile: return region, rtid, rstart, rend def fetch( self, - reference = None, start = None, end = None, + reference = None, + start = None, + end = None, region = None, callback = None, until_eof = False ): @@ -369,14 +371,17 @@ cdef class Samfile: return bam_fetch(self.samfile.x.bam, self.index, rtid, rstart, rend, callback, fetch_callback ) else: if region: - return IteratorRow( self, region ) + return IteratorRow( self, rtid, rstart, rend ) else: if until_eof: return IteratorRowAll( self ) else: # return all targets by chaining the individual targets together. i = [] - for x in self.references: i.append( IteratorRow( self, x)) + rstart = 0 + rend = 1<<29 + for rtid from 0 <= rtid < self.nreferences: + i.append( IteratorRow( self, rtid, rstart, rend)) return itertools.chain( *i ) else: if region != None: @@ -427,11 +432,14 @@ cdef class Samfile: bam_plbuf_destroy(buf) else: if region: - return IteratorColumn( self, region ) + return IteratorColumn( self, rtid, rstart, rend ) else: # return all targets by chaining the individual targets together. i = [] - for x in self.references: i.append( IteratorColumn( self, x)) + rstart = 0 + rend = 1<<29 + for rtid from 0 <= rtid < self.nreferences: + i.append( IteratorColumn( self, rtid, rstart, rend)) return itertools.chain( *i ) else: @@ -439,7 +447,6 @@ cdef class Samfile: def close( self ): '''closes file.''' - if self.samfile != NULL: samclose( self.samfile ) bam_index_destroy(self.index); @@ -458,6 +465,11 @@ cdef class Samfile: ''' return samwrite( self.samfile, read._delegate ) + property nreferences: + '''number of :term:`reference` sequences in the file.''' + def __get__(self): + return self.samfile.header.n_targets + property references: """tuple with the names of :term:`reference` sequences.""" def __get__(self): @@ -608,23 +620,20 @@ cdef class IteratorRow: cdef bam1_t * b cdef error_msg cdef int error_state - def __cinit__(self, Samfile samfile, region ): + cdef Samfile samfile + def __cinit__(self, Samfile samfile, int tid, int beg, int end ): self.bam_iter = NULL assert samfile._isOpen() assert samfile._hasIndex() + + # makes sure that samfile stays alive as long as the + # iterator is alive. + self.samfile = samfile # parse the region - cdef int tid - cdef int beg - cdef int end self.error_state = 0 self.error_msg = None - bam_parse_region( samfile.samfile.header, region, &tid, &beg, &end) - if tid < 0: - self.error_state = 1 - self.error_msg = "invalid region `%s`" % region - return cdef bamFile fp fp = samfile.samfile.x.bam @@ -714,13 +723,14 @@ cdef class IteratorColumn: # check if first iteration cdef int notfirst + # result of the last plbuf_push cdef int n_pu cdef int eof cdef IteratorRow iter - def __cinit__(self, Samfile samfile, region ): + def __cinit__(self, Samfile samfile, int tid, int start, int end ): - self.iter = IteratorRow( samfile, region ) + self.iter = IteratorRow( samfile, tid, start, end ) self.buf = bam_plbuf_init(NULL, NULL ) self.n_pu = 0 self.eof = 0 @@ -729,9 +739,18 @@ cdef class IteratorColumn: return self cdef int cnext(self): + '''perform next iteration. + + return 1 if there is a buffer to emit. Return 0 for end of iteration. + ''' cdef int retval1, retval2 + # pysam bam_plbuf_push returns: + # 1: if buf is full and can be emitted + # 0: if b has been added + # -1: if there was an error + # check if previous plbuf was incomplete. If so, continue within # the loop and yield if necessary if self.n_pu > 0: @@ -744,12 +763,11 @@ cdef class IteratorColumn: # an new column has finished while self.n_pu == 0: retval1 = self.iter.cnext() - # wrap up if no more input if retval1 == 0: self.n_pu = pysam_bam_plbuf_push( NULL, self.buf, 0) self.eof = 1 - return 1 + return self.n_pu # submit to plbuf self.n_pu = pysam_bam_plbuf_push( self.iter.getCurrent(), self.buf, 0) @@ -878,13 +896,17 @@ cdef class AlignedRead: def __get__(self): return self._delegate.core.flag property rname: - """chromosome/target ID""" + """:term:`reference` ID""" def __get__(self): return self._delegate.core.tid property pos: """0-based leftmost coordinate""" def __get__(self): return self._delegate.core.pos + property rlen: + '''length of the read. Returns 0 if not given.''' + def __get__(self): + return self._delegate.core.l_qseq property mapq: """mapping quality""" def __get__(self): @@ -1034,8 +1056,7 @@ cdef class PileupRead: def __str__(self): return "\t".join( map(str, (self.alignment, self.qpos, self.indel, self.level, self.is_del, self.is_head, self.is_tail ) ) ) - - + property alignment: """a :class:`pysam.AlignedRead` object of the aligned read""" def __get__(self): @@ -1052,18 +1073,17 @@ cdef class PileupRead: """1 iff the base on the padded read is a deletion""" def __get__(self): return self._is_del - property head: + property is_head: def __get__(self): return self._is_head - property tail: + property is_tail: def __get__(self): return self._is_tail - - - - - - + property level: + def __get__(self): + return self._level + + class Outs: '''http://mail.python.org/pipermail/python-list/2000-June/038406.html''' def __init__(self, id = 1): diff --git a/pysam/pysam_util.c b/pysam/pysam_util.c index 7f3174b..8d28096 100644 --- a/pysam/pysam_util.c +++ b/pysam/pysam_util.c @@ -138,6 +138,11 @@ static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos) return ret; } +// the following code has been taken from bam_plbuf_push +// and modified such that instead of a function call +// the function returns and will continue (if cont is true). +// from where it left off. + // returns // 1: if buf is full and can be emitted // 0: if b has been added diff --git a/setup.py b/setup.py index 7a7ab7e..544e48f 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,9 @@ pysam import os, sys, glob, shutil +name = "pysam" +version = "0.1.1" + samtools_exclude = ( "bamtk.c", "razip.c", "bgzip.c" ) samtools_dest = os.path.abspath( "samtools" ) @@ -32,9 +35,6 @@ if len(sys.argv) >= 2 and sys.argv[1] == "import": from distutils.core import setup, Extension from Pyrex.Distutils import build_ext -name = "pysam" -version = "0.1" - classifiers = """ Development Status :: 2 - Alpha Operating System :: MacOS :: MacOS X diff --git a/tests/00README.txt b/tests/00README.txt new file mode 100644 index 0000000..67b8689 --- /dev/null +++ b/tests/00README.txt @@ -0,0 +1,32 @@ +File ex1.fa contains two sequences cut from the human genome +build36. They were exatracted with command: + + samtools faidx human_b36.fa 2:2043966-2045540 20:67967-69550 + +Sequence names were changed manually for simplicity. File ex1.sam.gz +contains MAQ alignments exatracted with: + + (samtools view NA18507_maq.bam 2:2044001-2045500; + samtools view NA18507_maq.bam 20:68001-69500) + +and processed with `samtools fixmate' to make it self-consistent as a +standalone alignment. + +To try samtools, you may run the following commands: + + samtools faidx ex1.fa # index the reference FASTA + samtools import ex1.fa.fai ex1.sam.gz ex1.bam # SAM->BAM + samtools index ex1.bam # index BAM + samtools tview ex1.bam ex1.fa # view alignment + samtools pileup -cf ex1.fa ex1.bam # pileup and consensus + samtools pileup -cf ex1.fa -t ex1.fa.fai ex1.sam.gz + +In order for the script pysam_test.py to work, you will need pysam +in your PYTHONPATH. + +In order for the script example.py to work, you will need pysam +in your PYTHONPATH and run + + make all + +beforehand. diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000..7de46f5 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,30 @@ +all: ex1.glf ex1.pileup.gz ex1.bam.bai ex1.glfview.gz ex2.sam.gz ex2.sam ex1.sam ex3.bam ex3.bam.bai ex4.bam ex4.bam.bai + @echo; echo \# You can now launch the viewer with: \'samtools tview ex1.bam ex1.fa\'; echo; + +ex2.sam.gz: ex1.bam ex1.bam.bai + samtools view -h ex1.bam | gzip > ex2.sam.gz + +ex3.bam: ex3.sam ex1.fa.fai + samtools import ex1.fa.fai ex3.sam ex3.bam + +ex4.bam: ex4.sam ex1.fa.fai + samtools import ex1.fa.fai ex4.sam ex4.bam + +%.sam: %.sam.gz + gunzip < $< > $@ + +ex1.fa.fai:ex1.fa + samtools faidx ex1.fa +ex1.bam:ex1.sam.gz ex1.fa.fai + samtools import ex1.fa.fai ex1.sam.gz ex1.bam +%.bam.bai:%.bam + samtools index $< +ex1.pileup.gz:ex1.bam ex1.fa + samtools pileup -cf ex1.fa ex1.bam | gzip > ex1.pileup.gz +ex1.glf:ex1.bam ex1.fa + samtools pileup -gf ex1.fa ex1.bam > ex1.glf +ex1.glfview.gz:ex1.glf + samtools glfview ex1.glf | gzip > ex1.glfview.gz + +clean: + rm -fr *.bam *.bai *.glf* *.fai *.pileup* *~ calDepth *.dSYM pysam_*.sam ex2.sam ex2.sam.gz ex1.sam diff --git a/tests/ex1.fa b/tests/ex1.fa new file mode 100644 index 0000000..b4ed0cf --- /dev/null +++ b/tests/ex1.fa @@ -0,0 +1,56 @@ +>chr1 +CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCT +GTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCAC +GGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAG +TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTC +AGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAA +CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACC +AAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCT +CTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA +ATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGC +AGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAAC +AACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACAC +ATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATAC +CATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCT +TTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTT +TCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT +GCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAAT +ACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGA +ACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTG +TGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTA +CGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAG +TCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGC +TTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTC +TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTG +TTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGG +AGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATA +TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTC +TCCCTCGTCTTCTTA +>chr2 +TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAG +CTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCT +TATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTT +CAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAA +AAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT +AGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC +ATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAG +GAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCAT +CAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATT +TTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTA +AGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATA +ATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT +TAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATA +AAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACC +TCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATA +GATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATT +AATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCA +AATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGT +AAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATAT +AACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAAT +ACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGAT +GATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTG +CGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATA +GCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAA +AAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAA +TTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGC +CAGAAAAAAATATTTACAGTAACT diff --git a/tests/ex1.sam.gz b/tests/ex1.sam.gz new file mode 100644 index 0000000..8dd2bc4 Binary files /dev/null and b/tests/ex1.sam.gz differ diff --git a/tests/ex3.sam b/tests/ex3.sam new file mode 100644 index 0000000..801d13a --- /dev/null +++ b/tests/ex3.sam @@ -0,0 +1,9 @@ +@HD VN:1.0 +@SQ SN:chr1 LN:1575 +@SQ SN:chr2 LN:1584 +@RG ID:L1 PU:SC_1_10 LB:SC_1 SM:NA12891 +@RG ID:L2 PU:SC_2_12 LB:SC_2 SM:NA12891 +@CO this is a comment +@CO this is another comment +read_28833_29006_6945 99 chr1 33 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 +read_28701_28881_323b 147 chr2 88 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 diff --git a/tests/ex4.sam b/tests/ex4.sam new file mode 100644 index 0000000..b2282b8 --- /dev/null +++ b/tests/ex4.sam @@ -0,0 +1,9 @@ +@HD VN:1.0 +@SQ SN:chr1 LN:100 +@SQ SN:chr2 LN:100 +@RG ID:L1 PU:SC_1_10 LB:SC_1 SM:NA12891 +@RG ID:L2 PU:SC_2_12 LB:SC_2 SM:NA12891 +@CO this is a comment +@CO this is another comment +read_28833_29006_6945 99 chr1 21 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 +read_28701_28881_323b 147 chr2 21 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 diff --git a/tests/example.py b/tests/example.py new file mode 100644 index 0000000..42e7347 --- /dev/null +++ b/tests/example.py @@ -0,0 +1,119 @@ +import pysam + +samfile = pysam.Samfile( "ex1.bam", "rb" ) + +print "###################" +# check different ways to iterate +print len(list(samfile.fetch())) +print len(list(samfile.fetch( "seq1", 10, 200 ))) +print len(list(samfile.fetch( region="seq1:10-200" ))) +print len(list(samfile.fetch( "seq1" ))) +print len(list(samfile.fetch( region="seq1"))) +print len(list(samfile.fetch( "seq2" ))) +print len(list(samfile.fetch( region="seq2"))) +print len(list(samfile.fetch())) +print len(list(samfile.fetch( "seq1" ))) +print len(list(samfile.fetch( region="seq1"))) +print len(list(samfile.fetch())) + +print len(list(samfile.pileup( "seq1", 10, 200 ))) +print len(list(samfile.pileup( region="seq1:10-200" ))) +print len(list(samfile.pileup( "seq1" ))) +print len(list(samfile.pileup( region="seq1"))) +print len(list(samfile.pileup( "seq2" ))) +print len(list(samfile.pileup( region="seq2"))) +print len(list(samfile.pileup())) +print len(list(samfile.pileup())) + +print "########### fetch with callback ################" +def my_fetch_callback( alignment ): print str(alignment) +samfile.fetch( region="seq1:10-200", callback=my_fetch_callback ) + +print "########## pileup with callback ################" +def my_pileup_callback( pileups ): print str(pileups) +samfile.pileup( region="seq1:10-200", callback=my_pileup_callback ) + +print "##########iterator row #################" +iter = pysam.IteratorRow( samfile, "seq1:10-200") +for x in iter: print str(x) + +print "##########iterator col #################" +iter = pysam.IteratorColumn( samfile, "seq1:10-200") +for x in iter: print str(x) + +print "#########row all##################" +iter = pysam.IteratorRowAll( samfile ) +for x in iter: print str(x) + + +print "###################" + +class Counter: + mCounts = 0 + def __call__(self, alignment): + self.mCounts += 1 + +c = Counter() +samfile.fetch( "seq1:10-200", c ) +print "counts=", c.mCounts + +print samfile.getTarget( 0 ) +print samfile.getTarget( 1 ) + +for p in pysam.pileup( "-c", "ex1.bam" ): + print str(p) + +print pysam.pileup.getMessages() + +for p in pysam.pileup( "-c", "ex1.bam", raw=True ): + print str(p), + + + +print "###########################" + +samfile = pysam.Samfile( "ex2.sam.gz", "r" ) + +print "num targets=", samfile.getNumTargets() + +iter = pysam.IteratorRowAll( samfile ) +for x in iter: print str(x) + +samfile.close() + +print "###########################" +samfile = pysam.Samfile( "ex2.sam.gz", "r" ) +def my_fetch_callback( alignment ): + print str(alignment) + +try: + samfile.fetch( "seq1:10-20", my_fetch_callback ) +except AssertionError: + print "caught fetch exception" + +samfile.close() + +print "###########################" +samfile = pysam.Samfile( "ex2.sam.gz", "r" ) +def my_pileup_callback( pileups ): + print str(pileups) +try: + samfile.pileup( "seq1:10-20", my_pileup_callback ) +except NotImplementedError: + print "caught pileup exception" + +# playing arount with headers +samfile = pysam.Samfile( "ex3.sam", "r" ) +print samfile.targets +print samfile.lengths +print samfile.text +print samdile.header +header = samfile.header +samfile.close() + +header["HD"]["SO"] = "unsorted" +outfile = pysam.Samfile( "out.sam", "wh", + header = header ) + +outfile.close() + diff --git a/tests/pysam_test.py b/tests/pysam_test.py new file mode 100755 index 0000000..2bbe8a3 --- /dev/null +++ b/tests/pysam_test.py @@ -0,0 +1,551 @@ +#!/usr/bin/env python +'''unit testing code for pysam.''' + +import pysam +import unittest +import os +import itertools +import subprocess +import shutil + +def checkBinaryEqual( filename1, filename2 ): + '''return true if the two files are binary equal.''' + if os.path.getsize( filename1 ) != os.path.getsize( filename2 ): + return False + + infile1 = open(filename1, "rb") + infile2 = open(filename2, "rb") + + def chariter( infile ): + while 1: + c = infile.read(1) + if c == "": break + yield c + + found = False + for c1,c2 in itertools.izip( chariter( infile1), chariter( infile2) ): + if c1 != c2: break + else: + found = True + + infile1.close() + infile2.close() + return found + +def runSamtools( cmd ): + '''run a samtools command''' + + try: + retcode = subprocess.call(cmd, shell=True) + if retcode < 0: + print >>sys.stderr, "Child was terminated by signal", -retcode + except OSError, e: + print >>sys.stderr, "Execution failed:", e + +class BinaryTest(unittest.TestCase): + '''test samtools command line commands and compare + against pysam commands. + + Tests fail, if the output is not binary identical. + ''' + + first_time = True + + # a list of commands to test + mCommands = \ + { "faidx" : \ + ( + ("ex1.fa.fai", "samtools faidx ex1.fa"), + ("pysam_ex1.fa.fai", (pysam.faidx, "ex1.fa") ), + ), + "import" : + ( + ("ex1.bam", "samtools import ex1.fa.fai ex1.sam.gz ex1.bam" ), + ("pysam_ex1.bam", (pysam.samimport, "ex1.fa.fai ex1.sam.gz pysam_ex1.bam") ), + ), + "index": + ( + ("ex1.bam.bai", "samtools index ex1.bam" ), + ("pysam_ex1.bam.bai", (pysam.index, "pysam_ex1.bam" ) ), + ), + "pileup1" : + ( + ("ex1.pileup", "samtools pileup -cf ex1.fa ex1.bam > ex1.pileup" ), + ("pysam_ex1.pileup", (pysam.pileup, "-c -f ex1.fa ex1.bam" ) ) + ), + "pileup2" : + ( + ("ex1.glf", "samtools pileup -gf ex1.fa ex1.bam > ex1.glf" ), + ("pysam_ex1.glf", (pysam.pileup, "-g -f ex1.fa ex1.bam" ) ) + ), + "glfview" : + ( + ("ex1.glfview", "samtools glfview ex1.glf > ex1.glfview"), + ("pysam_ex1.glfview", (pysam.glfview, "ex1.glf" ) ), + ), + "view" : + ( + ("ex1.view", "samtools view ex1.bam > ex1.view"), + ("pysam_ex1.view", (pysam.view, "ex1.bam" ) ), + ), + } + + # some tests depend on others. The order specifies in which order + # the samtools commands are executed. + mOrder = ('faidx', 'import', 'index', 'pileup1', 'pileup2', 'glfview', 'view' ) + + def setUp( self ): + '''setup tests. + + For setup, all commands will be run before the first test is + executed. Individual tests will then just compare the output + files. + ''' + + if BinaryTest.first_time: + # copy the source + shutil.copy( "ex1.fa", "pysam_ex1.fa" ) + + for label in self.mOrder: + command = self.mCommands[label] + samtools_target, samtools_command = command[0] + pysam_target, pysam_command = command[1] + runSamtools( samtools_command ) + pysam_method, pysam_options = pysam_command + output = pysam_method( *pysam_options.split(" "), raw=True) + if ">" in samtools_command: + outfile = open( pysam_target, "w" ) + for line in output: outfile.write( line ) + outfile.close() + + BinaryTest.first_time = False + + def checkCommand( self, command ): + if command: + samtools_target, pysam_target = self.mCommands[command][0][0], self.mCommands[command][1][0] + self.assertTrue( checkBinaryEqual( samtools_target, pysam_target ), + "%s failed: files %s and %s are not the same" % (command, samtools_target, pysam_target) ) + + def testImport( self ): + self.checkCommand( "import" ) + + def testIndex( self ): + self.checkCommand( "index" ) + + def testPileup1( self ): + self.checkCommand( "pileup1" ) + + def testPileup2( self ): + self.checkCommand( "pileup2" ) + + def testGLFView( self ): + self.checkCommand( "glfview" ) + + def testView( self ): + self.checkCommand( "view" ) + + def __del__(self): + + for label, command in self.mCommands.iteritems(): + samtools_target, samtools_command = command[0] + pysam_target, pysam_command = command[1] + if os.path.exists( samtools_target): os.remove( samtools_target ) + if os.path.exists( pysam_target): os.remove( pysam_target ) + if os.path.exists( "pysam_ex1.fa" ): os.remove( "pysam_ex1.fa" ) + +class IOTest(unittest.TestCase): + '''check if reading samfile and writing a samfile are consistent.''' + + def checkEcho( self, input_filename, reference_filename, + output_filename, + input_mode, output_mode, use_template = True): + '''iterate through *input_filename* writing to *output_filename* and + comparing the output to *reference_filename*. + + The files are opened according to the *input_mode* and *output_mode*. + + If *use_template* is set, the header is copied from infile using the + template mechanism, otherwise target names and lengths are passed explicitely. + ''' + + infile = pysam.Samfile( input_filename, input_mode ) + if use_template: + outfile = pysam.Samfile( output_filename, output_mode, template = infile ) + else: + outfile = pysam.Samfile( output_filename, output_mode, + referencenames = infile.references, + referencelengths = infile.lengths ) + + iter = infile.fetch() + for x in iter: outfile.write( x ) + infile.close() + outfile.close() + + self.assertTrue( checkBinaryEqual( reference_filename, output_filename), + "files %s and %s are not the same" % (reference_filename, output_filename) ) + + def testReadWriteBam( self ): + + input_filename = "ex1.bam" + output_filename = "pysam_ex1.bam" + reference_filename = "ex1.bam" + + self.checkEcho( input_filename, reference_filename, output_filename, + "rb", "wb" ) + + def testReadWriteBamWithTargetNames( self ): + + input_filename = "ex1.bam" + output_filename = "pysam_ex1.bam" + reference_filename = "ex1.bam" + + self.checkEcho( input_filename, reference_filename, output_filename, + "rb", "wb", use_template = False ) + + def testReadWriteSamWithHeader( self ): + + input_filename = "ex2.sam" + output_filename = "pysam_ex2.sam" + reference_filename = "ex2.sam" + + self.checkEcho( input_filename, reference_filename, output_filename, + "r", "wh" ) + + def testReadWriteSamWithoutHeader( self ): + + input_filename = "ex2.sam" + output_filename = "pysam_ex2.sam" + reference_filename = "ex1.sam" + + self.checkEcho( input_filename, reference_filename, output_filename, + "r", "w" ) + +class TestIteratorRow(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex1.bam","rb" ) + + def checkRange( self, rnge ): + '''compare results from iterator with those from samtools.''' + ps = list(self.samfile.fetch(region=rnge)) + sa = list(pysam.view( "ex1.bam", rnge , raw = True) ) + self.assertEqual( len(ps), len(sa), "unequal number of results for range %s: %i != %i" % (rnge, len(ps), len(sa) )) + # check if the same reads are returned and in the same order + for line, pair in enumerate( zip( ps, sa ) ): + data = pair[1].split("\t") + self.assertEqual( pair[0].qname, data[0], "read id mismatch in line %i: %s != %s" % (line, pair[0].rname, data[0]) ) + + def testIteratePerContig(self): + '''check random access per contig''' + for contig in self.samfile.references: + self.checkRange( contig ) + + def testIterateRanges(self): + '''check random access per range''' + for contig, length in zip(self.samfile.references, self.samfile.lengths): + for start in range( 1, length, 90): + self.checkRange( "%s:%i-%i" % (contig, start, start + 90) ) # this includes empty ranges + + def tearDown(self): + self.samfile.close() + +class TestIteratorRowAll(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex1.bam","rb" ) + + def testIterate(self): + '''compare results from iterator with those from samtools.''' + ps = list(self.samfile.fetch()) + sa = list(pysam.view( "ex1.bam", raw = True) ) + self.assertEqual( len(ps), len(sa), "unequal number of results: %i != %i" % (len(ps), len(sa) )) + # check if the same reads are returned + for line, pair in enumerate( zip( ps, sa ) ): + data = pair[1].split("\t") + self.assertEqual( pair[0].qname, data[0], "read id mismatch in line %i: %s != %s" % (line, pair[0].rname, data[0]) ) + + def tearDown(self): + self.samfile.close() + +class TestIteratorColumn(unittest.TestCase): + '''test iterator column against contents of ex3.bam.''' + + # note that samfile contains 1-based coordinates + # 1D means deletion with respect to reference sequence + # + mCoverages = { 'chr1' : [ 0 ] * 20 + [1] * 36 + [0] * (100 - 20 -35 ), + 'chr2' : [ 0 ] * 20 + [1] * 35 + [0] * (100 - 20 -35 ), + } + + def setUp(self): + self.samfile=pysam.Samfile( "ex4.bam","rb" ) + + def checkRange( self, rnge ): + '''compare results from iterator with those from samtools.''' + # check if the same reads are returned and in the same order + for column in self.samfile.pileup(region=rnge): + thiscov = len(column.pileups) + refcov = self.mCoverages[self.samfile.getrname(column.tid)][column.pos] + self.assertEqual( thiscov, refcov, "wrong coverage at pos %s:%i %i should be %i" % (self.samfile.getrname(column.tid), column.pos, thiscov, refcov)) + + def testIterateAll(self): + '''check random access per contig''' + self.checkRange( None ) + + def testIteratePerContig(self): + '''check random access per contig''' + for contig in self.samfile.references: + self.checkRange( contig ) + + def testIterateRanges(self): + '''check random access per range''' + for contig, length in zip(self.samfile.references, self.samfile.lengths): + for start in range( 1, length, 90): + self.checkRange( "%s:%i-%i" % (contig, start, start + 90) ) # this includes empty ranges + + def testInverse( self ): + '''test the inverse, is point-wise pileup accurate.''' + for contig, refseq in self.mCoverages.items(): + refcolumns = sum(refseq) + for pos, refcov in enumerate( refseq ): + columns = list(self.samfile.pileup( contig, pos, pos+1) ) + if refcov == 0: + # if no read, no coverage + self.assertEqual( len(columns), refcov, "wrong number of pileup columns returned for position %s:%i, %i should be %i" %(contig,pos,len(columns), refcov) ) + elif refcov == 1: + # one read, all columns of the read are returned + self.assertEqual( len(columns), refcolumns, "pileup incomplete - %i should be %i " % (len(columns), refcolumns)) + + def tearDown(self): + self.samfile.close() + + +class TestAlignedReadBam(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex3.bam","rb" ) + self.reads=list(self.samfile.fetch()) + + def testARqname(self): + self.assertEqual( self.reads[0].qname, "read_28833_29006_6945", "read name mismatch in read 1: %s != %s" % (self.reads[0].qname, "read_28833_29006_6945") ) + self.assertEqual( self.reads[1].qname, "read_28701_28881_323b", "read name mismatch in read 2: %s != %s" % (self.reads[1].qname, "read_28701_28881_323b") ) + + def testARflag(self): + self.assertEqual( self.reads[0].flag, 99, "flag mismatch in read 1: %s != %s" % (self.reads[0].flag, 99) ) + self.assertEqual( self.reads[1].flag, 147, "flag mismatch in read 2: %s != %s" % (self.reads[1].flag, 147) ) + + def testARrname(self): + self.assertEqual( self.reads[0].rname, 0, "chromosome/target id mismatch in read 1: %s != %s" % (self.reads[0].rname, 0) ) + self.assertEqual( self.reads[1].rname, 1, "chromosome/target id mismatch in read 2: %s != %s" % (self.reads[1].rname, 1) ) + + def testARpos(self): + self.assertEqual( self.reads[0].pos, 33-1, "mapping position mismatch in read 1: %s != %s" % (self.reads[0].pos, 33-1) ) + self.assertEqual( self.reads[1].pos, 88-1, "mapping position mismatch in read 2: %s != %s" % (self.reads[1].pos, 88-1) ) + + def testARmapq(self): + self.assertEqual( self.reads[0].mapq, 20, "mapping quality mismatch in read 1: %s != %s" % (self.reads[0].mapq, 20) ) + self.assertEqual( self.reads[1].mapq, 30, "mapping quality mismatch in read 2: %s != %s" % (self.reads[1].mapq, 30) ) + + def testARcigar(self): + self.assertEqual( self.reads[0].cigar, [(0, 10), (2, 1), (0, 25)], "read name length mismatch in read 1: %s != %s" % (self.reads[0].cigar, [(0, 10), (2, 1), (0, 25)]) ) + self.assertEqual( self.reads[1].cigar, [(0, 35)], "read name length mismatch in read 2: %s != %s" % (self.reads[1].cigar, [(0, 35)]) ) + + def testARmrnm(self): + self.assertEqual( self.reads[0].mrnm, 0, "mate reference sequence name mismatch in read 1: %s != %s" % (self.reads[0].mrnm, 0) ) + self.assertEqual( self.reads[1].mrnm, 1, "mate reference sequence name mismatch in read 2: %s != %s" % (self.reads[1].mrnm, 1) ) + + def testARmpos(self): + self.assertEqual( self.reads[0].mpos, 200-1, "mate mapping position mismatch in read 1: %s != %s" % (self.reads[0].mpos, 200-1) ) + self.assertEqual( self.reads[1].mpos, 500-1, "mate mapping position mismatch in read 2: %s != %s" % (self.reads[1].mpos, 500-1) ) + + def testARisize(self): + self.assertEqual( self.reads[0].isize, 167, "insert size mismatch in read 1: %s != %s" % (self.reads[0].isize, 167) ) + self.assertEqual( self.reads[1].isize, 412, "insert size mismatch in read 2: %s != %s" % (self.reads[1].isize, 412) ) + + def testARseq(self): + self.assertEqual( self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 1: %s != %s" % (self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") ) + self.assertEqual( self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "sequence size mismatch in read 2: %s != %s" % (self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") ) + + def testARqual(self): + self.assertEqual( self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 1: %s != %s" % (self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") ) + self.assertEqual( self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") ) + + def testPresentOptionalFields(self): + self.assertEqual( self.reads[0].opt('NM'), 1, "optional field mismatch in read 1, NM: %s != %s" % (self.reads[0].opt('NM'), 1) ) + self.assertEqual( self.reads[0].opt('RG'), 'L1', "optional field mismatch in read 1, RG: %s != %s" % (self.reads[0].opt('RG'), 'L1') ) + self.assertEqual( self.reads[1].opt('RG'), 'L2', "optional field mismatch in read 2, RG: %s != %s" % (self.reads[1].opt('RG'), 'L2') ) + self.assertEqual( self.reads[1].opt('MF'), 18, "optional field mismatch in read 2, MF: %s != %s" % (self.reads[1].opt('MF'), 18) ) + + def testPairedBools(self): + self.assertEqual( self.reads[0].is_paired, True, "is paired mismatch in read 1: %s != %s" % (self.reads[0].is_paired, True) ) + self.assertEqual( self.reads[1].is_paired, True, "is paired mismatch in read 2: %s != %s" % (self.reads[1].is_paired, True) ) + self.assertEqual( self.reads[0].is_proper_pair, True, "is proper pair mismatch in read 1: %s != %s" % (self.reads[0].is_proper_pair, True) ) + self.assertEqual( self.reads[1].is_proper_pair, True, "is proper pair mismatch in read 2: %s != %s" % (self.reads[1].is_proper_pair, True) ) + + def tearDown(self): + self.samfile.close() + +class TestAlignedReadSam(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex3.sam","r" ) + self.reads=list(self.samfile.fetch()) + + def testARqname(self): + self.assertEqual( self.reads[0].qname, "read_28833_29006_6945", "read name mismatch in read 1: %s != %s" % (self.reads[0].qname, "read_28833_29006_6945") ) + self.assertEqual( self.reads[1].qname, "read_28701_28881_323b", "read name mismatch in read 2: %s != %s" % (self.reads[1].qname, "read_28701_28881_323b") ) + + def testARflag(self): + self.assertEqual( self.reads[0].flag, 99, "flag mismatch in read 1: %s != %s" % (self.reads[0].flag, 99) ) + self.assertEqual( self.reads[1].flag, 147, "flag mismatch in read 2: %s != %s" % (self.reads[1].flag, 147) ) + + def testARrname(self): + self.assertEqual( self.reads[0].rname, 0, "chromosome/target id mismatch in read 1: %s != %s" % (self.reads[0].rname, 0) ) + self.assertEqual( self.reads[1].rname, 1, "chromosome/target id mismatch in read 2: %s != %s" % (self.reads[1].rname, 1) ) + + def testARpos(self): + self.assertEqual( self.reads[0].pos, 33-1, "mapping position mismatch in read 1: %s != %s" % (self.reads[0].pos, 33-1) ) + self.assertEqual( self.reads[1].pos, 88-1, "mapping position mismatch in read 2: %s != %s" % (self.reads[1].pos, 88-1) ) + + def testARmapq(self): + self.assertEqual( self.reads[0].mapq, 20, "mapping quality mismatch in read 1: %s != %s" % (self.reads[0].mapq, 20) ) + self.assertEqual( self.reads[1].mapq, 30, "mapping quality mismatch in read 2: %s != %s" % (self.reads[1].mapq, 30) ) + + def testARcigar(self): + self.assertEqual( self.reads[0].cigar, [(0, 10), (2, 1), (0, 25)], "read name length mismatch in read 1: %s != %s" % (self.reads[0].cigar, [(0, 10), (2, 1), (0, 25)]) ) + self.assertEqual( self.reads[1].cigar, [(0, 35)], "read name length mismatch in read 2: %s != %s" % (self.reads[1].cigar, [(0, 35)]) ) + + def testARmrnm(self): + self.assertEqual( self.reads[0].mrnm, 0, "mate reference sequence name mismatch in read 1: %s != %s" % (self.reads[0].mrnm, 0) ) + self.assertEqual( self.reads[1].mrnm, 1, "mate reference sequence name mismatch in read 2: %s != %s" % (self.reads[1].mrnm, 1) ) + + def testARmpos(self): + self.assertEqual( self.reads[0].mpos, 200-1, "mate mapping position mismatch in read 1: %s != %s" % (self.reads[0].mpos, 200-1) ) + self.assertEqual( self.reads[1].mpos, 500-1, "mate mapping position mismatch in read 2: %s != %s" % (self.reads[1].mpos, 500-1) ) + + def testARisize(self): + self.assertEqual( self.reads[0].isize, 167, "insert size mismatch in read 1: %s != %s" % (self.reads[0].isize, 167) ) + self.assertEqual( self.reads[1].isize, 412, "insert size mismatch in read 2: %s != %s" % (self.reads[1].isize, 412) ) + + def testARseq(self): + self.assertEqual( self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 1: %s != %s" % (self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") ) + self.assertEqual( self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "sequence size mismatch in read 2: %s != %s" % (self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") ) + + def testARqual(self): + self.assertEqual( self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 1: %s != %s" % (self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") ) + self.assertEqual( self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") ) + + def testPresentOptionalFields(self): + self.assertEqual( self.reads[0].opt('NM'), 1, "optional field mismatch in read 1, NM: %s != %s" % (self.reads[0].opt('NM'), 1) ) + self.assertEqual( self.reads[0].opt('RG'), 'L1', "optional field mismatch in read 1, RG: %s != %s" % (self.reads[0].opt('RG'), 'L1') ) + self.assertEqual( self.reads[1].opt('RG'), 'L2', "optional field mismatch in read 2, RG: %s != %s" % (self.reads[1].opt('RG'), 'L2') ) + self.assertEqual( self.reads[1].opt('MF'), 18, "optional field mismatch in read 2, MF: %s != %s" % (self.reads[1].opt('MF'), 18) ) + + def testPairedBools(self): + self.assertEqual( self.reads[0].is_paired, True, "is paired mismatch in read 1: %s != %s" % (self.reads[0].is_paired, True) ) + self.assertEqual( self.reads[1].is_paired, True, "is paired mismatch in read 2: %s != %s" % (self.reads[1].is_paired, True) ) + self.assertEqual( self.reads[0].is_proper_pair, True, "is proper pair mismatch in read 1: %s != %s" % (self.reads[0].is_proper_pair, True) ) + self.assertEqual( self.reads[1].is_proper_pair, True, "is proper pair mismatch in read 2: %s != %s" % (self.reads[1].is_proper_pair, True) ) + + def tearDown(self): + self.samfile.close() + +class TestHeaderSam(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex3.sam","r" ) + + def testHeaders(self): + self.assertEqual( self.samfile.header, {'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}], 'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', 'PU': 'SC_1_10'}, {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', 'PU': 'SC_2_12'}], 'CO': ['this is a comment', 'this is another comment'], 'HD': {'VN': '1.0'}}, "mismatch in headers: %s != %s" % (self.samfile.header, {'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}], 'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', 'PU': 'SC_1_10'}, {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', 'PU': 'SC_2_12'}], 'CO': ['this is a comment', 'this is another comment'], 'HD': {'VN': '1.0'}}) ) + + def tearDown(self): + self.samfile.close() + +class TestHeaderBam(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex3.sam","r" ) + + def testHeaders(self): + self.assertEqual( self.samfile.header, {'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}], 'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', 'PU': 'SC_1_10'}, {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', 'PU': 'SC_2_12'}], 'CO': ['this is a comment', 'this is another comment'], 'HD': {'VN': '1.0'}}, "mismatch in headers: %s != %s" % (self.samfile.header, {'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}], 'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', 'PU': 'SC_1_10'}, {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', 'PU': 'SC_2_12'}], 'CO': ['this is a comment', 'this is another comment'], 'HD': {'VN': '1.0'}}) ) + + def tearDown(self): + self.samfile.close() + +class TestPileupObjects(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex1.bam","rb" ) + + def testPileupColumn(self): + for pcolumn1 in self.samfile.pileup( region="chr1:105" ): + if pcolumn1.pos == 104: + self.assertEqual( pcolumn1.tid, 0, "chromosome/target id mismatch in position 1: %s != %s" % (pcolumn1.tid, 0) ) + self.assertEqual( pcolumn1.pos, 105-1, "position mismatch in position 1: %s != %s" % (pcolumn1.pos, 105-1) ) + self.assertEqual( pcolumn1.n, 2, "# reads mismatch in position 1: %s != %s" % (pcolumn1.n, 2) ) + for pcolumn2 in self.samfile.pileup( region="chr2:1480" ): + if pcolumn2.pos == 1479: + self.assertEqual( pcolumn2.tid, 1, "chromosome/target id mismatch in position 1: %s != %s" % (pcolumn2.tid, 1) ) + self.assertEqual( pcolumn2.pos, 1480-1, "position mismatch in position 1: %s != %s" % (pcolumn2.pos, 1480-1) ) + self.assertEqual( pcolumn2.n, 12, "# reads mismatch in position 1: %s != %s" % (pcolumn2.n, 12) ) + + def testPileupRead(self): + for pcolumn1 in self.samfile.pileup( region="chr1:105" ): + if pcolumn1.pos == 104: + self.assertEqual( len(pcolumn1.pileups), 2, "# reads aligned to column mismatch in position 1: %s != %s" % (len(pcolumn1.pileups), 2) ) +# self.assertEqual( pcolumn1.pileups[0] # need to test additional properties here + + def tearDown(self): + self.samfile.close() + +class TestExceptions(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex1.bam","rb" ) + + def testBadFile(self): + self.assertRaises( IOError, pysam.Samfile, "exdoesntexist.bam", "rb" ) + self.assertRaises( IOError, pysam.Samfile, "exdoesntexist.sam","r" ) + self.assertRaises( IOError, pysam.Samfile, "exdoesntexist.bam", "r" ) + self.assertRaises( IOError, pysam.Samfile, "exdoesntexist.sam","rb" ) + + def testBadContig(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr88" ) + + def testMeaninglessCrap(self): + self.assertRaises( ValueError, self.samfile.fetch, "skljf" ) + + def testBackwardsOrderNewFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, 'chr1', 100, 10 ) + + def testBackwardsOrderOldFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, region="chr1:100-10") + + def testOutOfRangeNegativeNewFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, -10 ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, 0 ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1", -5, -10 ) + + def testOutOfRangeNegativeOldFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, region="chr1:-5-10" ) + self.assertRaises( ValueError, self.samfile.fetch, region="chr1:-5-0" ) + self.assertRaises( ValueError, self.samfile.fetch, region="chr1:-5--10" ) + + def testOutOfRangNewFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 9999999999, 99999999999 ) + + def testOutOfRangeLargeNewFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 9999999999999999999999999999999, 9999999999999999999999999999999999999999 ) + + def testOutOfRangeLargeOldFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999" ) + + def tearDown(self): + self.samfile.close() + +# TODOS +# 1. finish testing all properties within pileup objects +# 2. check exceptions and bad input problems (missing files, optional fields that aren't present, etc...) + +if __name__ == "__main__": + unittest.main() diff --git a/tests/segfault_tests.py b/tests/segfault_tests.py new file mode 100755 index 0000000..ff32fec --- /dev/null +++ b/tests/segfault_tests.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +'''unit testing code for pysam.''' + +import pysam +import unittest +import os +import itertools +import subprocess +import shutil + +class TestExceptions(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex1.bam","rb" ) + + def testOutOfRangeNegativeNewFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, -10 ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, 0 ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1", -5, -10 ) + + def testOutOfRangeNegativeOldFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5-10" ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5-0" ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5--10" ) + + def testOutOfRangeLargeNewFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 99999999999999999, 999999999999999999 ) + + def testOutOfRangeLargeOldFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999" ) + + def tearDown(self): + self.samfile.close() + +if __name__ == "__main__": + unittest.main() +