Imported Upstream version 0.1.1

author Charles Plessy <plessy@debian.org>

Thu, 13 Jan 2011 09:09:25 +0000 (18:09 +0900)

committer Charles Plessy <plessy@debian.org>

Thu, 13 Jan 2011 09:09:25 +0000 (18:09 +0900)
author Charles Plessy <plessy@debian.org>
Thu, 13 Jan 2011 09:09:25 +0000 (18:09 +0900)
committer Charles Plessy <plessy@debian.org>
Thu, 13 Jan 2011 09:09:25 +0000 (18:09 +0900)
diff --git a/MANIFEST.in b/MANIFEST.in

index cd0beee257d320b0bc4f35875461d69ac5b4c623..c0ca312768cb8a748b9c4808ff3c4111aa35704d 100644 (file)
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -11,4 +11,13 @@ include THANKS
  include pysam/csamtools.pxd
  include pysam/pysam_util.h
  include samtools/*.h
+include tests/00README.txt
+include tests/Makefile
+include tests/ex1.fa
+include tests/ex1.sam.gz
+include tests/ex3.sam
+include tests/ex4.sam
+include tests/example.py
+include tests/pysam_test.py
+include tests/segfault_tests.py
  
diff --git a/PKG-INFO b/PKG-INFO

index 7126e3bbd7bf619c486fed034ea75e4290ba4167..174c1a9bfc0eadf3a237ba9be9bb53805c2e1f3d 100644 (file)
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 1.0
  Name: pysam
-Version: 0.1
+Version: 0.1.1
  Summary: pysam
  Home-page: http://code.google.com/p/pysam/
  Author: Andreas Heger
diff --git a/pysam/csamtools.pyx b/pysam/csamtools.pyx

index b469e784473120fe4a57a798ac78900c8b26986c..a0bda8982b9b0ceaa68ef04eb078df80166a9e99 100644 (file)
--- a/pysam/csamtools.pyx
+++ b/pysam/csamtools.pyx
@@ -46,6 +46,9 @@ cdef makeAlignedRead( bam1_t * src):
      '''enter src into AlignedRead.'''
      cdef AlignedRead dest
      dest = AlignedRead()
+    # destroy dummy delegate created in constructor
+    # to prevent memory leak.
+    bam_destroy1(dest._delegate)
      dest._delegate = bam_dup1(src)
      return dest
  
@@ -160,8 +163,7 @@ cdef class Samfile:
      so to open a :term:`BAM` file for reading::
  
          f=Samfile('ex1.bam','rb')
-    
-    
+
  
      For writing, the header of a :term:`TAM` file/:term:`BAM` file can be constituted from several
      sources:
@@ -285,17 +287,13 @@ cdef class Samfile:
              if self.index == NULL:
                  raise IOError("could not open index `%s` " % filename )
  
-    def _getTarget( self, tid ):
+    def getrname( self, tid ):
          '''(tid )
-        convert numerical :term:`tid` into target name.'''
+        convert numerical :term:`tid` into :ref:`reference` name.'''
          if not 0 <= tid < self.samfile.header.n_targets:
              raise ValueError( "tid out of range 0<=tid<%i" % self.samfile.header.n_targets )
          return self.samfile.header.target_name[tid]
  
-    def getNumReferences( self ):
-        """return the number of :term:`reference` sequences."""
-        return self.samfile.header.n_targets
-
      def _parseRegion( self, reference = None, start = None, end = None, 
                         region = None ):
          '''parse region information.
@@ -305,6 +303,8 @@ cdef class Samfile:
          returns a tuple of region, tid, start and end. Region
          is a valid samtools :term:`region` or None if the region
          extends over the whole file.
+
+        Note that regions are 1-based, while start,end are python coordinates.
          '''
          
          cdef int rtid
@@ -316,7 +316,7 @@ cdef class Samfile:
          # translate to a region
          if reference:
              if start != None and end != None:
-                region = "%s:%i-%i" % (reference, start, end)
+                region = "%s:%i-%i" % (reference, start+1, end)
              else:
                  region = reference
  
@@ -331,7 +331,9 @@ cdef class Samfile:
          return region, rtid, rstart, rend
  
      def fetch( self, 
-               reference = None, start = None, end = None, 
+               reference = None, 
+               start = None, 
+               end = None, 
                 region = None, 
                 callback = None,
                 until_eof = False ):
@@ -369,14 +371,17 @@ cdef class Samfile:
                  return bam_fetch(self.samfile.x.bam, self.index, rtid, rstart, rend, <void*>callback, fetch_callback )
              else:
                  if region:
-                    return IteratorRow( self, region )
+                    return IteratorRow( self, rtid, rstart, rend )
                  else:
                      if until_eof:
                          return IteratorRowAll( self )
                      else:
                          # return all targets by chaining the individual targets together.
                          i = []
-                        for x in self.references: i.append( IteratorRow( self, x))
+                        rstart = 0
+                        rend = 1<<29
+                        for rtid from 0 <= rtid < self.nreferences: 
+                            i.append( IteratorRow( self, rtid, rstart, rend))
                          return itertools.chain( *i )
          else:                    
              if region != None:
@@ -427,11 +432,14 @@ cdef class Samfile:
                  bam_plbuf_destroy(buf)
              else:
                  if region:
-                    return IteratorColumn( self, region )
+                    return IteratorColumn( self, rtid, rstart, rend )
                  else:
                      # return all targets by chaining the individual targets together.
                      i = []
-                    for x in self.references: i.append( IteratorColumn( self, x))
+                    rstart = 0
+                    rend = 1<<29
+                    for rtid from 0 <= rtid < self.nreferences: 
+                        i.append( IteratorColumn( self, rtid, rstart, rend))
                      return itertools.chain( *i )
  
          else:
@@ -439,7 +447,6 @@ cdef class Samfile:
  
      def close( self ):
          '''closes file.'''
-        
          if self.samfile != NULL:
              samclose( self.samfile )
              bam_index_destroy(self.index);
@@ -458,6 +465,11 @@ cdef class Samfile:
          '''
          return samwrite( self.samfile, read._delegate )
  
+    property nreferences:
+        '''number of :term:`reference` sequences in the file.'''
+        def __get__(self):
+            return self.samfile.header.n_targets
+
      property references:
          """tuple with the names of :term:`reference` sequences."""
          def __get__(self): 
@@ -608,23 +620,20 @@ cdef class IteratorRow:
      cdef bam1_t *               b
      cdef                        error_msg
      cdef int                    error_state
-    def __cinit__(self, Samfile samfile, region ):
+    cdef Samfile                samfile
+    def __cinit__(self, Samfile samfile, int tid, int beg, int end ):
          self.bam_iter = NULL
  
          assert samfile._isOpen()
          assert samfile._hasIndex()
+        
+        # makes sure that samfile stays alive as long as the
+        # iterator is alive.
+        self.samfile = samfile
  
          # parse the region
-        cdef int      tid
-        cdef int      beg
-        cdef int      end
          self.error_state = 0
          self.error_msg = None
-        bam_parse_region( samfile.samfile.header, region, &tid, &beg, &end)
-        if tid < 0: 
-            self.error_state = 1
-            self.error_msg = "invalid region `%s`" % region
-            return
  
          cdef bamFile  fp
          fp = samfile.samfile.x.bam
@@ -714,13 +723,14 @@ cdef class IteratorColumn:
  
      # check if first iteration
      cdef int notfirst
+    # result of the last plbuf_push
      cdef int n_pu
      cdef int eof 
      cdef IteratorRow iter
  
-    def __cinit__(self, Samfile samfile, region ):
+    def __cinit__(self, Samfile samfile, int tid, int start, int end ):
  
-        self.iter = IteratorRow( samfile, region )
+        self.iter = IteratorRow( samfile, tid, start, end )
          self.buf = bam_plbuf_init(NULL, NULL )
          self.n_pu = 0
          self.eof = 0
@@ -729,9 +739,18 @@ cdef class IteratorColumn:
          return self 
  
      cdef int cnext(self):
+        '''perform next iteration.
+        
+        return 1 if there is a buffer to emit. Return 0 for end of iteration.
+        '''
  
          cdef int retval1, retval2
  
+        # pysam bam_plbuf_push returns:
+        # 1: if buf is full and can be emitted
+        # 0: if b has been added
+        # -1: if there was an error
+
          # check if previous plbuf was incomplete. If so, continue within
          # the loop and yield if necessary
          if self.n_pu > 0:
@@ -744,12 +763,11 @@ cdef class IteratorColumn:
          # an new column has finished
          while self.n_pu == 0:
              retval1 = self.iter.cnext()
-
              # wrap up if no more input
              if retval1 == 0: 
                  self.n_pu = pysam_bam_plbuf_push( NULL, self.buf, 0)            
                  self.eof = 1
-                return 1
+                return self.n_pu
  
              # submit to plbuf
              self.n_pu = pysam_bam_plbuf_push( self.iter.getCurrent(), self.buf, 0)            
@@ -878,13 +896,17 @@ cdef class AlignedRead:
          def __get__(self): 
              return self._delegate.core.flag
      property rname: 
-        """chromosome/target ID"""
+        """:term:`reference` ID"""
          def __get__(self): 
              return self._delegate.core.tid
      property pos: 
          """0-based leftmost coordinate"""
          def __get__(self): 
              return self._delegate.core.pos
+    property rlen:
+        '''length of the read. Returns 0 if not given.'''
+        def __get__(self): 
+            return self._delegate.core.l_qseq
      property mapq: 
          """mapping quality"""
          def __get__(self): 
@@ -1034,8 +1056,7 @@ cdef class PileupRead:
  
      def __str__(self):
          return "\t".join( map(str, (self.alignment, self.qpos, self.indel, self.level, self.is_del, self.is_head, self.is_tail ) ) )
-   
-    
+       
      property alignment:
          """a :class:`pysam.AlignedRead` object of the aligned read"""
          def __get__(self):
@@ -1052,18 +1073,17 @@ cdef class PileupRead:
          """1 iff the base on the padded read is a deletion"""
          def __get__(self):
              return self._is_del
-    property head:
+    property is_head:
          def __get__(self):
              return self._is_head
-    property tail:
+    property is_tail:
          def __get__(self):
              return self._is_tail
-    
-        
-        
-    
-            
-            
+    property level:
+        def __get__(self):
+            return self._level
+
+
  class Outs:
      '''http://mail.python.org/pipermail/python-list/2000-June/038406.html'''
      def __init__(self, id = 1):
diff --git a/pysam/pysam_util.c b/pysam/pysam_util.c

index 7f3174b6a0d6aeb98624e641fbc2d4861e35dfc0..8d28096d795c808d157a39219cb9fb00bfb31415 100644 (file)
--- a/pysam/pysam_util.c
+++ b/pysam/pysam_util.c
@@ -138,6 +138,11 @@ static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)
         return ret;
  }
  
+// the following code has been taken from bam_plbuf_push
+// and modified such that instead of a function call
+// the function returns and will continue (if cont is true).
+// from where it left off.
+
  // returns
  // 1: if buf is full and can be emitted
  // 0: if b has been added
diff --git a/setup.py b/setup.py

index 7a7ab7ead06fdfc44d4c55922426bb59a511886d..544e48fc13cc96fa9b25e9a901a8cbfbb939cb7e 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -8,6 +8,9 @@ pysam
  
  import os, sys, glob, shutil
  
+name = "pysam"
+version = "0.1.1"
+
  samtools_exclude = ( "bamtk.c", "razip.c", "bgzip.c" )
  samtools_dest = os.path.abspath( "samtools" )
  
@@ -32,9 +35,6 @@ if len(sys.argv) >= 2 and sys.argv[1] == "import":
  from distutils.core import setup, Extension
  from Pyrex.Distutils import build_ext
  
-name = "pysam"
-version = "0.1"
-
  classifiers = """
  Development Status :: 2 - Alpha
  Operating System :: MacOS :: MacOS X
diff --git a/tests/00README.txt b/tests/00README.txt

new file mode 100644 (file)

index 0000000..67b8689
--- /dev/null
+++ b/tests/00README.txt
@@ -0,0 +1,32 @@
+File ex1.fa contains two sequences cut from the human genome
+build36. They were exatracted with command:
+
+  samtools faidx human_b36.fa 2:2043966-2045540 20:67967-69550
+
+Sequence names were changed manually for simplicity. File ex1.sam.gz
+contains MAQ alignments exatracted with:
+
+  (samtools view NA18507_maq.bam 2:2044001-2045500;
+   samtools view NA18507_maq.bam 20:68001-69500)
+
+and processed with `samtools fixmate' to make it self-consistent as a
+standalone alignment.
+
+To try samtools, you may run the following commands:
+
+  samtools faidx ex1.fa                 # index the reference FASTA
+  samtools import ex1.fa.fai ex1.sam.gz ex1.bam   # SAM->BAM
+  samtools index ex1.bam                # index BAM
+  samtools tview ex1.bam ex1.fa         # view alignment
+  samtools pileup -cf ex1.fa ex1.bam    # pileup and consensus
+  samtools pileup -cf ex1.fa -t ex1.fa.fai ex1.sam.gz
+
+In order for the script pysam_test.py to work, you will need pysam
+in your PYTHONPATH.
+
+In order for the script example.py to work, you will need pysam
+in your PYTHONPATH and run
+
+  make all
+
+beforehand.
diff --git a/tests/Makefile b/tests/Makefile

new file mode 100644 (file)

index 0000000..7de46f5
--- /dev/null
+++ b/tests/Makefile
@@ -0,0 +1,30 @@
+all: ex1.glf ex1.pileup.gz ex1.bam.bai ex1.glfview.gz ex2.sam.gz ex2.sam ex1.sam ex3.bam ex3.bam.bai ex4.bam ex4.bam.bai
+               @echo; echo \# You can now launch the viewer with: \'samtools tview ex1.bam ex1.fa\'; echo;
+
+ex2.sam.gz: ex1.bam ex1.bam.bai
+               samtools view -h ex1.bam | gzip > ex2.sam.gz
+
+ex3.bam: ex3.sam ex1.fa.fai
+       samtools import ex1.fa.fai ex3.sam ex3.bam
+
+ex4.bam: ex4.sam ex1.fa.fai
+       samtools import ex1.fa.fai ex4.sam ex4.bam
+
+%.sam: %.sam.gz
+       gunzip < $< > $@
+
+ex1.fa.fai:ex1.fa
+               samtools faidx ex1.fa
+ex1.bam:ex1.sam.gz ex1.fa.fai
+               samtools import ex1.fa.fai ex1.sam.gz ex1.bam
+%.bam.bai:%.bam
+               samtools index $<
+ex1.pileup.gz:ex1.bam ex1.fa
+               samtools pileup -cf ex1.fa ex1.bam | gzip > ex1.pileup.gz
+ex1.glf:ex1.bam ex1.fa
+               samtools pileup -gf ex1.fa ex1.bam > ex1.glf
+ex1.glfview.gz:ex1.glf
+               samtools glfview ex1.glf | gzip > ex1.glfview.gz
+
+clean:
+               rm -fr *.bam *.bai *.glf* *.fai *.pileup* *~ calDepth *.dSYM pysam_*.sam ex2.sam ex2.sam.gz ex1.sam
diff --git a/tests/ex1.fa b/tests/ex1.fa

new file mode 100644 (file)

index 0000000..b4ed0cf
--- /dev/null
+++ b/tests/ex1.fa
@@ -0,0 +1,56 @@
+>chr1
+CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCT
+GTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCAC
+GGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAG
+TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTC
+AGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAA
+CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACC
+AAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCT
+CTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA
+ATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGC
+AGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAAC
+AACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACAC
+ATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATAC
+CATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCT
+TTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTT
+TCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT
+GCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAAT
+ACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGA
+ACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTG
+TGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTA
+CGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAG
+TCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGC
+TTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTC
+TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTG
+TTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGG
+AGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATA
+TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTC
+TCCCTCGTCTTCTTA
+>chr2
+TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAG
+CTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCT
+TATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTT
+CAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAA
+AAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT
+AGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC
+ATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAG
+GAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCAT
+CAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATT
+TTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTA
+AGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATA
+ATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT
+TAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATA
+AAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACC
+TCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATA
+GATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATT
+AATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCA
+AATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGT
+AAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATAT
+AACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAAT
+ACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGAT
+GATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTG
+CGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATA
+GCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAA
+AAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAA
+TTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGC
+CAGAAAAAAATATTTACAGTAACT
diff --git a/tests/ex1.sam.gz b/tests/ex1.sam.gz

new file mode 100644 (file)

index 0000000..8dd2bc4

Binary files /dev/null and b/tests/ex1.sam.gz differ
diff --git a/tests/ex3.sam b/tests/ex3.sam

new file mode 100644 (file)

index 0000000..801d13a
--- /dev/null
+++ b/tests/ex3.sam
@@ -0,0 +1,9 @@
+@HD    VN:1.0
+@SQ    SN:chr1 LN:1575
+@SQ    SN:chr2 LN:1584
+@RG    ID:L1   PU:SC_1_10      LB:SC_1 SM:NA12891
+@RG    ID:L2   PU:SC_2_12      LB:SC_2 SM:NA12891
+@CO    this is a comment
+@CO    this is another comment
+read_28833_29006_6945  99      chr1    33      20      10M1D25M        =       200     167     AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG     <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<     NM:i:1  RG:Z:L1
+read_28701_28881_323b  147     chr2    88      30      35M     =       500     412     ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA     <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<     MF:i:18 RG:Z:L2
diff --git a/tests/ex4.sam b/tests/ex4.sam

new file mode 100644 (file)

index 0000000..b2282b8
--- /dev/null
+++ b/tests/ex4.sam
@@ -0,0 +1,9 @@
+@HD    VN:1.0
+@SQ    SN:chr1 LN:100
+@SQ    SN:chr2 LN:100
+@RG    ID:L1   PU:SC_1_10      LB:SC_1 SM:NA12891
+@RG    ID:L2   PU:SC_2_12      LB:SC_2 SM:NA12891
+@CO    this is a comment
+@CO    this is another comment
+read_28833_29006_6945  99      chr1    21      20      10M1D25M        =       200     167     AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG     <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<     NM:i:1  RG:Z:L1
+read_28701_28881_323b  147     chr2    21      30      35M     =       500     412     ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA     <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<     MF:i:18 RG:Z:L2
diff --git a/tests/example.py b/tests/example.py

new file mode 100644 (file)

index 0000000..42e7347
--- /dev/null
+++ b/tests/example.py
@@ -0,0 +1,119 @@
+import pysam
+
+samfile = pysam.Samfile( "ex1.bam", "rb" )
+
+print "###################"
+# check different ways to iterate
+print len(list(samfile.fetch()))
+print len(list(samfile.fetch( "seq1", 10, 200 )))
+print len(list(samfile.fetch( region="seq1:10-200" )))
+print len(list(samfile.fetch( "seq1" )))
+print len(list(samfile.fetch( region="seq1")))
+print len(list(samfile.fetch( "seq2" )))
+print len(list(samfile.fetch( region="seq2")))
+print len(list(samfile.fetch()))
+print len(list(samfile.fetch( "seq1" )))
+print len(list(samfile.fetch( region="seq1")))
+print len(list(samfile.fetch()))
+
+print len(list(samfile.pileup( "seq1", 10, 200 )))
+print len(list(samfile.pileup( region="seq1:10-200" )))
+print len(list(samfile.pileup( "seq1" )))
+print len(list(samfile.pileup( region="seq1")))
+print len(list(samfile.pileup( "seq2" )))
+print len(list(samfile.pileup( region="seq2")))
+print len(list(samfile.pileup()))
+print len(list(samfile.pileup()))
+
+print "########### fetch with callback ################"
+def my_fetch_callback( alignment ): print str(alignment)
+samfile.fetch( region="seq1:10-200", callback=my_fetch_callback )
+
+print "########## pileup with callback ################"
+def my_pileup_callback( pileups ): print str(pileups)
+samfile.pileup( region="seq1:10-200", callback=my_pileup_callback )
+
+print "##########iterator row #################"
+iter = pysam.IteratorRow( samfile, "seq1:10-200")
+for x in iter: print str(x)
+
+print "##########iterator col #################"
+iter = pysam.IteratorColumn( samfile, "seq1:10-200")
+for x in iter: print str(x)
+
+print "#########row all##################"
+iter = pysam.IteratorRowAll( samfile )
+for x in iter: print str(x)
+
+
+print "###################"
+
+class Counter:
+    mCounts = 0
+    def __call__(self, alignment):
+        self.mCounts += 1
+
+c = Counter()
+samfile.fetch( "seq1:10-200", c )
+print "counts=", c.mCounts
+
+print samfile.getTarget( 0 )
+print samfile.getTarget( 1 )
+
+for p in pysam.pileup( "-c", "ex1.bam" ):
+    print str(p)
+
+print pysam.pileup.getMessages()
+
+for p in pysam.pileup( "-c", "ex1.bam", raw=True ):
+    print str(p),
+
+
+
+print "###########################"
+
+samfile = pysam.Samfile( "ex2.sam.gz", "r" )
+
+print "num targets=", samfile.getNumTargets()
+
+iter = pysam.IteratorRowAll( samfile )
+for x in iter: print str(x)
+
+samfile.close()
+
+print "###########################"
+samfile = pysam.Samfile( "ex2.sam.gz", "r" )
+def my_fetch_callback( alignment ):
+    print str(alignment)
+
+try:
+    samfile.fetch( "seq1:10-20", my_fetch_callback )
+except AssertionError:
+    print "caught fetch exception"
+
+samfile.close()
+
+print "###########################"
+samfile = pysam.Samfile( "ex2.sam.gz", "r" )
+def my_pileup_callback( pileups ):
+    print str(pileups)
+try:
+    samfile.pileup( "seq1:10-20", my_pileup_callback )
+except NotImplementedError:
+    print "caught pileup exception"
+
+# playing arount with headers
+samfile = pysam.Samfile( "ex3.sam", "r" )
+print samfile.targets
+print samfile.lengths
+print samfile.text
+print samdile.header
+header = samfile.header
+samfile.close()
+
+header["HD"]["SO"] = "unsorted"
+outfile = pysam.Samfile( "out.sam", "wh", 
+                         header = header )
+
+outfile.close()
+
diff --git a/tests/pysam_test.py b/tests/pysam_test.py

new file mode 100755 (executable)

index 0000000..2bbe8a3
--- /dev/null
+++ b/tests/pysam_test.py
@@ -0,0 +1,551 @@
+#!/usr/bin/env python
+'''unit testing code for pysam.'''
+
+import pysam
+import unittest
+import os
+import itertools
+import subprocess
+import shutil
+
+def checkBinaryEqual( filename1, filename2 ):
+    '''return true if the two files are binary equal.'''
+    if os.path.getsize( filename1 ) !=  os.path.getsize( filename2 ):
+        return False
+
+    infile1 = open(filename1, "rb")
+    infile2 = open(filename2, "rb")
+
+    def chariter( infile ):
+        while 1:
+            c = infile.read(1)
+            if c == "": break
+            yield c
+
+    found = False
+    for c1,c2 in itertools.izip( chariter( infile1), chariter( infile2) ):
+        if c1 != c2: break
+    else:
+        found = True
+
+    infile1.close()
+    infile2.close()
+    return found
+
+def runSamtools( cmd ):
+    '''run a samtools command'''
+
+    try:
+        retcode = subprocess.call(cmd, shell=True)
+        if retcode < 0:
+            print >>sys.stderr, "Child was terminated by signal", -retcode
+    except OSError, e:
+        print >>sys.stderr, "Execution failed:", e
+
+class BinaryTest(unittest.TestCase):
+    '''test samtools command line commands and compare
+    against pysam commands.
+
+    Tests fail, if the output is not binary identical.
+    '''
+
+    first_time = True
+
+    # a list of commands to test
+    mCommands = \
+        { "faidx" : \
+        ( 
+            ("ex1.fa.fai", "samtools faidx ex1.fa"), 
+            ("pysam_ex1.fa.fai", (pysam.faidx, "ex1.fa") ),
+            ),
+          "import" :
+              (
+                ("ex1.bam", "samtools import ex1.fa.fai ex1.sam.gz ex1.bam" ),
+                ("pysam_ex1.bam", (pysam.samimport, "ex1.fa.fai ex1.sam.gz pysam_ex1.bam") ),
+                ),
+          "index":
+              (
+                ("ex1.bam.bai", "samtools index ex1.bam" ),
+                ("pysam_ex1.bam.bai", (pysam.index, "pysam_ex1.bam" ) ),
+                ),
+          "pileup1" :
+          (
+                ("ex1.pileup", "samtools pileup -cf ex1.fa ex1.bam > ex1.pileup" ),
+                ("pysam_ex1.pileup", (pysam.pileup, "-c -f ex1.fa ex1.bam" ) )
+                ),
+          "pileup2" :
+          (
+                ("ex1.glf", "samtools pileup -gf ex1.fa ex1.bam > ex1.glf" ),
+                ("pysam_ex1.glf", (pysam.pileup, "-g -f ex1.fa ex1.bam" ) )
+                ),
+          "glfview" :
+        (
+                ("ex1.glfview", "samtools glfview ex1.glf > ex1.glfview"),
+                ("pysam_ex1.glfview", (pysam.glfview, "ex1.glf" ) ),
+                ),
+          "view" :
+        (
+                ("ex1.view", "samtools view ex1.bam > ex1.view"),
+                ("pysam_ex1.view", (pysam.view, "ex1.bam" ) ),
+                ),
+        }
+
+    # some tests depend on others. The order specifies in which order
+    # the samtools commands are executed.
+    mOrder = ('faidx', 'import', 'index', 'pileup1', 'pileup2', 'glfview', 'view' )
+
+    def setUp( self ):
+        '''setup tests. 
+
+        For setup, all commands will be run before the first test is
+        executed. Individual tests will then just compare the output
+        files.
+        '''
+
+        if BinaryTest.first_time:
+            # copy the source 
+            shutil.copy( "ex1.fa", "pysam_ex1.fa" )
+
+            for label in self.mOrder:
+                command = self.mCommands[label]
+                samtools_target, samtools_command = command[0]
+                pysam_target, pysam_command = command[1]
+                runSamtools( samtools_command )
+                pysam_method, pysam_options = pysam_command
+                output = pysam_method( *pysam_options.split(" "), raw=True)
+                if ">" in samtools_command:
+                    outfile = open( pysam_target, "w" )
+                    for line in output: outfile.write( line )
+                    outfile.close()
+
+            BinaryTest.first_time = False
+
+    def checkCommand( self, command ):
+        if command:
+            samtools_target, pysam_target = self.mCommands[command][0][0], self.mCommands[command][1][0]
+            self.assertTrue( checkBinaryEqual( samtools_target, pysam_target ), 
+                             "%s failed: files %s and %s are not the same" % (command, samtools_target, pysam_target) )
+
+    def testImport( self ):
+        self.checkCommand( "import" )
+
+    def testIndex( self ):
+        self.checkCommand( "index" )
+        
+    def testPileup1( self ):
+        self.checkCommand( "pileup1" )
+    
+    def testPileup2( self ):
+        self.checkCommand( "pileup2" )
+
+    def testGLFView( self ):
+        self.checkCommand( "glfview" )
+
+    def testView( self ):
+        self.checkCommand( "view" )
+
+    def __del__(self):
+
+        for label, command in self.mCommands.iteritems():
+            samtools_target, samtools_command = command[0]
+            pysam_target, pysam_command = command[1]
+            if os.path.exists( samtools_target): os.remove( samtools_target )
+            if os.path.exists( pysam_target): os.remove( pysam_target )
+        if os.path.exists( "pysam_ex1.fa" ): os.remove( "pysam_ex1.fa" )
+
+class IOTest(unittest.TestCase):
+    '''check if reading samfile and writing a samfile are consistent.'''
+
+    def checkEcho( self, input_filename, reference_filename, 
+                   output_filename, 
+                   input_mode, output_mode, use_template = True):
+        '''iterate through *input_filename* writing to *output_filename* and
+        comparing the output to *reference_filename*. 
+        
+        The files are opened according to the *input_mode* and *output_mode*.
+
+        If *use_template* is set, the header is copied from infile using the
+        template mechanism, otherwise target names and lengths are passed explicitely. 
+        '''
+
+        infile = pysam.Samfile( input_filename, input_mode )
+        if use_template:
+            outfile = pysam.Samfile( output_filename, output_mode, template = infile )
+        else:
+            outfile = pysam.Samfile( output_filename, output_mode, 
+                                     referencenames = infile.references,
+                                     referencelengths = infile.lengths )
+
+        iter = infile.fetch()
+        for x in iter: outfile.write( x )
+        infile.close()
+        outfile.close()
+
+        self.assertTrue( checkBinaryEqual( reference_filename, output_filename), 
+                         "files %s and %s are not the same" % (reference_filename, output_filename) )
+
+    def testReadWriteBam( self ):
+        
+        input_filename = "ex1.bam"
+        output_filename = "pysam_ex1.bam"
+        reference_filename = "ex1.bam"
+
+        self.checkEcho( input_filename, reference_filename, output_filename,
+                        "rb", "wb" )
+
+    def testReadWriteBamWithTargetNames( self ):
+        
+        input_filename = "ex1.bam"
+        output_filename = "pysam_ex1.bam"
+        reference_filename = "ex1.bam"
+
+        self.checkEcho( input_filename, reference_filename, output_filename,
+                        "rb", "wb", use_template = False )
+
+    def testReadWriteSamWithHeader( self ):
+        
+        input_filename = "ex2.sam"
+        output_filename = "pysam_ex2.sam"
+        reference_filename = "ex2.sam"
+
+        self.checkEcho( input_filename, reference_filename, output_filename,
+                        "r", "wh" )
+
+    def testReadWriteSamWithoutHeader( self ):
+        
+        input_filename = "ex2.sam"
+        output_filename = "pysam_ex2.sam"
+        reference_filename = "ex1.sam"
+
+        self.checkEcho( input_filename, reference_filename, output_filename,
+                        "r", "w" )
+
+class TestIteratorRow(unittest.TestCase):
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex1.bam","rb" )
+
+    def checkRange( self, rnge ):
+        '''compare results from iterator with those from samtools.'''
+        ps = list(self.samfile.fetch(region=rnge))
+        sa = list(pysam.view( "ex1.bam", rnge , raw = True) )
+        self.assertEqual( len(ps), len(sa), "unequal number of results for range %s: %i != %i" % (rnge, len(ps), len(sa) ))
+        # check if the same reads are returned and in the same order
+        for line, pair in enumerate( zip( ps, sa ) ):
+            data = pair[1].split("\t")
+            self.assertEqual( pair[0].qname, data[0], "read id mismatch in line %i: %s != %s" % (line, pair[0].rname, data[0]) )
+
+    def testIteratePerContig(self):
+        '''check random access per contig'''
+        for contig in self.samfile.references:
+            self.checkRange( contig )
+
+    def testIterateRanges(self):
+        '''check random access per range'''
+        for contig, length in zip(self.samfile.references, self.samfile.lengths):
+            for start in range( 1, length, 90):
+                self.checkRange( "%s:%i-%i" % (contig, start, start + 90) ) # this includes empty ranges
+
+    def tearDown(self):
+        self.samfile.close()
+
+class TestIteratorRowAll(unittest.TestCase):
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex1.bam","rb" )
+
+    def testIterate(self):
+        '''compare results from iterator with those from samtools.'''
+        ps = list(self.samfile.fetch())
+        sa = list(pysam.view( "ex1.bam", raw = True) )
+        self.assertEqual( len(ps), len(sa), "unequal number of results: %i != %i" % (len(ps), len(sa) ))
+        # check if the same reads are returned
+        for line, pair in enumerate( zip( ps, sa ) ):
+            data = pair[1].split("\t")
+            self.assertEqual( pair[0].qname, data[0], "read id mismatch in line %i: %s != %s" % (line, pair[0].rname, data[0]) )
+
+    def tearDown(self):
+        self.samfile.close()
+
+class TestIteratorColumn(unittest.TestCase):
+    '''test iterator column against contents of ex3.bam.'''
+    
+    # note that samfile contains 1-based coordinates
+    # 1D means deletion with respect to reference sequence
+    # 
+    mCoverages = { 'chr1' : [ 0 ] * 20 + [1] * 36 + [0] * (100 - 20 -35 ),
+                   'chr2' : [ 0 ] * 20 + [1] * 35 + [0] * (100 - 20 -35 ),
+                   }
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex4.bam","rb" )
+
+    def checkRange( self, rnge ):
+        '''compare results from iterator with those from samtools.'''
+        # check if the same reads are returned and in the same order
+        for column in self.samfile.pileup(region=rnge):
+            thiscov = len(column.pileups)
+            refcov = self.mCoverages[self.samfile.getrname(column.tid)][column.pos]
+            self.assertEqual( thiscov, refcov, "wrong coverage at pos %s:%i %i should be %i" % (self.samfile.getrname(column.tid), column.pos, thiscov, refcov))
+
+    def testIterateAll(self):
+        '''check random access per contig'''
+        self.checkRange( None )
+
+    def testIteratePerContig(self):
+        '''check random access per contig'''
+        for contig in self.samfile.references:
+            self.checkRange( contig )
+
+    def testIterateRanges(self):
+        '''check random access per range'''
+        for contig, length in zip(self.samfile.references, self.samfile.lengths):
+            for start in range( 1, length, 90):
+                self.checkRange( "%s:%i-%i" % (contig, start, start + 90) ) # this includes empty ranges
+
+    def testInverse( self ):
+        '''test the inverse, is point-wise pileup accurate.'''
+        for contig, refseq in self.mCoverages.items():
+            refcolumns = sum(refseq)
+            for pos, refcov in enumerate( refseq ):
+                columns = list(self.samfile.pileup( contig, pos, pos+1) )
+                if refcov == 0:
+                    # if no read, no coverage
+                    self.assertEqual( len(columns), refcov, "wrong number of pileup columns returned for position %s:%i, %i should be %i" %(contig,pos,len(columns), refcov) )
+                elif refcov == 1:
+                    # one read, all columns of the read are returned
+                    self.assertEqual( len(columns), refcolumns, "pileup incomplete - %i should be %i " % (len(columns), refcolumns))
+                    
+    def tearDown(self):
+        self.samfile.close()
+
+    
+class TestAlignedReadBam(unittest.TestCase):
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex3.bam","rb" )
+        self.reads=list(self.samfile.fetch())
+
+    def testARqname(self):
+        self.assertEqual( self.reads[0].qname, "read_28833_29006_6945", "read name mismatch in read 1: %s != %s" % (self.reads[0].qname, "read_28833_29006_6945") )
+        self.assertEqual( self.reads[1].qname, "read_28701_28881_323b", "read name mismatch in read 2: %s != %s" % (self.reads[1].qname, "read_28701_28881_323b") )
+
+    def testARflag(self):
+        self.assertEqual( self.reads[0].flag, 99, "flag mismatch in read 1: %s != %s" % (self.reads[0].flag, 99) )
+        self.assertEqual( self.reads[1].flag, 147, "flag mismatch in read 2: %s != %s" % (self.reads[1].flag, 147) )
+
+    def testARrname(self):
+        self.assertEqual( self.reads[0].rname, 0, "chromosome/target id mismatch in read 1: %s != %s" % (self.reads[0].rname, 0) )
+        self.assertEqual( self.reads[1].rname, 1, "chromosome/target id mismatch in read 2: %s != %s" % (self.reads[1].rname, 1) )
+
+    def testARpos(self):
+        self.assertEqual( self.reads[0].pos, 33-1, "mapping position mismatch in read 1: %s != %s" % (self.reads[0].pos, 33-1) )
+        self.assertEqual( self.reads[1].pos, 88-1, "mapping position mismatch in read 2: %s != %s" % (self.reads[1].pos, 88-1) )
+
+    def testARmapq(self):
+        self.assertEqual( self.reads[0].mapq, 20, "mapping quality mismatch in read 1: %s != %s" % (self.reads[0].mapq, 20) )
+        self.assertEqual( self.reads[1].mapq, 30, "mapping quality mismatch in read 2: %s != %s" % (self.reads[1].mapq, 30) )
+
+    def testARcigar(self):
+        self.assertEqual( self.reads[0].cigar, [(0, 10), (2, 1), (0, 25)], "read name length mismatch in read 1: %s != %s" % (self.reads[0].cigar, [(0, 10), (2, 1), (0, 25)]) )
+        self.assertEqual( self.reads[1].cigar, [(0, 35)], "read name length mismatch in read 2: %s != %s" % (self.reads[1].cigar, [(0, 35)]) )
+
+    def testARmrnm(self):
+        self.assertEqual( self.reads[0].mrnm, 0, "mate reference sequence name mismatch in read 1: %s != %s" % (self.reads[0].mrnm, 0) )
+        self.assertEqual( self.reads[1].mrnm, 1, "mate reference sequence name mismatch in read 2: %s != %s" % (self.reads[1].mrnm, 1) )
+
+    def testARmpos(self):
+        self.assertEqual( self.reads[0].mpos, 200-1, "mate mapping position mismatch in read 1: %s != %s" % (self.reads[0].mpos, 200-1) )
+        self.assertEqual( self.reads[1].mpos, 500-1, "mate mapping position mismatch in read 2: %s != %s" % (self.reads[1].mpos, 500-1) )
+
+    def testARisize(self):
+        self.assertEqual( self.reads[0].isize, 167, "insert size mismatch in read 1: %s != %s" % (self.reads[0].isize, 167) )
+        self.assertEqual( self.reads[1].isize, 412, "insert size mismatch in read 2: %s != %s" % (self.reads[1].isize, 412) )
+
+    def testARseq(self):
+        self.assertEqual( self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 1: %s != %s" % (self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") )
+        self.assertEqual( self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "sequence size mismatch in read 2: %s != %s" % (self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") )
+
+    def testARqual(self):
+        self.assertEqual( self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 1: %s != %s" % (self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") )
+        self.assertEqual( self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") )
+
+    def testPresentOptionalFields(self):
+        self.assertEqual( self.reads[0].opt('NM'), 1, "optional field mismatch in read 1, NM: %s != %s" % (self.reads[0].opt('NM'), 1) )
+        self.assertEqual( self.reads[0].opt('RG'), 'L1', "optional field mismatch in read 1, RG: %s != %s" % (self.reads[0].opt('RG'), 'L1') )
+        self.assertEqual( self.reads[1].opt('RG'), 'L2', "optional field mismatch in read 2, RG: %s != %s" % (self.reads[1].opt('RG'), 'L2') )
+        self.assertEqual( self.reads[1].opt('MF'), 18, "optional field mismatch in read 2, MF: %s != %s" % (self.reads[1].opt('MF'), 18) )
+
+    def testPairedBools(self):
+        self.assertEqual( self.reads[0].is_paired, True, "is paired mismatch in read 1: %s != %s" % (self.reads[0].is_paired, True) )
+        self.assertEqual( self.reads[1].is_paired, True, "is paired mismatch in read 2: %s != %s" % (self.reads[1].is_paired, True) )
+        self.assertEqual( self.reads[0].is_proper_pair, True, "is proper pair mismatch in read 1: %s != %s" % (self.reads[0].is_proper_pair, True) )
+        self.assertEqual( self.reads[1].is_proper_pair, True, "is proper pair mismatch in read 2: %s != %s" % (self.reads[1].is_proper_pair, True) )
+
+    def tearDown(self):
+        self.samfile.close()
+
+class TestAlignedReadSam(unittest.TestCase):
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex3.sam","r" )
+        self.reads=list(self.samfile.fetch())
+
+    def testARqname(self):
+        self.assertEqual( self.reads[0].qname, "read_28833_29006_6945", "read name mismatch in read 1: %s != %s" % (self.reads[0].qname, "read_28833_29006_6945") )
+        self.assertEqual( self.reads[1].qname, "read_28701_28881_323b", "read name mismatch in read 2: %s != %s" % (self.reads[1].qname, "read_28701_28881_323b") )
+
+    def testARflag(self):
+        self.assertEqual( self.reads[0].flag, 99, "flag mismatch in read 1: %s != %s" % (self.reads[0].flag, 99) )
+        self.assertEqual( self.reads[1].flag, 147, "flag mismatch in read 2: %s != %s" % (self.reads[1].flag, 147) )
+
+    def testARrname(self):
+        self.assertEqual( self.reads[0].rname, 0, "chromosome/target id mismatch in read 1: %s != %s" % (self.reads[0].rname, 0) )
+        self.assertEqual( self.reads[1].rname, 1, "chromosome/target id mismatch in read 2: %s != %s" % (self.reads[1].rname, 1) )
+
+    def testARpos(self):
+        self.assertEqual( self.reads[0].pos, 33-1, "mapping position mismatch in read 1: %s != %s" % (self.reads[0].pos, 33-1) )
+        self.assertEqual( self.reads[1].pos, 88-1, "mapping position mismatch in read 2: %s != %s" % (self.reads[1].pos, 88-1) )
+
+    def testARmapq(self):
+        self.assertEqual( self.reads[0].mapq, 20, "mapping quality mismatch in read 1: %s != %s" % (self.reads[0].mapq, 20) )
+        self.assertEqual( self.reads[1].mapq, 30, "mapping quality mismatch in read 2: %s != %s" % (self.reads[1].mapq, 30) )
+
+    def testARcigar(self):
+        self.assertEqual( self.reads[0].cigar, [(0, 10), (2, 1), (0, 25)], "read name length mismatch in read 1: %s != %s" % (self.reads[0].cigar, [(0, 10), (2, 1), (0, 25)]) )
+        self.assertEqual( self.reads[1].cigar, [(0, 35)], "read name length mismatch in read 2: %s != %s" % (self.reads[1].cigar, [(0, 35)]) )
+
+    def testARmrnm(self):
+        self.assertEqual( self.reads[0].mrnm, 0, "mate reference sequence name mismatch in read 1: %s != %s" % (self.reads[0].mrnm, 0) )
+        self.assertEqual( self.reads[1].mrnm, 1, "mate reference sequence name mismatch in read 2: %s != %s" % (self.reads[1].mrnm, 1) )
+
+    def testARmpos(self):
+        self.assertEqual( self.reads[0].mpos, 200-1, "mate mapping position mismatch in read 1: %s != %s" % (self.reads[0].mpos, 200-1) )
+        self.assertEqual( self.reads[1].mpos, 500-1, "mate mapping position mismatch in read 2: %s != %s" % (self.reads[1].mpos, 500-1) )
+
+    def testARisize(self):
+        self.assertEqual( self.reads[0].isize, 167, "insert size mismatch in read 1: %s != %s" % (self.reads[0].isize, 167) )
+        self.assertEqual( self.reads[1].isize, 412, "insert size mismatch in read 2: %s != %s" % (self.reads[1].isize, 412) )
+
+    def testARseq(self):
+        self.assertEqual( self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 1: %s != %s" % (self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") )
+        self.assertEqual( self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "sequence size mismatch in read 2: %s != %s" % (self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") )
+
+    def testARqual(self):
+        self.assertEqual( self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 1: %s != %s" % (self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") )
+        self.assertEqual( self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") )
+
+    def testPresentOptionalFields(self):
+        self.assertEqual( self.reads[0].opt('NM'), 1, "optional field mismatch in read 1, NM: %s != %s" % (self.reads[0].opt('NM'), 1) )
+        self.assertEqual( self.reads[0].opt('RG'), 'L1', "optional field mismatch in read 1, RG: %s != %s" % (self.reads[0].opt('RG'), 'L1') )
+        self.assertEqual( self.reads[1].opt('RG'), 'L2', "optional field mismatch in read 2, RG: %s != %s" % (self.reads[1].opt('RG'), 'L2') )
+        self.assertEqual( self.reads[1].opt('MF'), 18, "optional field mismatch in read 2, MF: %s != %s" % (self.reads[1].opt('MF'), 18) )
+
+    def testPairedBools(self):
+        self.assertEqual( self.reads[0].is_paired, True, "is paired mismatch in read 1: %s != %s" % (self.reads[0].is_paired, True) )
+        self.assertEqual( self.reads[1].is_paired, True, "is paired mismatch in read 2: %s != %s" % (self.reads[1].is_paired, True) )
+        self.assertEqual( self.reads[0].is_proper_pair, True, "is proper pair mismatch in read 1: %s != %s" % (self.reads[0].is_proper_pair, True) )
+        self.assertEqual( self.reads[1].is_proper_pair, True, "is proper pair mismatch in read 2: %s != %s" % (self.reads[1].is_proper_pair, True) )
+
+    def tearDown(self):
+        self.samfile.close()
+
+class TestHeaderSam(unittest.TestCase):
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex3.sam","r" )
+
+    def testHeaders(self):
+        self.assertEqual( self.samfile.header, {'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}], 'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', 'PU': 'SC_1_10'}, {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', 'PU': 'SC_2_12'}], 'CO': ['this is a comment', 'this is another comment'], 'HD': {'VN': '1.0'}}, "mismatch in headers: %s != %s" % (self.samfile.header, {'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}], 'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', 'PU': 'SC_1_10'}, {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', 'PU': 'SC_2_12'}], 'CO': ['this is a comment', 'this is another comment'], 'HD': {'VN': '1.0'}}) )
+
+    def tearDown(self):
+        self.samfile.close()
+
+class TestHeaderBam(unittest.TestCase):
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex3.sam","r" )
+
+    def testHeaders(self):
+        self.assertEqual( self.samfile.header, {'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}], 'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', 'PU': 'SC_1_10'}, {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', 'PU': 'SC_2_12'}], 'CO': ['this is a comment', 'this is another comment'], 'HD': {'VN': '1.0'}}, "mismatch in headers: %s != %s" % (self.samfile.header, {'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}], 'RG': [{'LB': 'SC_1', 'ID': 'L1', 'SM': 'NA12891', 'PU': 'SC_1_10'}, {'LB': 'SC_2', 'ID': 'L2', 'SM': 'NA12891', 'PU': 'SC_2_12'}], 'CO': ['this is a comment', 'this is another comment'], 'HD': {'VN': '1.0'}}) )
+
+    def tearDown(self):
+        self.samfile.close()
+
+class TestPileupObjects(unittest.TestCase):
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex1.bam","rb" )
+
+    def testPileupColumn(self):
+        for pcolumn1 in self.samfile.pileup( region="chr1:105" ):
+            if pcolumn1.pos == 104:
+                self.assertEqual( pcolumn1.tid, 0, "chromosome/target id mismatch in position 1: %s != %s" % (pcolumn1.tid, 0) )
+                self.assertEqual( pcolumn1.pos, 105-1, "position mismatch in position 1: %s != %s" % (pcolumn1.pos, 105-1) )
+                self.assertEqual( pcolumn1.n, 2, "# reads mismatch in position 1: %s != %s" % (pcolumn1.n, 2) )
+        for pcolumn2 in self.samfile.pileup( region="chr2:1480" ):
+            if pcolumn2.pos == 1479:
+                self.assertEqual( pcolumn2.tid, 1, "chromosome/target id mismatch in position 1: %s != %s" % (pcolumn2.tid, 1) )
+                self.assertEqual( pcolumn2.pos, 1480-1, "position mismatch in position 1: %s != %s" % (pcolumn2.pos, 1480-1) )
+                self.assertEqual( pcolumn2.n, 12, "# reads mismatch in position 1: %s != %s" % (pcolumn2.n, 12) )
+
+    def testPileupRead(self):
+        for pcolumn1 in self.samfile.pileup( region="chr1:105" ):
+            if pcolumn1.pos == 104:
+                self.assertEqual( len(pcolumn1.pileups), 2, "# reads aligned to column mismatch in position 1: %s != %s" % (len(pcolumn1.pileups), 2) )
+#                self.assertEqual( pcolumn1.pileups[0]  # need to test additional properties here
+
+    def tearDown(self):
+        self.samfile.close()
+
+class TestExceptions(unittest.TestCase):
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex1.bam","rb" )
+
+    def testBadFile(self):
+        self.assertRaises( IOError, pysam.Samfile, "exdoesntexist.bam", "rb" )
+        self.assertRaises( IOError, pysam.Samfile, "exdoesntexist.sam","r" )
+        self.assertRaises( IOError, pysam.Samfile, "exdoesntexist.bam", "r" )
+        self.assertRaises( IOError, pysam.Samfile, "exdoesntexist.sam","rb" )
+
+    def testBadContig(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "chr88" )
+
+    def testMeaninglessCrap(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "skljf" )
+
+    def testBackwardsOrderNewFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, 'chr1', 100, 10 )
+
+    def testBackwardsOrderOldFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, region="chr1:100-10")
+        
+    def testOutOfRangeNegativeNewFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, -10 )
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, 0 )
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1", -5, -10 )
+
+    def testOutOfRangeNegativeOldFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, region="chr1:-5-10" )
+        self.assertRaises( ValueError, self.samfile.fetch, region="chr1:-5-0" )
+        self.assertRaises( ValueError, self.samfile.fetch, region="chr1:-5--10" )
+
+    def testOutOfRangNewFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1", 9999999999, 99999999999 )
+
+    def testOutOfRangeLargeNewFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1", 9999999999999999999999999999999, 9999999999999999999999999999999999999999 )
+
+    def testOutOfRangeLargeOldFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999" )
+
+    def tearDown(self):
+        self.samfile.close()
+
+# TODOS
+# 1. finish testing all properties within pileup objects
+# 2. check exceptions and bad input problems (missing files, optional fields that aren't present, etc...)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/segfault_tests.py b/tests/segfault_tests.py

new file mode 100755 (executable)

index 0000000..ff32fec
--- /dev/null
+++ b/tests/segfault_tests.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+'''unit testing code for pysam.'''
+
+import pysam
+import unittest
+import os
+import itertools
+import subprocess
+import shutil
+
+class TestExceptions(unittest.TestCase):
+
+    def setUp(self):
+        self.samfile=pysam.Samfile( "ex1.bam","rb" )
+
+    def testOutOfRangeNegativeNewFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, -10 )
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, 0 )
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1", -5, -10 )
+
+    def testOutOfRangeNegativeOldFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5-10" )
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5-0" )
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5--10" )
+
+    def testOutOfRangeLargeNewFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1", 99999999999999999, 999999999999999999 )
+
+    def testOutOfRangeLargeOldFormat(self):
+        self.assertRaises( ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999" )
+
+    def tearDown(self):
+        self.samfile.close()
+
+if __name__ == "__main__":
+    unittest.main()
+
author	Charles Plessy <plessy@debian.org>
	Thu, 13 Jan 2011 09:09:25 +0000 (18:09 +0900)
committer	Charles Plessy <plessy@debian.org>
	Thu, 13 Jan 2011 09:09:25 +0000 (18:09 +0900)
MANIFEST.in		patch \| blob \| history
PKG-INFO		patch \| blob \| history
pysam/csamtools.pyx		patch \| blob \| history
pysam/pysam_util.c		patch \| blob \| history
setup.py		patch \| blob \| history
tests/00README.txt	[new file with mode: 0644]	patch \| blob
tests/Makefile	[new file with mode: 0644]	patch \| blob
tests/ex1.fa	[new file with mode: 0644]	patch \| blob
tests/ex1.sam.gz	[new file with mode: 0644]	patch \| blob
tests/ex3.sam	[new file with mode: 0644]	patch \| blob
tests/ex4.sam	[new file with mode: 0644]	patch \| blob
tests/example.py	[new file with mode: 0644]	patch \| blob
tests/pysam_test.py	[new file with mode: 0755]	patch \| blob
tests/segfault_tests.py	[new file with mode: 0755]	patch \| blob