Fix indexing errors in KaplanMeier
author Debian Science Maintainers <debian-science-maintainers@lists.alioth.debian.org>
Sat, 2 Mar 2019 14:59:35 +0000 (14:59 +0000)
committer Rebecca N. Palmer <rebecca_palmer@zoho.com>
Sat, 2 Mar 2019 14:59:35 +0000 (14:59 +0000)
Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
Bug-Debian: https://bugs.debian.org/860449
Forwarded: not-needed (affected code no longer in upstream)

Gbp-Pq: Name kaplan_meier_860449.patch

statsmodels/sandbox/survival2.py

index bbba55503b0f916091ad183467e54bd6d8f8e2d4..bba80bedac88841ce82e728c1a0db5de86aa096a 100644 (file)
@@ -66,7 +66,7 @@ class KaplanMeier(object):
         >>> import numpy as np
         >>> from statsmodels.sandbox.survival2 import KaplanMeier
         >>> dta = sm.datasets.strikes.load()
-        >>> dta = dta.values()[-1]
+        >>> dta = dta['raw_data']
         >>> dta[range(5),:]
         array([[  7.00000000e+00,   1.13800000e-02],
                [  9.00000000e+00,   1.13800000e-02],
@@ -246,7 +246,7 @@ class KaplanMeier(object):
         if self.censoring == None:
             events = np.bincount(t)
             t = np.unique(t)
-            events = events[:,list(t)]
+            events = events[list(t)]
             events = events.astype(float)
             eventsSum = np.cumsum(events)
             eventsSum = np.r_[0,eventsSum]
@@ -257,11 +257,11 @@ class KaplanMeier(object):
             events = np.bincount(t,censoring)
             censored = np.bincount(t,reverseCensoring)
             t = np.unique(t)
-            censored = censored[:,list(t)]
+            censored = censored[list(t)]
             censored = censored.astype(float)
             censoredSum = np.cumsum(censored)
             censoredSum = np.r_[0,censoredSum]
-            events = events[:,list(t)]
+            events = events[list(t)]
             events = events.astype(float)
             eventsSum = np.cumsum(events)
             eventsSum = np.r_[0,eventsSum]
@@ -349,7 +349,7 @@ class KaplanMeier(object):
         >>> import numpy as np
         >>> from statsmodels.sandbox.survival2 import KaplanMeier
         >>> dta = sm.datasets.strikes.load()
-        >>> dta = dta.values()[-1]
+        >>> dta = dta['raw_data']
         >>> censoring = np.ones_like(dta[:,0])
         >>> censoring[dta[:,0] > 80] = 0
         >>> dta = np.c_[dta,censoring]
@@ -418,8 +418,8 @@ class KaplanMeier(object):
                     if np.max(tind) != len(dk):
                         dif = np.max(tind) - len(dk) + 1
                         dk = np.r_[dk,[0]*dif]
-                    dk = dk[:,list(tind)]
-                    d = d[:,list(tind)]
+                    dk = dk[list(tind)]
+                    d = d[list(tind)]
                     dk = dk.astype(float)
                     d = d.astype(float)
                     dkSum = np.cumsum(dk)
@@ -431,8 +431,9 @@ class KaplanMeier(object):
                     d = d[n>1]
                     dk = dk[n>1]
                     nk = nk[n>1]
+                    if s.shape == tind.shape: # s is calculated once, so only filter it once, not per-group
+                        s = s[n>1]
                     n = n[n>1]
-                    s = s[n>1]
                     ek = (nk * d)/(n)
                     Z.append(np.sum(s * (dk - ek)))
                     NK.append(nk)
@@ -452,9 +453,9 @@ class KaplanMeier(object):
                         dif = np.max(tind) - len(dk) + 1
                         dk = np.r_[dk,[0]*dif]
                         ck = np.r_[ck,[0]*dif]
-                    dk = dk[:,list(tind)]
-                    ck = ck[:,list(tind)]
-                    d = d[:,list(tind)]
+                    dk = dk[list(tind)]
+                    ck = ck[list(tind)]
+                    d = d[list(tind)]
                     dk = dk.astype(float)
                     d = d.astype(float)
                     ck = ck.astype(float)
@@ -464,7 +465,7 @@ class KaplanMeier(object):
                     ck = np.r_[0,ck]
                     dkSum = np.r_[0,dkSum]
                     dSum = np.r_[0,dSum]
-                    censored = censored[:,list(tind)]
+                    censored = censored[list(tind)]
                     censored = censored.astype(float)
                     censoredSum = np.cumsum(censored)
                     censoredSum = np.r_[0,censoredSum]
@@ -474,8 +475,9 @@ class KaplanMeier(object):
                     d = d[n>1]
                     dk = dk[n>1]
                     nk = nk[n>1]
+                    if s.shape == tind.shape: # s is calculated once, so only filter it once, not per-group
+                        s = s[n>1]
                     n = n[n>1]
-                    s = s[n>1]
                     ek = (nk * d)/(n)
                     Z.append(np.sum(s * (dk - ek)))
                     NK.append(nk)