Use cached datasets in tests and examples

author Debian Science Maintainers <debian-science-maintainers@lists.alioth.debian.org>
Mon, 29 Nov 2021 22:05:33 +0000 (22:05 +0000)
committer Rebecca N. Palmer <rebecca_palmer@zoho.com>
Mon, 29 Nov 2021 22:05:33 +0000 (22:05 +0000)
diff --git a/docs/source/contingency_tables.rst b/docs/source/contingency_tables.rst

index 19ba2d828d8c22f7841b76cb3b9b9c6a84ba4289..10ee1322fae72ba4482536d3af0970d2289c2716 100644 (file)
--- a/docs/source/contingency_tables.rst
+++ b/docs/source/contingency_tables.rst
@@ -49,7 +49,7 @@ contingency table cell counts:
      import pandas as pd
      import statsmodels.api as sm
  
-    df = sm.datasets.get_rdataset("Arthritis", "vcd").data
+    df = sm.datasets.get_rdataset("Arthritis", "vcd", cache=True).data
  
      tab = pd.crosstab(df['Treatment'], df['Improved'])
      tab = tab.loc[:, ["None", "Some", "Marked"]]
@@ -184,7 +184,7 @@ contingency table.
  
  .. ipython:: python
  
-    df = sm.datasets.get_rdataset("VisualAcuity", "vcd").data
+    df = sm.datasets.get_rdataset("VisualAcuity", "vcd", cache=True).data
      df = df.loc[df.gender == "female", :]
      tab = df.set_index(['left', 'right'])
      del tab["gender"]
diff --git a/docs/source/datasets/index.rst b/docs/source/datasets/index.rst

index b088486995c51ddbeeea7f9ffd5ca224556a9ec5..1ea669c10f63f4a71d57c4a3bc6a7d3d6e80f2ab 100644 (file)
--- a/docs/source/datasets/index.rst
+++ b/docs/source/datasets/index.rst
@@ -30,7 +30,7 @@ The `Rdatasets project <https://vincentarelbundock.github.io/Rdatasets/>`__ give
  .. ipython:: python
  
     import statsmodels.api as sm
-   duncan_prestige = sm.datasets.get_rdataset("Duncan", "carData")
+   duncan_prestige = sm.datasets.get_rdataset("Duncan", "carData", cache=True)
     print(duncan_prestige.__doc__)
     duncan_prestige.data.head(5)
  
diff --git a/docs/source/duration.rst b/docs/source/duration.rst

index 08d229c22f1cffa97dc837a72a291c07b8fc8473..dfbf15ff5e9651bac7bb677026910986afda03e3 100644 (file)
--- a/docs/source/duration.rst
+++ b/docs/source/duration.rst
@@ -41,7 +41,7 @@ We fit the survival distribution only for the female subjects.
  
     import statsmodels.api as sm
  
-   data = sm.datasets.get_rdataset("flchain", "survival").data
+   data = sm.datasets.get_rdataset("flchain", "survival", cache=True).data
     df = data.loc[data.sex == "F", :]
     sf = sm.SurvfuncRight(df["futime"], df["death"])
  
@@ -152,7 +152,7 @@ depending on the value of the covariates.
     import statsmodels.api as sm
     import statsmodels.formula.api as smf
  
-   data = sm.datasets.get_rdataset("flchain", "survival").data
+   data = sm.datasets.get_rdataset("flchain", "survival", cache=True).data
     del data["chapter"]
     data = data.dropna()
     data["lam"] = data["lambda"]
diff --git a/docs/source/example_formulas.rst b/docs/source/example_formulas.rst

index a10b3488422e67160a845f73bb2c57ac5089b51a..a9b1439ede99627199d998a88b6c677b49baad16 100644 (file)
--- a/docs/source/example_formulas.rst
+++ b/docs/source/example_formulas.rst
@@ -47,7 +47,7 @@ and list-wise delete to remove missing observations:
  
  .. ipython:: python
  
-    df = sm.datasets.get_rdataset("Guerry", "HistData").data
+    df = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
      df = df[['Lottery', 'Literacy', 'Wealth', 'Region']].dropna()
      df.head()
  
diff --git a/docs/source/gee.rst b/docs/source/gee.rst

index 28c5ee1053d4baf27b2b8a3c678aa76569714f9c..1349b887d74e32ff89b2b06b3d0bdd63b52cb8d5 100644 (file)
--- a/docs/source/gee.rst
+++ b/docs/source/gee.rst
@@ -24,7 +24,7 @@ within clusters using data on epilepsy seizures.
      import statsmodels.api as sm
      import statsmodels.formula.api as smf
  
-    data = sm.datasets.get_rdataset('epil', package='MASS').data
+    data = sm.datasets.get_rdataset('epil', package='MASS', cache=True).data
  
      fam = sm.families.Poisson()
      ind = sm.cov_struct.Exchangeable()
diff --git a/docs/source/gettingstarted.rst b/docs/source/gettingstarted.rst

index 8bf386341c6f41c605562682da6becaf841bfeb6..5cef84d755dd80239b44ff3a7fac556d42996d74 100644 (file)
--- a/docs/source/gettingstarted.rst
+++ b/docs/source/gettingstarted.rst
@@ -50,7 +50,7 @@ We could download the file locally and then load it using ``read_csv``, but
  
  .. ipython:: python
  
-    df = sm.datasets.get_rdataset("Guerry", "HistData").data
+    df = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
  
  The `Input/Output doc page <iolib.html>`_ shows how to import from various
  other formats.
diff --git a/docs/source/index.rst b/docs/source/index.rst

index 5742a28842237a6c8e318a6cfdfdbd1d424a14ca..e48cb348179cfee18c818cd3e3a66a3995b8e282 100644 (file)
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -23,7 +23,7 @@ Here is a simple example using ordinary least squares:
      import statsmodels.formula.api as smf
  
      # Load data
-    dat = sm.datasets.get_rdataset("Guerry", "HistData").data
+    dat = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
  
      # Fit regression model (using the natural log of one of the regressors)
      results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dat).fit()
diff --git a/docs/source/mixed_linear.rst b/docs/source/mixed_linear.rst

index 50e1bef2cdc1be2e43cfcba4b17d741c65c661a8..397297360305900b36baa97f7de4b149ba75c40b 100644 (file)
--- a/docs/source/mixed_linear.rst
+++ b/docs/source/mixed_linear.rst
@@ -83,7 +83,7 @@ Examples
    import statsmodels.api as sm
    import statsmodels.formula.api as smf
  
-  data = sm.datasets.get_rdataset("dietox", "geepack").data
+  data = sm.datasets.get_rdataset("dietox", "geepack", cache=True).data
  
    md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"])
    mdf = md.fit()
diff --git a/docs/source/release/version0.6.rst b/docs/source/release/version0.6.rst

index 53888e9e9cdf4d2c391b26ce7e433a35da354c52..90a77fd3a54647a7b0c2af767d4f2f9035eaffdf 100644 (file)
--- a/docs/source/release/version0.6.rst
+++ b/docs/source/release/version0.6.rst
@@ -41,7 +41,7 @@ covariates.
     import statsmodels.api as sm
     import statsmodels.formula.api as smf
  
-   data = sm.datasets.get_rdataset("epil", "MASS").data
+   data = sm.datasets.get_rdataset("epil", "MASS", cache=True).data
  
     md = smf.gee("y ~ age + trt + base", "subject", data,
                  cov_struct=sm.cov_struct.Independence(), 
diff --git a/examples/notebooks/markov_regression.ipynb b/examples/notebooks/markov_regression.ipynb

index 6e93771e34617e7523a3ca9f32f16899a652d196..74064df2fd571a6bd7e7e4930273799e6767528f 100644 (file)
--- a/examples/notebooks/markov_regression.ipynb
+++ b/examples/notebooks/markov_regression.ipynb
@@ -30,14 +30,7 @@
      "import pandas as pd\n",
      "import statsmodels.api as sm\n",
      "import matplotlib.pyplot as plt\n",
-    "\n",
-    "# NBER recessions\n",
-    "from pandas_datareader.data import DataReader\n",
-    "from datetime import datetime\n",
-    "\n",
-    "usrec = DataReader(\n",
-    "    \"USREC\", \"fred\", start=datetime(1947, 1, 1), end=datetime(2013, 4, 1)\n",
-    ")"
+    "from datetime import datetime\n"
     ]
    },
    {
diff --git a/examples/notebooks/mixed_lm_example.ipynb b/examples/notebooks/mixed_lm_example.ipynb

index cab6f67210d85cff4854900f2dc08e100f04d039..d1f11b1afae96b1e33e06dbc88d236f503526861 100644 (file)
--- a/examples/notebooks/mixed_lm_example.ipynb
+++ b/examples/notebooks/mixed_lm_example.ipynb
@@ -86,7 +86,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "data = sm.datasets.get_rdataset(\"dietox\", \"geepack\").data\n",
+    "data = sm.datasets.get_rdataset(\"dietox\", \"geepack\", cache=True).data\n",
      "md = smf.mixedlm(\"Weight ~ Time\", data, groups=data[\"Pig\"])\n",
      "mdf = md.fit(method=[\"lbfgs\"])\n",
      "print(mdf.summary())"
@@ -318,7 +318,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "data = sm.datasets.get_rdataset(\"Sitka\", \"MASS\").data\n",
+    "data = sm.datasets.get_rdataset(\"Sitka\", \"MASS\", cache=True).data\n",
      "endog = data[\"size\"]\n",
      "data[\"Intercept\"] = 1\n",
      "exog = data[[\"Intercept\", \"Time\"]]"
diff --git a/examples/notebooks/regression_diagnostics.ipynb b/examples/notebooks/regression_diagnostics.ipynb

index 55a5cc047df52f7a67f538f2f06e24a82d0c6ec8..e9d03eec549e15cae4f2e938783c176f1caf6929 100644 (file)
--- a/examples/notebooks/regression_diagnostics.ipynb
+++ b/examples/notebooks/regression_diagnostics.ipynb
@@ -47,8 +47,8 @@
      "import matplotlib.pyplot as plt\n",
      "\n",
      "# Load data\n",
-    "url = \"https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/HistData/Guerry.csv\"\n",
-    "dat = pd.read_csv(url)\n",
+    "import statsmodels.datasets\n",
+    "dat = statsmodels.datasets.get_rdataset(\"Guerry\", \"HistData\", cache=True).data\n",
      "\n",
      "# Fit regression model (using the natural log of one of the regressors)\n",
      "results = smf.ols(\"Lottery ~ Literacy + np.log(Pop1831)\", data=dat).fit()\n",
diff --git a/statsmodels/stats/tests/test_dist_dependant_measures.py b/statsmodels/stats/tests/test_dist_dependant_measures.py

index 9060b5492c000c50f41f6c26f4295de73e603099..2b16441b186207fff2c4a06b0cfaddff31832de0 100644 (file)
--- a/statsmodels/stats/tests/test_dist_dependant_measures.py
+++ b/statsmodels/stats/tests/test_dist_dependant_measures.py
@@ -140,7 +140,7 @@ class TestDistDependenceMeasures(object):
          0.1025087
          """
          try:
-            iris = get_rdataset("iris").data.values[:, :4]
+            iris = get_rdataset("iris", cache=True).data.values[:, :4]
          except IGNORED_EXCEPTIONS:
              pytest.skip('Failed with HTTPError or URLError, these are random')
  
@@ -180,7 +180,7 @@ class TestDistDependenceMeasures(object):
          30.01526
          """
          try:
-            quakes = get_rdataset("quakes").data.values[:, :3]
+            quakes = get_rdataset("quakes", cache=True).data.values[:, :3]
          except IGNORED_EXCEPTIONS:
              pytest.skip('Failed with HTTPError or URLError, these are random')
author	Debian Science Maintainers <debian-science-maintainers@lists.alioth.debian.org>
	Mon, 29 Nov 2021 22:05:33 +0000 (22:05 +0000)
committer	Rebecca N. Palmer <rebecca_palmer@zoho.com>
	Mon, 29 Nov 2021 22:05:33 +0000 (22:05 +0000)
docs/source/contingency_tables.rst		patch | blob | history
docs/source/datasets/index.rst		patch | blob | history
docs/source/duration.rst		patch | blob | history
docs/source/example_formulas.rst		patch | blob | history
docs/source/gee.rst		patch | blob | history
docs/source/gettingstarted.rst		patch | blob | history
docs/source/index.rst		patch | blob | history
docs/source/mixed_linear.rst		patch | blob | history
docs/source/release/version0.6.rst		patch | blob | history
examples/notebooks/markov_regression.ipynb		patch | blob | history
examples/notebooks/mixed_lm_example.ipynb		patch | blob | history
examples/notebooks/regression_diagnostics.ipynb		patch | blob | history
statsmodels/stats/tests/test_dist_dependant_measures.py		patch | blob | history