Pass cache=True to sm.datasets.get_rdataset() in the documentation examples
and in the tests, so that locally cached copies of the R datasets are used
instead of being downloaded. Also remove a download (the NBER recession dates
fetched via pandas_datareader) that isn't actually used in the example it
appears in. This allows the tests to pass, and at least some of the examples
to be built, in an offline environment such as a Debian buildd.
The cached data is extracted from R packages by debian/datasets/*.
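
For reference, a minimal sketch of the call pattern this patch relies on.
The cache location given in the comments (a statsmodels_data directory in
the user's home folder, overridable via the STATSMODELS_DATA environment
variable) is an assumption to verify against the packaged statsmodels
version; debian/datasets/* is expected to pre-populate that directory.

    import statsmodels.api as sm

    # With cache=True, get_rdataset() first looks for a previously stored
    # copy of the dataset (and its documentation) in the cache directory and
    # only falls back to downloading from the Rdatasets repository if no
    # cached file is found. A pre-seeded cache therefore makes the call work
    # without network access.
    guerry = sm.datasets.get_rdataset("Guerry", "HistData", cache=True)
    print(guerry.data.head())
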
Author: Diane Trout <diane@ghic.org>, Rebecca N. Palmer <rebecca_palmer@zoho.com>
Forwarded: not-needed
Gbp-Pq: Name use-cached-datasets
import pandas as pd
import statsmodels.api as sm
- df = sm.datasets.get_rdataset("Arthritis", "vcd").data
+ df = sm.datasets.get_rdataset("Arthritis", "vcd", cache=True).data
tab = pd.crosstab(df['Treatment'], df['Improved'])
tab = tab.loc[:, ["None", "Some", "Marked"]]
.. ipython:: python
- df = sm.datasets.get_rdataset("VisualAcuity", "vcd").data
+ df = sm.datasets.get_rdataset("VisualAcuity", "vcd", cache=True).data
df = df.loc[df.gender == "female", :]
tab = df.set_index(['left', 'right'])
del tab["gender"]
.. ipython:: python
import statsmodels.api as sm
- duncan_prestige = sm.datasets.get_rdataset("Duncan", "carData")
+ duncan_prestige = sm.datasets.get_rdataset("Duncan", "carData", cache=True)
print(duncan_prestige.__doc__)
duncan_prestige.data.head(5)
import statsmodels.api as sm
- data = sm.datasets.get_rdataset("flchain", "survival").data
+ data = sm.datasets.get_rdataset("flchain", "survival", cache=True).data
df = data.loc[data.sex == "F", :]
sf = sm.SurvfuncRight(df["futime"], df["death"])
import statsmodels.api as sm
import statsmodels.formula.api as smf
- data = sm.datasets.get_rdataset("flchain", "survival").data
+ data = sm.datasets.get_rdataset("flchain", "survival", cache=True).data
del data["chapter"]
data = data.dropna()
data["lam"] = data["lambda"]
.. ipython:: python
- df = sm.datasets.get_rdataset("Guerry", "HistData").data
+ df = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
df = df[['Lottery', 'Literacy', 'Wealth', 'Region']].dropna()
df.head()
import statsmodels.api as sm
import statsmodels.formula.api as smf
- data = sm.datasets.get_rdataset('epil', package='MASS').data
+ data = sm.datasets.get_rdataset('epil', package='MASS', cache=True).data
fam = sm.families.Poisson()
ind = sm.cov_struct.Exchangeable()
.. ipython:: python
- df = sm.datasets.get_rdataset("Guerry", "HistData").data
+ df = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
The `Input/Output doc page <iolib.html>`_ shows how to import from various
other formats.
import statsmodels.formula.api as smf
# Load data
- dat = sm.datasets.get_rdataset("Guerry", "HistData").data
+ dat = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
# Fit regression model (using the natural log of one of the regressors)
results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dat).fit()
import statsmodels.api as sm
import statsmodels.formula.api as smf
- data = sm.datasets.get_rdataset("dietox", "geepack").data
+ data = sm.datasets.get_rdataset("dietox", "geepack", cache=True).data
md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"])
mdf = md.fit()
import statsmodels.api as sm
import statsmodels.formula.api as smf
- data = sm.datasets.get_rdataset("epil", "MASS").data
+ data = sm.datasets.get_rdataset("epil", "MASS", cache=True).data
md = smf.gee("y ~ age + trt + base", "subject", data,
cov_struct=sm.cov_struct.Independence(),
"import pandas as pd\n",
"import statsmodels.api as sm\n",
"import matplotlib.pyplot as plt\n",
- "\n",
- "# NBER recessions\n",
- "from pandas_datareader.data import DataReader\n",
- "from datetime import datetime\n",
- "\n",
- "usrec = DataReader(\n",
- " \"USREC\", \"fred\", start=datetime(1947, 1, 1), end=datetime(2013, 4, 1)\n",
- ")"
+ "from datetime import datetime\n"
]
},
{
"metadata": {},
"outputs": [],
"source": [
- "data = sm.datasets.get_rdataset(\"dietox\", \"geepack\").data\n",
+ "data = sm.datasets.get_rdataset(\"dietox\", \"geepack\", cache=True).data\n",
"md = smf.mixedlm(\"Weight ~ Time\", data, groups=data[\"Pig\"])\n",
"mdf = md.fit(method=[\"lbfgs\"])\n",
"print(mdf.summary())"
"metadata": {},
"outputs": [],
"source": [
- "data = sm.datasets.get_rdataset(\"Sitka\", \"MASS\").data\n",
+ "data = sm.datasets.get_rdataset(\"Sitka\", \"MASS\", cache=True).data\n",
"endog = data[\"size\"]\n",
"data[\"Intercept\"] = 1\n",
"exog = data[[\"Intercept\", \"Time\"]]"
"import matplotlib.pyplot as plt\n",
"\n",
"# Load data\n",
- "url = \"https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/HistData/Guerry.csv\"\n",
- "dat = pd.read_csv(url)\n",
+ "import statsmodels.datasets\n",
+ "dat = statsmodels.datasets.get_rdataset(\"Guerry\", \"HistData\", cache=True).data\n",
"\n",
"# Fit regression model (using the natural log of one of the regressors)\n",
"results = smf.ols(\"Lottery ~ Literacy + np.log(Pop1831)\", data=dat).fit()\n",
0.1025087
"""
try:
- iris = get_rdataset("iris").data.values[:, :4]
+ iris = get_rdataset("iris", cache=True).data.values[:, :4]
except IGNORED_EXCEPTIONS:
pytest.skip('Failed with HTTPError or URLError, these are random')
30.01526
"""
try:
- quakes = get_rdataset("quakes").data.values[:, :3]
+ quakes = get_rdataset("quakes", cache=True).data.values[:, :3]
except IGNORED_EXCEPTIONS:
pytest.skip('Failed with HTTPError or URLError, these are random')