Use fixed seeds for reproducible pseudorandomness

author Debian Science Team <debian-science-maintainers@lists.alioth.debian.org>

Mon, 21 Oct 2024 18:43:11 +0000 (19:43 +0100)

committer Rebecca N. Palmer <rebecca_palmer@zoho.com>

Mon, 21 Oct 2024 18:43:11 +0000 (19:43 +0100)
author Debian Science Team <debian-science-maintainers@lists.alioth.debian.org>
Mon, 21 Oct 2024 18:43:11 +0000 (19:43 +0100)
committer Rebecca N. Palmer <rebecca_palmer@zoho.com>
Mon, 21 Oct 2024 18:43:11 +0000 (19:43 +0100)
diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst

index a6cfcd46149846d34897dfe11d0d40c3db823d49..836e3669c6881b9b8d6b155103e77804ff8a5347 100644 (file)
--- a/doc/source/getting_started/comparison/comparison_with_r.rst
+++ b/doc/source/getting_started/comparison/comparison_with_r.rst
@@ -237,6 +237,7 @@ In pandas we may use :meth:`~pandas.pivot_table` method to handle this:
  
     import random
     import string
+   random.seed(123456) # for reproducibility
  
     baseball = pd.DataFrame(
         {
diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst

index 453536098cfbb787c9d2d31cb0ad29780ac1ffbf..5126c7325bff142cb14181b6358086ae60ba5d12 100644 (file)
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -590,6 +590,7 @@ they need to be sorted. As with any index, you can use :meth:`~DataFrame.sort_in
  
     import random
  
+   random.seed(123456) # for reproducibility
     random.shuffle(tuples)
     s = pd.Series(np.random.randn(8), index=pd.MultiIndex.from_tuples(tuples))
     s
diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb

index f22a506499cf4647c716b6a416ee61ca198d7c80..46c671c5331f17b48bdf563d3459283cb90226b9 100644 (file)
--- a/doc/source/user_guide/style.ipynb
+++ b/doc/source/user_guide/style.ipynb
@@ -78,8 +78,37 @@
     "source": [
      "import pandas as pd\n",
      "import numpy as np\n",
-    "import matplotlib as mpl\n",
-    "\n",
+    "import matplotlib as mpl\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "nbsphinx": "hidden"
+   },
+   "outputs": [],
+   "source": [
+    "# For reproducibility - this doesn't respect uuid_len or positionally-passed uuid but the places here that use that coincidentally bypass this anyway\n",
+    "from pandas.io.formats.style import Styler\n",
+    "next_uuid = 1000\n",
+    "class StylerReproducible(Styler):\n",
+    "    def __init__(self, *args, uuid=None, **kwargs):\n",
+    "        global next_uuid\n",
+    "        if uuid is None:\n",
+    "            uuid = str(next_uuid)\n",
+    "            next_uuid = next_uuid + 1\n",
+    "        super().__init__(*args, uuid=uuid, **kwargs)\n",
+    "Styler = StylerReproducible\n",
+    "pd.DataFrame.style = property(lambda self: StylerReproducible(self))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
      "df = pd.DataFrame({\n",
      "    \"strings\": [\"Adam\", \"Mike\"],\n",
      "    \"ints\": [1, 3],\n",
@@ -104,6 +133,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
+    "np.random.seed(25)  # for reproducibility\n",
      "weather_df = pd.DataFrame(np.random.rand(10,2)*5, \n",
      "                          index=pd.date_range(start=\"2021-01-01\", periods=10),\n",
      "                          columns=[\"Tokyo\", \"Beijing\"])\n",
@@ -1394,7 +1424,6 @@
     "outputs": [],
     "source": [
      "# Hide the construction of the display chart from the user\n",
-    "import pandas as pd\n",
      "from IPython.display import HTML\n",
      "\n",
      "# Test series\n",
@@ -1925,6 +1954,18 @@
      "from pandas.io.formats.style import Styler"
     ]
    },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "nbsphinx": "hidden"
+   },
+   "outputs": [],
+   "source": [
+    "# For reproducibility\n",
+    "Styler = StylerReproducible\n"
+   ]
+  },
    {
     "cell_type": "markdown",
     "metadata": {},
@@ -2126,7 +2167,8 @@
     "nbconvert_exporter": "python",
     "pygments_lexer": "ipython3",
     "version": "3.9.5"
-  }
+  },
+  "record_timing": false
   },
   "nbformat": 4,
   "nbformat_minor": 1
diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst

index 9081d13ef2cf13c0b09b4097090b7653f813c7b1..b786b181dab31a23eec20911bd36770b3481c503 100644 (file)
--- a/doc/source/user_guide/visualization.rst
+++ b/doc/source/user_guide/visualization.rst
@@ -1086,6 +1086,7 @@ are what constitutes the bootstrap plot.
     :suppress:
  
     np.random.seed(123456)
+   random.seed(123456) # for reproducibility - bootstrap_plot uses random.sample
  
  .. ipython:: python
  
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py

index cb5598a98d5afbc93954d74e3ecc78b4e572606d..cb020ec6c54c71350d3210a1ec2efb07731c6946 100644 (file)
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -604,6 +604,7 @@ def boxplot_frame_groupby(
      .. plot::
          :context: close-figs
  
+        >>> np.random.seed(1234)
          >>> import itertools
          >>> tuples = [t for t in itertools.product(range(1000), range(4))]
          >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
@@ -1328,6 +1329,7 @@ class PlotAccessor(PandasObject):
          .. plot::
              :context: close-figs
  
+            >>> np.random.seed(1234)
              >>> data = np.random.randn(25, 4)
              >>> df = pd.DataFrame(data, columns=list('ABCD'))
              >>> ax = df.plot.box()
@@ -1392,6 +1394,7 @@ class PlotAccessor(PandasObject):
          .. plot::
              :context: close-figs
  
+            >>> np.random.seed(1234)
              >>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
              >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
              >>> ax = df.plot.hist(bins=12, alpha=0.5)
@@ -1811,6 +1814,7 @@ class PlotAccessor(PandasObject):
          .. plot::
              :context: close-figs
  
+            >>> np.random.seed(1234)
              >>> n = 10000
              >>> df = pd.DataFrame({'x': np.random.randn(n),
              ...                    'y': np.random.randn(n)})
diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py

index 18db460d388a4b748f91282ae42875206ba36cc6..fd180e625e903543191e69a4a2d30412236a7c33 100644 (file)
--- a/pandas/plotting/_misc.py
+++ b/pandas/plotting/_misc.py
@@ -438,6 +438,8 @@ def bootstrap_plot(
      .. plot::
          :context: close-figs
  
+        >>> np.random.seed(1234)
+        >>> random.seed(1234)  # for reproducibility
          >>> s = pd.Series(np.random.uniform(size=100))
          >>> pd.plotting.bootstrap_plot(s)  # doctest: +SKIP
          <Figure size 640x480 with 6 Axes>
@@ -597,6 +599,7 @@ def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Ax
      .. plot::
          :context: close-figs
  
+        >>> np.random.seed(1234)
          >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
          >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
          >>> pd.plotting.autocorrelation_plot(s)  # doctest: +SKIP
author	Debian Science Team <debian-science-maintainers@lists.alioth.debian.org>
	Mon, 21 Oct 2024 18:43:11 +0000 (19:43 +0100)
committer	Rebecca N. Palmer <rebecca_palmer@zoho.com>
	Mon, 21 Oct 2024 18:43:11 +0000 (19:43 +0100)
doc/source/getting_started/comparison/comparison_with_r.rst		patch \| blob \| history
doc/source/user_guide/advanced.rst		patch \| blob \| history
doc/source/user_guide/style.ipynb		patch \| blob \| history
doc/source/user_guide/visualization.rst		patch \| blob \| history
pandas/plotting/_core.py		patch \| blob \| history
pandas/plotting/_misc.py		patch \| blob \| history