Allow tests to use the data files in the source tree

author Debian Science Team <debian-science-maintainers@lists.alioth.debian.org>

Sun, 18 Feb 2024 20:31:18 +0000 (20:31 +0000)

committer Rebecca N. Palmer <rebecca_palmer@zoho.com>

Sun, 18 Feb 2024 20:31:18 +0000 (20:31 +0000)
author Debian Science Team <debian-science-maintainers@lists.alioth.debian.org>
Sun, 18 Feb 2024 20:31:18 +0000 (20:31 +0000)
committer Rebecca N. Palmer <rebecca_palmer@zoho.com>
Sun, 18 Feb 2024 20:31:18 +0000 (20:31 +0000)
diff --git a/pandas/conftest.py b/pandas/conftest.py

index b1b35448af1340cc7902787c8b7d5d0eacd77505..50c0576c9cdec4978cd4eebaef283c3429b6121c 100644 (file)
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -35,6 +35,7 @@ from typing import (
      TYPE_CHECKING,
      Callable,
  )
+import argparse
  
  from dateutil.tz import (
      tzlocal,
@@ -107,6 +108,7 @@ def pytest_addoption(parser) -> None:
          action="store_false",
          help="Don't fail if a test is skipped for missing data file.",
      )
+    parser.addoption("--deb-data-root-dir", action="store", help=argparse.SUPPRESS)  # for internal use of the Debian CI infrastructure, may change without warning.  Security note: test_pickle can run arbitrary code from this directory
  
  
  def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:
@@ -1169,17 +1171,15 @@ def strict_data_files(pytestconfig):
  
  
  @pytest.fixture
-def tests_path() -> Path:
-    return Path(__file__).parent / "tests"
+def tests_io_data_path(pytestconfig) -> Path:
+    BASE_PATH = pytestconfig.getoption("--deb-data-root-dir", default=None)
+    if BASE_PATH is None:
+        BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
+    return Path(BASE_PATH) / "io" / "data"
  
  
  @pytest.fixture
-def tests_io_data_path(tests_path) -> Path:
-    return tests_path / "io" / "data"
-
-
-@pytest.fixture
-def datapath(strict_data_files: str) -> Callable[..., str]:
+def datapath(strict_data_files: str, pytestconfig) -> Callable[..., str]:
      """
      Get the path to a data file.
  
@@ -1197,7 +1197,9 @@ def datapath(strict_data_files: str) -> Callable[..., str]:
      ValueError
          If the path doesn't exist and the --no-strict-data-files option is not set.
      """
-    BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
+    BASE_PATH = pytestconfig.getoption("--deb-data-root-dir", default=None)
+    if BASE_PATH is None:
+        BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
  
      def deco(*args):
          path = os.path.join(BASE_PATH, *args)
diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py

index 1e345eb82ed3c31e7a5e0f89fa574aea84923dd7..2c5220da960f1645f6559585ef1dbafcf074bcd4 100644 (file)
--- a/pandas/tests/io/formats/style/test_html.py
+++ b/pandas/tests/io/formats/style/test_html.py
@@ -44,10 +44,10 @@ def tpl_table(env):
      return env.get_template("html_table.tpl")
  
  
-def test_html_template_extends_options():
+def test_html_template_extends_options(datapath):
      # make sure if templates are edited tests are updated as are setup fixtures
      # to understand the dependency
-    with open("pandas/io/formats/templates/html.tpl", encoding="utf-8") as file:
+    with open(datapath("../io/formats/templates/html.tpl"), encoding="utf-8") as file:
          result = file.read()
      assert "{% include html_style_tpl %}" in result
      assert "{% include html_table_tpl %}" in result
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py

index 75e4de7074e63f989c2a273c0836bf8c41d9237d..407d95f53c89ebe700d80416376264f444da5aa0 100644 (file)
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -117,7 +117,7 @@ def test_pickles(datapath):
          pytest.skip("known failure on non-little endian")
  
      # For loop for compat with --strict-data-files
-    for legacy_pickle in Path(__file__).parent.glob("data/legacy_pickle/*/*.p*kl*"):
+    for legacy_pickle in Path(datapath("io", "data", "legacy_pickle")).glob("*/*.p*kl*"):
          legacy_pickle = datapath(legacy_pickle)
  
          data = pd.read_pickle(legacy_pickle)
@@ -574,7 +574,7 @@ def test_pickle_big_dataframe_compression(protocol, compression):
  def test_pickle_frame_v124_unpickle_130(datapath):
      # GH#42345 DataFrame created in 1.2.x, unpickle in 1.3.x
      path = datapath(
-        Path(__file__).parent,
+        "io",
          "data",
          "legacy_pickle",
          "1.2.4",
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py

index 88655483800eea56a822272db9c05afa73b8473c..6c357f479a94749c917e71fd40f64c98de303ae7 100644 (file)
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -486,13 +486,14 @@ def test_empty_string_etree(val):
              read_xml(BytesIO(val), parser="etree")
  
  
-def test_wrong_file_path(parser):
+@pytest.mark.xfail(reason="broken by etree changes", strict=False)
+def test_wrong_file_path(parser, datapath):
      msg = (
          "Passing literal xml to 'read_xml' is deprecated and "
          "will be removed in a future version. To read from a "
          "literal string, wrap it in a 'StringIO' object."
      )
-    filename = os.path.join("data", "html", "books.xml")
+    filename = os.path.join(datapath("io", "data", "html"), "books.xml")
  
      with pytest.raises(
          FutureWarning,
@@ -1357,17 +1358,16 @@ def test_stylesheet_with_etree(kml_cta_rail_lines, xsl_flatten_doc):
  
  
  @pytest.mark.parametrize("val", ["", b""])
-def test_empty_stylesheet(val):
+def test_empty_stylesheet(val, datapath):
      pytest.importorskip("lxml")
      msg = (
          "Passing literal xml to 'read_xml' is deprecated and "
          "will be removed in a future version. To read from a "
          "literal string, wrap it in a 'StringIO' object."
      )
-    kml = os.path.join("data", "xml", "cta_rail_lines.kml")
+    kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
  
-    with pytest.raises(FutureWarning, match=msg):
-        read_xml(kml, stylesheet=val)
+    read_xml(kml, stylesheet=val)
  
  
  # ITERPARSE
diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py

index 5718480fdec5ef4e9d1b15a075d97f3d7fdcc061..0776f1797cdffb4350743bdb6cd902f2dd33a1db 100644 (file)
--- a/pandas/tests/util/test_util.py
+++ b/pandas/tests/util/test_util.py
@@ -32,6 +32,7 @@ def test_datapath_missing(datapath):
          datapath("not_a_file")
  
  
+@pytest.mark.xfail(reason="--deb-data-root-dir intentionally breaks this", strict=False)
  def test_datapath(datapath):
      args = ("io", "data", "csv", "iris.csv")
author	Debian Science Team <debian-science-maintainers@lists.alioth.debian.org>
	Sun, 18 Feb 2024 20:31:18 +0000 (20:31 +0000)
committer	Rebecca N. Palmer <rebecca_palmer@zoho.com>
	Sun, 18 Feb 2024 20:31:18 +0000 (20:31 +0000)
pandas/conftest.py		patch \| blob \| history
pandas/tests/io/formats/style/test_html.py		patch \| blob \| history
pandas/tests/io/test_pickle.py		patch \| blob \| history
pandas/tests/io/xml/test_xml.py		patch \| blob \| history
pandas/tests/util/test_util.py		patch \| blob \| history