Allow tests to use the data files in the source tree
author: Debian Science Team <debian-science-maintainers@lists.alioth.debian.org>
Sun, 18 Feb 2024 20:31:18 +0000 (20:31 +0000)
committer: Rebecca N. Palmer <rebecca_palmer@zoho.com>
Sun, 18 Feb 2024 20:31:18 +0000 (20:31 +0000)
We don't ship these data files in the package,
but we do want to run the tests that use them.

tests_path() is removed completely because it is unclear whether it
should point to the test code or to the directory above the test data.

Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
Forwarded: https://github.com/pandas-dev/pandas/issues/54907

Gbp-Pq: Name find_test_data.patch

pandas/conftest.py
pandas/tests/io/formats/style/test_html.py
pandas/tests/io/test_pickle.py
pandas/tests/io/xml/test_xml.py
pandas/tests/util/test_util.py

index b1b35448af1340cc7902787c8b7d5d0eacd77505..50c0576c9cdec4978cd4eebaef283c3429b6121c 100644 (file)
@@ -35,6 +35,7 @@ from typing import (
     TYPE_CHECKING,
     Callable,
 )
+import argparse
 
 from dateutil.tz import (
     tzlocal,
@@ -107,6 +108,7 @@ def pytest_addoption(parser) -> None:
         action="store_false",
         help="Don't fail if a test is skipped for missing data file.",
     )
+    parser.addoption("--deb-data-root-dir", action="store", help=argparse.SUPPRESS)  # for internal use of the Debian CI infrastructure, may change without warning.  Security note: test_pickle can run arbitrary code from this directory
 
 
 def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:
@@ -1169,17 +1171,15 @@ def strict_data_files(pytestconfig):
 
 
 @pytest.fixture
-def tests_path() -> Path:
-    return Path(__file__).parent / "tests"
+def tests_io_data_path(pytestconfig) -> Path:
+    BASE_PATH = pytestconfig.getoption("--deb-data-root-dir", default=None)
+    if BASE_PATH is None:
+        BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
+    return Path(BASE_PATH) / "io" / "data"
 
 
 @pytest.fixture
-def tests_io_data_path(tests_path) -> Path:
-    return tests_path / "io" / "data"
-
-
-@pytest.fixture
-def datapath(strict_data_files: str) -> Callable[..., str]:
+def datapath(strict_data_files: str, pytestconfig) -> Callable[..., str]:
     """
     Get the path to a data file.
 
@@ -1197,7 +1197,9 @@ def datapath(strict_data_files: str) -> Callable[..., str]:
     ValueError
         If the path doesn't exist and the --no-strict-data-files option is not set.
     """
-    BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
+    BASE_PATH = pytestconfig.getoption("--deb-data-root-dir", default=None)
+    if BASE_PATH is None:
+        BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")
 
     def deco(*args):
         path = os.path.join(BASE_PATH, *args)
index 1e345eb82ed3c31e7a5e0f89fa574aea84923dd7..2c5220da960f1645f6559585ef1dbafcf074bcd4 100644 (file)
@@ -44,10 +44,10 @@ def tpl_table(env):
     return env.get_template("html_table.tpl")
 
 
-def test_html_template_extends_options():
+def test_html_template_extends_options(datapath):
     # make sure if templates are edited tests are updated as are setup fixtures
     # to understand the dependency
-    with open("pandas/io/formats/templates/html.tpl", encoding="utf-8") as file:
+    with open(datapath("../io/formats/templates/html.tpl"), encoding="utf-8") as file:
         result = file.read()
     assert "{% include html_style_tpl %}" in result
     assert "{% include html_table_tpl %}" in result
index 75e4de7074e63f989c2a273c0836bf8c41d9237d..407d95f53c89ebe700d80416376264f444da5aa0 100644 (file)
@@ -117,7 +117,7 @@ def test_pickles(datapath):
         pytest.skip("known failure on non-little endian")
 
     # For loop for compat with --strict-data-files
-    for legacy_pickle in Path(__file__).parent.glob("data/legacy_pickle/*/*.p*kl*"):
+    for legacy_pickle in Path(datapath("io", "data", "legacy_pickle")).glob("*/*.p*kl*"):
         legacy_pickle = datapath(legacy_pickle)
 
         data = pd.read_pickle(legacy_pickle)
@@ -574,7 +574,7 @@ def test_pickle_big_dataframe_compression(protocol, compression):
 def test_pickle_frame_v124_unpickle_130(datapath):
     # GH#42345 DataFrame created in 1.2.x, unpickle in 1.3.x
     path = datapath(
-        Path(__file__).parent,
+        "io",
         "data",
         "legacy_pickle",
         "1.2.4",
index 88655483800eea56a822272db9c05afa73b8473c..6c357f479a94749c917e71fd40f64c98de303ae7 100644 (file)
@@ -486,13 +486,14 @@ def test_empty_string_etree(val):
             read_xml(BytesIO(val), parser="etree")
 
 
-def test_wrong_file_path(parser):
+@pytest.mark.xfail(reason="broken by etree changes", strict=False)
+def test_wrong_file_path(parser, datapath):
     msg = (
         "Passing literal xml to 'read_xml' is deprecated and "
         "will be removed in a future version. To read from a "
         "literal string, wrap it in a 'StringIO' object."
     )
-    filename = os.path.join("data", "html", "books.xml")
+    filename = os.path.join(datapath("io", "data", "html"), "books.xml")
 
     with pytest.raises(
         FutureWarning,
@@ -1357,17 +1358,16 @@ def test_stylesheet_with_etree(kml_cta_rail_lines, xsl_flatten_doc):
 
 
 @pytest.mark.parametrize("val", ["", b""])
-def test_empty_stylesheet(val):
+def test_empty_stylesheet(val, datapath):
     pytest.importorskip("lxml")
     msg = (
         "Passing literal xml to 'read_xml' is deprecated and "
         "will be removed in a future version. To read from a "
         "literal string, wrap it in a 'StringIO' object."
     )
-    kml = os.path.join("data", "xml", "cta_rail_lines.kml")
+    kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
 
-    with pytest.raises(FutureWarning, match=msg):
-        read_xml(kml, stylesheet=val)
+    read_xml(kml, stylesheet=val)
 
 
 # ITERPARSE
index 5718480fdec5ef4e9d1b15a075d97f3d7fdcc061..0776f1797cdffb4350743bdb6cd902f2dd33a1db 100644 (file)
@@ -32,6 +32,7 @@ def test_datapath_missing(datapath):
         datapath("not_a_file")
 
 
+@pytest.mark.xfail(reason="--deb-data-root-dir intentionally breaks this", strict=False)
 def test_datapath(datapath):
     args = ("io", "data", "csv", "iris.csv")