Xfail NaN <-> NaT tests on non-x86 and warn on cast
author: Debian Science Team <debian-science-maintainers@lists.alioth.debian.org>
Fri, 25 Mar 2022 20:57:26 +0000 (20:57 +0000)
committer: Rebecca N. Palmer <rebecca_palmer@zoho.com>
Fri, 25 Mar 2022 20:57:26 +0000 (20:57 +0000)
pd.Series([np.nan]).astype('datetime64[ns]')[0] is pd.NaT on x86,
but is 1970-01-01 on arm*, because converting float NaN to int is undefined:
https://github.com/numpy/numpy/issues/8325
https://github.com/pandas-dev/pandas/issues/17792
https://github.com/pandas-dev/pandas/issues/26964

On s390x the result is the maximum _positive_ value (2**63-1 ns = year 2262).

On riscv64 one test case raises an exception
(though I suspect this is not the general case, since no other test cases fail there).

Author: Andreas Tille <tille@debian.org>, Graham Inggs <ginggs@debian.org>, Rebecca N. Palmer <rebecca_palmer@zoho.com>
Bug-Debian: https://bugs.debian.org/877754
Forwarded: no

Gbp-Pq: Name xfail_tests_nonintel_nannat.patch

pandas/core/dtypes/cast.py
pandas/tests/dtypes/cast/test_downcast.py
pandas/tests/frame/indexing/test_where.py
pandas/tests/frame/test_reductions.py
pandas/tests/indexes/datetimes/test_datetime.py
pandas/tests/reductions/test_reductions.py
pandas/tests/series/test_constructors.py
pandas/tests/test_algos.py
pandas/tests/tools/test_to_datetime.py

index 4a518b0c6d5bf2dc1ddc05d80a8a5dbf3dcae7f0..5806b8c067737073ce687ec839310c8e2ca2d3a5 100644 (file)
@@ -100,6 +100,9 @@ if TYPE_CHECKING:
         TimedeltaArray,
     )
 
+import platform
+import re
+warn_nannat_platform = "Non-x86 system detected, float -> datetime/timedelta may not handle NaNs correctly - https://bugs.debian.org/877754" if not bool(re.match('i.?86|x86',platform.uname()[4])) else False
 _int8_max = np.iinfo(np.int8).max
 _int16_max = np.iinfo(np.int16).max
 _int32_max = np.iinfo(np.int32).max
@@ -1195,6 +1198,8 @@ def astype_nansafe(
             f"'{dtype.name}[ns]' instead."
         )
         raise ValueError(msg)
+    if warn_nannat_platform and (is_datetime64_dtype(dtype) or is_timedelta64_dtype(dtype)) and not (is_datetime64_dtype(arr.dtype) or is_timedelta64_dtype(arr.dtype)) and np.issubdtype(arr.dtype, np.floating) and not np.isfinite(arr).all():
+        warnings.warn(warn_nannat_platform)
 
     if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
         # Explicit copy, or required since NumPy can't view from / to object.
@@ -1625,6 +1630,8 @@ def maybe_cast_to_datetime(
             dtype = ensure_nanosecond_dtype(dtype)
 
             value = np.array(value, copy=False)
+            if warn_nannat_platform and not (is_datetime64_dtype(value.dtype) or is_timedelta64_dtype(value.dtype)) and np.issubdtype(value.dtype, np.floating) and not np.isfinite(value).all():
+                warnings.warn(warn_nannat_platform)
 
             # we have an array of datetime or timedeltas & nulls
             if value.size or not is_dtype_equal(value.dtype, dtype):
index 5217b38f155c86a989ffa592efa29a25347f8f40..47584edce85293907748666787f48bdc9e1754cf 100644 (file)
@@ -7,6 +7,9 @@ from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 
 from pandas import Series
 import pandas._testing as tm
+import platform
+import re
+is_nannat_working=bool(re.match('i.?86|x86',platform.uname()[4]))
 
 
 @pytest.mark.parametrize(
@@ -77,6 +80,7 @@ def test_downcast_conversion_empty(any_real_dtype):
     tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64))
 
 
+@pytest.mark.xfail(condition=not is_nannat_working,reason="https://bugs.debian.org/877754",strict=False)
 @pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64])
 def test_datetime_likes_nan(klass):
     dtype = klass.__name__ + "[ns]"
index 0405d150c0c04d299d37b7f768307d25cd984149..a6373fd1f1bbd075185c9096bbcde8080c1172bd 100644 (file)
@@ -16,6 +16,9 @@ from pandas import (
     isna,
 )
 import pandas._testing as tm
+import platform
+import re
+is_nannat_working=bool(re.match('i.?86|x86|s390|ppc',platform.uname()[4]))
 
 
 @pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"])
@@ -360,6 +363,7 @@ class TestDataFrameIndexingWhere:
         result = a.where(do_not_replace, b)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(condition=not is_nannat_working,reason="https://bugs.debian.org/877754",strict=False)#not found
     def test_where_datetime(self):
 
         # GH 3311
index 8cb2c6375b5b7ba7c939fb2ea55a470a80ef99b3..6a9dbe4455dc18284be82e86a98a110a6ba43cf7 100644 (file)
@@ -28,6 +28,9 @@ from pandas import (
 import pandas._testing as tm
 import pandas.core.algorithms as algorithms
 import pandas.core.nanops as nanops
+import platform
+import re
+is_nannat_working=bool(re.match('i.?86|x86|s390|ppc',platform.uname()[4]))
 
 
 def assert_stat_op_calc(
@@ -845,6 +848,7 @@ class TestDataFrameAnalytics:
         expected = Series(result, index=["A", "B"])
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(condition=not is_nannat_working,reason="https://bugs.debian.org/877754",strict=False)
     def test_sum_nanops_timedelta(self):
         # prod isn't defined on timedeltas
         idx = ["a", "b", "c"]
index 17b80fbc0afc241a8d4fd55c96ac4e2e27d378c7..bd1151219493b22f3d6638a8b98f133d381634ac 100644 (file)
@@ -14,6 +14,9 @@ from pandas import (
     offsets,
 )
 import pandas._testing as tm
+import platform
+import re
+is_nannat_working=bool(re.match('i.?86|x86|s390|ppc',platform.uname()[4]))
 
 
 class TestDatetimeIndex:
@@ -55,6 +58,7 @@ class TestDatetimeIndex:
         idx2 = date_range(end="2000", periods=periods, freq="S")
         assert len(idx2) == periods
 
+    @pytest.mark.xfail(condition=not is_nannat_working,reason="https://bugs.debian.org/877754",strict=False)
     def test_nat(self):
         assert DatetimeIndex([np.nan])[0] is pd.NaT
 
index c0c1c2f057c96483535c4e7c7c2eb2c947c8eb20..6e514ce6af7dc53feec3e5b7a15147cb1434c999 100644 (file)
@@ -27,6 +27,9 @@ from pandas import (
 )
 import pandas._testing as tm
 from pandas.core import nanops
+import platform
+import re
+is_nannat_working=bool(re.match('i.?86|x86|s390|ppc',platform.uname()[4]))
 
 
 def get_objs():
@@ -1346,6 +1349,7 @@ class TestSeriesMode:
         expected = Series(expected2, dtype=object)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(condition=not is_nannat_working,reason="https://bugs.debian.org/877754",strict=False)
     @pytest.mark.parametrize(
         "dropna, expected1, expected2",
         [
index 56af003c59bf58f2d75072f043886606a2935e71..a56287239a6de561bc7c99e24de670e980063d09 100644 (file)
@@ -47,6 +47,9 @@ from pandas.core.arrays import (
 )
 from pandas.core.internals.blocks import NumericBlock
 
+import platform
+import re
+is_nannat_working=bool(re.match('i.?86|x86|s390|ppc',platform.uname()[4]))
 
 class TestSeriesConstructors:
     @pytest.mark.parametrize(
@@ -1057,6 +1060,7 @@ class TestSeriesConstructors:
 
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(condition=not is_nannat_working,reason="https://bugs.debian.org/877754",strict=False)
     @pytest.mark.parametrize("arg", ["2013-01-01 00:00:00", NaT, np.nan, None])
     def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):
         # GH 17415: With naive string
@@ -1434,6 +1438,7 @@ class TestSeriesConstructors:
         series[2] = val
         assert isna(series[2])
 
+    @pytest.mark.xfail(condition=not is_nannat_working,reason="https://bugs.debian.org/877754",strict=False)
     def test_NaT_cast(self):
         # GH10747
         result = Series([np.nan]).astype("M8[ns]")
index 4df95d895e4752ffc443adb4b2a803d63c30e0f8..eb11a5b4669cc985d272f349e98ab18cd40d71cc 100644 (file)
@@ -45,6 +45,9 @@ import pandas._testing as tm
 import pandas.core.algorithms as algos
 from pandas.core.arrays import DatetimeArray
 import pandas.core.common as com
+import platform
+import re
+is_nannat_working=bool(re.match('i.?86|x86|s390|ppc',platform.uname()[4]))
 
 
 class TestFactorize:
@@ -1265,6 +1268,7 @@ class TestValueCounts:
         expected = Series([3, 2, 1], index=[5.0, 10.3, np.nan])
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(condition=not is_nannat_working,reason="https://bugs.debian.org/877754",strict=False)
     def test_value_counts_normalized(self):
         # GH12558
         s = Series([1] * 2 + [2] * 3 + [np.nan] * 5)
index 9da7951c199ca0c2f18e3ea5ef7ff72502fa5039..4b2ec33dd85adda8ca381c1956174a0789173271 100644 (file)
@@ -25,6 +25,9 @@ from pandas.errors import (
     OutOfBoundsTimedelta,
 )
 import pandas.util._test_decorators as td
+import platform
+import re
+is_platform_x86 = bool(re.match('i.?86|x86',platform.uname()[4]))
 
 from pandas.core.dtypes.common import is_datetime64_ns_dtype
 
@@ -1484,6 +1487,7 @@ class TestToDatetimeUnit:
         tm.assert_index_equal(result, expected)
 
     # TODO: this is moved from tests.series.test_timeseries, may be redundant
+    @pytest.mark.xfail(not is_platform_x86, strict=False, raises=OutOfBoundsDatetime, reason="fails on riscv64")
     def test_to_datetime_unit(self):
 
         epoch = 1370745748