import abc
import datetime
+import inspect
from io import BufferedIOBase, BytesIO, RawIOBase
import os
from textwrap import fill
from typing import Union
+import warnings
from pandas._config import config
from pandas._libs.parsers import STR_NA_VALUES
+from pandas.compat._optional import import_optional_dependency
from pandas.errors import EmptyDataError
from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments
of dtype conversion.
engine : str, default None
If io is not a buffer or path, this must be set to identify io.
- Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd".
+ Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb".
Engine compatibility :
+
- "xlrd" supports most old/new Excel file formats.
- "openpyxl" supports newer Excel file formats.
- "odf" supports OpenDocument file formats (.odf, .ods, .odt).
- "pyxlsb" supports Binary Excel files.
+
+ .. versionchanged:: 1.1.5 in Debian, 1.2.0 upstream
+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
+ is no longer maintained, and is not supported with
+ python >= 3.9. When ``engine=None``, the following logic will be
+ used to determine the engine.
+
+ - If ``path_or_buffer`` is an OpenDocument spreadsheet (an ``.ods`` file
+ or stream), then `odf <https://pypi.org/project/odfpy/>`_ will be used.
+ - Otherwise if ``path_or_buffer`` is a bytes stream, the file has the
+ extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd`` will
+ be used.
+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
+ then ``openpyxl`` will be used.
+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
+
+ Specifying ``engine="xlrd"`` will continue to be allowed for the
+ indefinite future, but may require uninstalling (python3-)defusedxml.
+
converters : dict, default None
Dict of functions for converting values in certain columns. Keys can
either be integers or column labels, values are functions that take one
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
engine : str, default None
If io is not a buffer or path, this must be set to identify io.
- Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
- default ``xlrd``.
+ Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``.
Engine compatibility :
+
- ``xlrd`` supports most old/new Excel file formats.
- ``openpyxl`` supports newer Excel file formats.
- ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
- ``pyxlsb`` supports Binary Excel files.
+
+ .. versionchanged:: 1.1.5 in Debian, 1.2.0 upstream
+
+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
+ is no longer maintained, and is not supported with
+ python >= 3.9. When ``engine=None``, the following logic will be
+ used to determine the engine.
+
+ - If ``path_or_buffer`` is an OpenDocument spreadsheet (an ``.ods`` file
+ or stream), then `odf <https://pypi.org/project/odfpy/>`_ will be used.
+ - Otherwise if ``path_or_buffer`` is a bytes stream, the file has the
+ extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd``
+ will be used.
+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
+ then ``openpyxl`` will be used.
+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
+
+ Specifying ``engine="xlrd"`` will continue to be allowed for the
+ indefinite future, but may require uninstalling (python3-)defusedxml.
"""
from pandas.io.excel._odfreader import _ODFReader
def __init__(self, path_or_buffer, engine=None):
if engine is None:
- engine = "xlrd"
+ # Determine ext and use odf for ods stream/file
if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
+ ext = None
if _is_ods_stream(path_or_buffer):
engine = "odf"
else:
ext = os.path.splitext(str(path_or_buffer))[-1]
if ext == ".ods":
engine = "odf"
+
+ if (
+ import_optional_dependency(
+ "xlrd", raise_on_missing=False, on_version="ignore"
+ )
+ is not None
+ ):
+ from xlrd import Book
+
+ if isinstance(path_or_buffer, Book):
+ engine = "xlrd"
+
+ # GH 35029 - Prefer openpyxl except for xls files
+ if engine is None:
+ if ext is None or isinstance(path_or_buffer, bytes) or ext == ".xls":
+ engine = "xlrd"
+ elif (
+ import_optional_dependency(
+ "openpyxl", raise_on_missing=False, on_version="ignore"
+ )
+ is not None
+ ):
+ engine = "openpyxl"
+ else:
+ caller = inspect.stack()[1]
+ if (
+ caller.filename.endswith("pandas/io/excel/_base.py")
+ and caller.function == "read_excel"
+ ):
+ stacklevel = 4
+ else:
+ stacklevel = 2
+ warnings.warn(
+ "The xlrd engine is no longer maintained and is not "
+ "supported when using pandas with python >= 3.9. However, "
+ "the engine xlrd will continue to be allowed for the "
+ "indefinite future. The "
+ "openpyxl engine will be used if it is installed and the "
+ "engine argument is not specified. Either install openpyxl "
+ "or specify engine='xlrd' to silence this warning.",
+ FutureWarning,
+ stacklevel=stacklevel,
+ )
+ engine = "xlrd"
if engine not in self._engines:
raise ValueError(f"Unknown engine: {engine}")
msg = "sheet 0 not found"
with pytest.raises(ValueError, match=msg):
pd.read_excel(xl, "0")
- else:
+ elif engine == "xlwt":
import xlrd
msg = "No sheet named <'0'>"
with pytest.raises(xlrd.XLRDError, match=msg):
pd.read_excel(xl, sheet_name="0")
+ else:
+ with pytest.raises(KeyError, match="Worksheet 0 does not exist."):
+ pd.read_excel(xl, sheet_name="0")
def test_excel_writer_context_manager(self, frame, path):
with ExcelWriter(path) as writer:
write_frame = DataFrame({"A": datetimes})
write_frame.to_excel(path, "Sheet1")
- read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0)
+ # GH 35029 - Default changed to openpyxl, but test is for odf/xlrd
+ engine = "odf" if path.endswith("ods") else "xlrd"
+ read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0, engine=engine)
tm.assert_series_equal(write_frame["A"], read_frame["A"])
import pytest
+from pandas.compat._optional import import_optional_dependency
+
import pandas as pd
import pandas._testing as tm
# TODO: test for openpyxl as well
def test_excel_table_sheet_by_index(datapath, read_ext):
path = datapath("io", "data", "excel", f"test1{read_ext}")
- with pd.ExcelFile(path) as excel:
+ with pd.ExcelFile(path, engine="xlrd") as excel:
with pytest.raises(xlrd.XLRDError):
pd.read_excel(excel, sheet_name="asdf")
+
+
+def test_excel_file_warning_with_xlsx_file(datapath):
+ # GH 29375 - opening test1.xlsx with engine=None must warn about xlrd only when openpyxl is unavailable
+ path = datapath("io", "data", "excel", "test1.xlsx")
+ has_openpyxl = (  # a non-None result from the optional import is treated as "openpyxl is available"
+ import_optional_dependency(
+ "openpyxl", raise_on_missing=False, on_version="ignore"
+ )
+ is not None
+ )
+ if not has_openpyxl:  # no openpyxl: expect a FutureWarning whose message matches the xlrd notice
+ with tm.assert_produces_warning(
+ FutureWarning,
+ raise_on_extra_warnings=False,
+ match="The xlrd engine is no longer maintained",
+ ):
+ ExcelFile(path, engine=None)  # NOTE(review): bare ExcelFile; its import is not visible in this chunk - confirm
+ else:  # openpyxl importable
+ with tm.assert_produces_warning(None):  # presumably asserts that no warning is emitted - confirm
+ pd.read_excel(path, "Sheet1", engine=None)  # NOTE(review): calls read_excel, not ExcelFile, unlike the branch above - confirm intended
+
+
+def test_read_excel_warning_with_xlsx_file(tmpdir, datapath):  # NOTE(review): tmpdir is not used in the body
+ # GH 29375 - pd.read_excel on test1.xlsx with engine=None must warn about xlrd only when openpyxl is unavailable
+ path = datapath("io", "data", "excel", "test1.xlsx")
+ has_openpyxl = (  # a non-None result from the optional import is treated as "openpyxl is available"
+ import_optional_dependency(
+ "openpyxl", raise_on_missing=False, on_version="ignore"
+ )
+ is not None
+ )
+ if not has_openpyxl:  # no openpyxl: expect a FutureWarning whose message matches the xlrd notice
+ with tm.assert_produces_warning(
+ FutureWarning,
+ raise_on_extra_warnings=False,
+ match="The xlrd engine is no longer maintained",
+ ):
+ pd.read_excel(path, "Sheet1", engine=None)
+ else:  # openpyxl importable
+ with tm.assert_produces_warning(None):  # presumably asserts that no warning is emitted - confirm
+ pd.read_excel(path, "Sheet1", engine=None)