From 557b0a00bcbeaf75d9176083d27be0c119f8b807 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 21 Dec 2023 17:31:51 +0100
Subject: [PATCH 1/2] ENH: support the Arrow PyCapsule Interface on
 pandas.DataFrame

---
 pandas/compat/_optional.py                 |  5 ++-
 pandas/core/frame.py                       | 24 ++++++++++++
 pandas/tests/frame/test_arrow_interface.py | 45 ++++++++++++++++++++++
 pandas/tests/test_optional_dependency.py   | 14 +++++++
 4 files changed, 86 insertions(+), 2 deletions(-)
 create mode 100644 pandas/tests/frame/test_arrow_interface.py

diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index 9d04d7c0a1216..2bc6cd46f09a7 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -120,9 +120,8 @@ def import_optional_dependency(
         The imported module, when found and the version is correct.
         None is returned when the package is not found and `errors`
         is False, or when the package's version is too old and `errors`
-        is ``'warn'``.
+        is ``'warn'`` or ``'ignore'``.
     """
-
     assert errors in {"warn", "raise", "ignore"}
 
     package_name = INSTALL_MAPPING.get(name)
@@ -163,5 +162,7 @@ def import_optional_dependency(
                 return None
             elif errors == "raise":
                 raise ImportError(msg)
+            else:
+                return None
 
     return module
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0d0dc476ee76a..5d0a048417fee 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -971,6 +971,30 @@ def __dataframe_consortium_standard__(
         )
         return convert_to_standard_compliant_dataframe(self, api_version=api_version)
 
+    def __arrow_c_stream__(self, requested_schema=None):
+        """
+        Export the pandas DataFrame as an Arrow C stream PyCapsule.
+
+        This relies on pyarrow to convert the pandas DataFrame to the Arrow
+        format. This conversion is not necessarily zero-copy.
+
+        Parameters
+        ----------
+        requested_schema : PyCapsule, default None
+            The schema to which the dataframe should be cast, passed as a
+            PyCapsule containing a C ArrowSchema representation of the
+            requested schema.
+
+        Returns
+        -------
+        PyCapsule
+        """
+        pa = import_optional_dependency("pyarrow", min_version="14.0.0")
+        if requested_schema is not None:
+            requested_schema = pa.Schema._import_from_c_capsule(requested_schema)
+        table = pa.Table.from_pandas(self, schema=requested_schema)
+        return table.__arrow_c_stream__()
+
     # ----------------------------------------------------------------------
 
     @property
diff --git a/pandas/tests/frame/test_arrow_interface.py b/pandas/tests/frame/test_arrow_interface.py
new file mode 100644
index 0000000000000..ac7b51cbdfa92
--- /dev/null
+++ b/pandas/tests/frame/test_arrow_interface.py
@@ -0,0 +1,45 @@
+import ctypes
+
+import pytest
+
+import pandas.util._test_decorators as td
+
+import pandas as pd
+
+pa = pytest.importorskip("pyarrow")
+
+
+@td.skip_if_no("pyarrow", min_version="14.0")
+def test_dataframe_arrow_interface():
+    # The export must be a valid "arrow_array_stream" PyCapsule, and pyarrow
+    # must build the expected table from the DataFrame (with/without schema).
+    df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+
+    capsule = df.__arrow_c_stream__()
+    is_valid = ctypes.pythonapi.PyCapsule_IsValid(
+        ctypes.py_object(capsule), b"arrow_array_stream"
+    )
+    assert is_valid == 1
+
+    table = pa.table(df)
+    expected = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+    assert table.equals(expected)
+
+    schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
+    table = pa.table(df, schema=schema)
+    expected = expected.cast(schema)
+    assert table.equals(expected)
+
+
+@td.skip_if_no("pyarrow", min_version="15.0")
+def test_dataframe_to_arrow():
+    df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+    # from_stream() yields a RecordBatchReader; read_all() makes it a Table.
+    table = pa.RecordBatchReader.from_stream(df).read_all()
+    expected = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+    assert table.equals(expected)
+
+    schema = pa.schema([("a", pa.int8()), ("b", pa.string())])
+    table = pa.RecordBatchReader.from_stream(df, schema=schema).read_all()
+    expected = expected.cast(schema)
+    assert table.equals(expected)
diff --git a/pandas/tests/test_optional_dependency.py b/pandas/tests/test_optional_dependency.py
index c1d1948d6c31a..52b5f636b1254 100644
--- a/pandas/tests/test_optional_dependency.py
+++ b/pandas/tests/test_optional_dependency.py
@@ -50,6 +50,20 @@ def test_bad_version(monkeypatch):
     result = import_optional_dependency("fakemodule")
     assert result is module
 
+    with pytest.raises(ImportError, match="Pandas requires version '1.1.0'"):
+        import_optional_dependency("fakemodule", min_version="1.1.0")
+
+    with tm.assert_produces_warning(UserWarning):
+        result = import_optional_dependency(
+            "fakemodule", errors="warn", min_version="1.1.0"
+        )
+    assert result is None
+
+    result = import_optional_dependency(
+        "fakemodule", errors="ignore", min_version="1.1.0"
+    )
+    assert result is None
+
 
 def test_submodule(monkeypatch):
     # Create a fake module with a submodule

From 05fec034ebf6824e1b9e78c4f788f7231fdb8313 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 18 Jan 2024 14:33:32 +0100
Subject: [PATCH 2/2] expand documentation on how index is handled

---
 pandas/core/frame.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index dfbcead972813..e093d551f3ead 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -993,7 +993,10 @@ def __arrow_c_stream__(self, requested_schema=None):
         Export the pandas DataFrame as an Arrow C stream PyCapsule.
 
         This relies on pyarrow to convert the pandas DataFrame to the Arrow
-        format. This conversion is not necessarily zero-copy.
+        format (and follows the default behaviour of ``pyarrow.Table.from_pandas``
+        in its handling of the index, i.e. store the index as a column except
+        for RangeIndex).
+        This conversion is not necessarily zero-copy.
 
         Parameters
         ----------