From 2daf2263bb1aad36f19267699ce9d1abf97b4ed4 Mon Sep 17 00:00:00 2001
From: David Li
Date: Fri, 14 Feb 2025 02:14:42 -0500
Subject: [PATCH] docs: add stdout/stderr and index support to recipe directive
(#2495)
- Allow marking up stderr/stdout to be rendered in recipes
- Syntax follows Go's (`// Output:`)
- Allow adding keywords in recipes that will get added into the Sphinx
index
- Add script that parses stderr/stdout and places them in files next to
recipes for test harnesses to use
- Enforce up-to-date output in CI
- Update Python test harness to validate stdout
- Update C++ test harness to validate stdout
Fixes #2082.
---
.gitattributes | 1 +
.github/workflows/native-unix.yml | 11 +
ci/scripts/cpp_recipe.sh | 4 +-
dev/release/rat_exclude_files.txt | 1 +
docs/source/cpp/driver_example.rst | 2 -
docs/source/cpp/quickstart.rst | 1 -
docs/source/cpp/recipe/CMakeLists.txt | 14 +-
docs/source/cpp/recipe/quickstart.cc | 9 +-
.../cpp/recipe/quickstart.cc.stdout.txt | 4 +
docs/source/cpp/recipe_driver/CMakeLists.txt | 2 +
.../cpp/recipe_driver/driver_example.py | 4 +-
.../driver_example.py.stdout.txt | 1 +
docs/source/driver/flight_sql.rst | 1 -
.../sphinx_recipe/sphinx_recipe/__init__.py | 131 ++------
.../ext/sphinx_recipe/sphinx_recipe/parser.py | 279 ++++++++++++++++++
.../sphinx_recipe/update_output.py | 66 +++++
docs/source/index.rst | 1 +
.../python/recipe/driver_manager_lowlevel.py | 2 +
.../python/recipe/driver_manager_prepare.py | 2 +
.../python/recipe/flightsql_dremio_connect.py | 2 +
.../python/recipe/postgresql_authenticate.py | 5 +-
.../postgresql_authenticate.py.stdout.txt | 1 +
.../recipe/postgresql_create_append_table.py | 2 +
.../recipe/postgresql_create_dataset_table.py | 2 +
.../recipe/postgresql_create_temp_table.py | 2 +
.../python/recipe/postgresql_execute_bind.py | 2 +
.../recipe/postgresql_get_query_schema.py | 2 +
.../recipe/postgresql_get_table_schema.py | 2 +
.../python/recipe/postgresql_list_catalogs.py | 2 +
.../source/python/recipe/postgresql_pandas.py | 2 +
.../source/python/recipe/postgresql_polars.py | 2 +
docs/source/python/recipe/postgresql_pool.py | 2 +
docs/source/tests/test_cookbook.py | 77 ++++-
33 files changed, 516 insertions(+), 125 deletions(-)
create mode 100644 docs/source/cpp/recipe/quickstart.cc.stdout.txt
create mode 100644 docs/source/cpp/recipe_driver/driver_example.py.stdout.txt
create mode 100644 docs/source/ext/sphinx_recipe/sphinx_recipe/parser.py
create mode 100644 docs/source/ext/sphinx_recipe/sphinx_recipe/update_output.py
create mode 100644 docs/source/python/recipe/postgresql_authenticate.py.stdout.txt
diff --git a/.gitattributes b/.gitattributes
index f885677cbf..745833b12b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+*.stdout.txt linguist-generated
c/vendor/* linguist-vendored
go/adbc/drivermgr/adbc.h linguist-generated
go/adbc/drivermgr/adbc_driver_manager.cc linguist-generated
diff --git a/.github/workflows/native-unix.yml b/.github/workflows/native-unix.yml
index 2d3e34e75b..1b08add18b 100644
--- a/.github/workflows/native-unix.yml
+++ b/.github/workflows/native-unix.yml
@@ -710,7 +710,18 @@ jobs:
- name: Test Recipes (C++)
run: |
./ci/scripts/cpp_recipe.sh $(pwd) ~/local build/recipe
+ - name: Ensure recipes are up to date
+ run: |
+ pip install -e ./docs/source/ext/sphinx_recipe
+ # Exits 1 if any recipes were updated
+ python -m sphinx_recipe.update_output \
+ docs/source/cpp/recipe/*.cc \
+ docs/source/cpp/recipe_driver/driver_example.py \
+ docs/source/python/recipe/*.py
- name: Test Recipes (Python)
run: |
docker compose up --detach --wait dremio dremio-init flightsql-sqlite-test postgres-test
+ export ADBC_CPP_RECIPE_BIN=~/local/bin
+ # Needed for the combined C++/Python driver example
+ export LD_LIBRARY_PATH=~/local/lib
pytest -vvs docs/source/tests/
diff --git a/ci/scripts/cpp_recipe.sh b/ci/scripts/cpp_recipe.sh
index 7309e7ee33..a041ae3bc4 100755
--- a/ci/scripts/cpp_recipe.sh
+++ b/ci/scripts/cpp_recipe.sh
@@ -33,6 +33,7 @@ test_recipe() {
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${install_dir}/lib"
export GOEXPERIMENT=cgocheck2
+ rm -rf "${build_dir}"
mkdir -p "${build_dir}"
pushd "${build_dir}"
@@ -41,11 +42,12 @@ test_recipe() {
${ADBC_CMAKE_ARGS} \
-DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
-DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX="${install_dir}" \
-DCMAKE_PREFIX_PATH="${install_dir}" \
-DADBC_DRIVER_EXAMPLE_BUILD_TESTS=ON
set +x
- cmake --build . -j
+ cmake --build . --target install -j
ctest \
--output-on-failure \
--no-tests=error
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 7ffcfc13e2..54ec748354 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -20,6 +20,7 @@ csharp/*.props
dev/release/rat_exclude_files.txt
docs/source/format/*.drawio
docs/source/format/*.svg
+docs/source/*/*.stdout.txt
filtered_rat.txt
go/adbc/drivermgr/arrow-adbc/adbc.h
go/adbc/drivermgr/adbc_driver_manager.cc
diff --git a/docs/source/cpp/driver_example.rst b/docs/source/cpp/driver_example.rst
index 0776b5e8ef..3730328e0f 100644
--- a/docs/source/cpp/driver_example.rst
+++ b/docs/source/cpp/driver_example.rst
@@ -20,13 +20,11 @@ Driver Example
==============
.. recipe:: recipe_driver/driver_example.cc
- :language: cpp
Low-level testing
=================
.. recipe:: recipe_driver/driver_example_test.cc
- :language: cpp
High-level testing
==================
diff --git a/docs/source/cpp/quickstart.rst b/docs/source/cpp/quickstart.rst
index a382f38790..f434014c55 100644
--- a/docs/source/cpp/quickstart.rst
+++ b/docs/source/cpp/quickstart.rst
@@ -20,4 +20,3 @@ Quickstart
==========
.. recipe:: recipe/quickstart.cc
- :language: cpp
diff --git a/docs/source/cpp/recipe/CMakeLists.txt b/docs/source/cpp/recipe/CMakeLists.txt
index 5cc1492bb5..70b4f82e51 100644
--- a/docs/source/cpp/recipe/CMakeLists.txt
+++ b/docs/source/cpp/recipe/CMakeLists.txt
@@ -31,11 +31,13 @@ find_package(AdbcDriverManager REQUIRED)
fetchcontent_declare(nanoarrow
GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
GIT_TAG apache-arrow-nanoarrow-0.2.0
- GIT_SHALLOW TRUE)
+ GIT_SHALLOW TRUE
+ EXCLUDE_FROM_ALL)
fetchcontent_makeavailable(nanoarrow)
-add_executable(quickstart quickstart.cc)
-target_include_directories(quickstart SYSTEM PRIVATE ${nanoarrow_SOURCE_DIR}/dist)
-target_link_libraries(quickstart PRIVATE AdbcDriverManager::adbc_driver_manager_shared
- nanoarrow)
-add_test(NAME quickstart COMMAND quickstart)
+add_executable(recipe-quickstart quickstart.cc)
+target_include_directories(recipe-quickstart SYSTEM PRIVATE ${nanoarrow_SOURCE_DIR}/dist)
+target_link_libraries(recipe-quickstart
+ PRIVATE AdbcDriverManager::adbc_driver_manager_shared nanoarrow)
+add_test(NAME quickstart COMMAND recipe-quickstart)
+install(TARGETS recipe-quickstart)
diff --git a/docs/source/cpp/recipe/quickstart.cc b/docs/source/cpp/recipe/quickstart.cc
index b9ae384f01..4ce4b46617 100644
--- a/docs/source/cpp/recipe/quickstart.cc
+++ b/docs/source/cpp/recipe/quickstart.cc
@@ -155,6 +155,7 @@ int main() {
/// ahead of time, so this value will actually just be ``-1`` to
/// indicate that the value is not known.
std::cout << "Got " << rows_affected << " rows" << std::endl;
+ // Output: Got -1 rows
/// We need an Arrow implementation to read the actual results. We
/// can use `Arrow C++`_ or `Nanoarrow`_ for that. For simplicity,
@@ -172,7 +173,9 @@ int main() {
/// Then we can use Nanoarrow to print it:
char buf[1024] = {};
ArrowSchemaToString(&schema, buf, sizeof(buf), /*recursive=*/1);
- std::cout << buf << std::endl;
+ std::cout << "Result schema: " << buf << std::endl;
+ // Output:
+ // Result schema: struct
/// Now we can read the data. The data comes as a stream of Arrow
/// record batches.
@@ -197,8 +200,10 @@ int main() {
}
ArrowArrayViewReset(&view);
}
+ // Output:
+ // Got a batch with 1 rows
+ // THEANSWER[0] = 42
- std::cout << "Finished reading result set" << std::endl;
stream.release(&stream);
/// Cleanup
diff --git a/docs/source/cpp/recipe/quickstart.cc.stdout.txt b/docs/source/cpp/recipe/quickstart.cc.stdout.txt
new file mode 100644
index 0000000000..8436d53cb8
--- /dev/null
+++ b/docs/source/cpp/recipe/quickstart.cc.stdout.txt
@@ -0,0 +1,4 @@
+Got -1 rows
+Result schema: struct
+Got a batch with 1 rows
+THEANSWER[0] = 42
diff --git a/docs/source/cpp/recipe_driver/CMakeLists.txt b/docs/source/cpp/recipe_driver/CMakeLists.txt
index 8e1159a855..39d6df8c51 100644
--- a/docs/source/cpp/recipe_driver/CMakeLists.txt
+++ b/docs/source/cpp/recipe_driver/CMakeLists.txt
@@ -49,6 +49,8 @@ target_include_directories(driver_example PRIVATE ../../../../c ../../../../c/in
target_link_libraries(driver_example PRIVATE adbc_driver_framework
nanoarrow::nanoarrow_ipc)
+install(TARGETS driver_example)
+
if(ADBC_DRIVER_EXAMPLE_BUILD_TESTS)
fetchcontent_declare(googletest
URL https://github.com/google/googletest/archive/refs/tags/v1.15.1.tar.gz
diff --git a/docs/source/cpp/recipe_driver/driver_example.py b/docs/source/cpp/recipe_driver/driver_example.py
index 86743449a1..c6e5dbd4e4 100644
--- a/docs/source/cpp/recipe_driver/driver_example.py
+++ b/docs/source/cpp/recipe_driver/driver_example.py
@@ -45,7 +45,8 @@ def connect(uri: str):
driver=str(driver_lib.resolve()), db_kwargs={"uri": uri}
)
- raise RuntimeError("Can't find driver shared object")
+ # Try to find it on the dynamic loader path
+ return dbapi.connect(driver="driver_example", db_kwargs={"uri": uri})
#: Next, we can give our driver a go! The two pieces we implemented in the driver
@@ -63,5 +64,6 @@ def connect(uri: str):
with con.cursor() as cur:
cur.execute("SELECT * FROM example.arrows")
print(cur.fetchall())
+ # Output: [(1,), (2,), (3,)]
os.unlink(Path(__file__).parent / "example.arrows")
diff --git a/docs/source/cpp/recipe_driver/driver_example.py.stdout.txt b/docs/source/cpp/recipe_driver/driver_example.py.stdout.txt
new file mode 100644
index 0000000000..aeec2cdf53
--- /dev/null
+++ b/docs/source/cpp/recipe_driver/driver_example.py.stdout.txt
@@ -0,0 +1 @@
+[(1,), (2,), (3,)]
diff --git a/docs/source/driver/flight_sql.rst b/docs/source/driver/flight_sql.rst
index 0dde65bf35..983a6162c7 100644
--- a/docs/source/driver/flight_sql.rst
+++ b/docs/source/driver/flight_sql.rst
@@ -129,7 +129,6 @@ the :c:struct:`AdbcDatabase`.
:sync: go
.. recipe:: ../../../go/adbc/driver/flightsql/example_usage_test.go
- :language: go
Supported Features
==================
diff --git a/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py b/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py
index bd96d47437..73718c3c76 100644
--- a/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py
+++ b/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py
@@ -15,13 +15,22 @@
# specific language governing permissions and limitations
# under the License.
-"""A directive for code recipes with a literate programming style."""
+"""A directive for code recipes with a literate-like programming style.
+
+1. Write code recipes as normal, self-contained source files.
+2. Add comments for prose containing reStructuredText markup.
+3. Use the ``recipe`` directive to include the code in your Sphinx
+ documentation. The directive will parse out the prose and render it as
+ actual documentation, with the code blocks interspersed.
+
+Effectively, this turns the code "inside out": code with embedded prose
+comments will become prose with embedded code blocks. The actual code remains
+valid code and can be tested and run like usual.
+"""
-import typing
from pathlib import Path
import docutils
-from docutils.parsers.rst import directives
from docutils.statemachine import StringList
from sphinx.util.docutils import SphinxDirective
from sphinx.util.nodes import nested_parse_with_titles
@@ -29,120 +38,40 @@
__all__ = ["setup"]
-
-class SourceLine(typing.NamedTuple):
- content: str
- lineno: int
-
-
-class SourceFragment(typing.NamedTuple):
- kind: str
- lines: list[SourceLine]
-
-
-PREAMBLE = "Recipe source: `{name} <{url}>`_"
+from . import parser
class RecipeDirective(SphinxDirective):
has_content = False
required_arguments = 1
optional_arguments = 0
- option_spec: OptionSpec = {
- "language": directives.unchanged_required,
- "prose-prefix": directives.unchanged_required,
- }
+ option_spec: OptionSpec = {}
@staticmethod
- def default_prose_prefix(language: str) -> str:
- return {
- "cpp": "///",
- "go": "///",
- "python": "#:",
- }.get(language, "#:")
+ def source_language(filename: str) -> parser.SourceSyntax:
+ path = Path(filename)
+ language = parser.LANGUAGES.get(path.suffix)
+ if not language:
+ raise ValueError(f"Unknown language for file {filename}")
+ return language
def run(self):
rel_filename, filename = self.env.relfn2path(self.arguments[0])
+ # Ask Sphinx to rebuild when either the recipe or the directive are changed
self.env.note_dependency(rel_filename)
self.env.note_dependency(__file__)
- language = self.options.get("language", "python")
- prefix = self.options.get("prose-prefix", self.default_prose_prefix(language))
-
- # --- Split the source into runs of prose or code
-
- fragments = []
-
- fragment = []
- fragment_type = None
- state = "before"
- lineno = 1
- for line in open(filename):
- if state == "before":
- if "RECIPE STARTS HERE" in line:
- state = "reading"
- elif state == "reading":
- if line.strip().startswith(prefix):
- line_type = "prose"
- # Remove prefix and next whitespace
- line = line.lstrip()[len(prefix) + 1 :]
- else:
- line_type = "code"
-
- if line_type != fragment_type:
- if fragment:
- fragments.append(
- SourceFragment(kind=fragment_type, lines=fragment)
- )
- fragment = []
- fragment_type = line_type
-
- # Skip blank code lines
- if line_type != "code" or line.strip():
- # Remove trailing newline
- fragment.append(SourceLine(content=line[:-1], lineno=lineno))
-
- lineno += 1
-
- if fragment:
- fragments.append(SourceFragment(kind=fragment_type, lines=fragment))
-
- # --- Generate the final reST as a whole and parse it
- # That way, section hierarchy works properly
-
- generated_lines = []
-
- # Link to the source on GitHub
+ syntax = self.source_language(filename)
repo_url_template = self.env.config.recipe_repo_url_template
- if repo_url_template is not None:
- repo_url = repo_url_template.format(rel_filename=rel_filename)
- generated_lines.append(
- PREAMBLE.format(
- name=Path(rel_filename).name,
- url=repo_url,
- )
- )
- # Paragraph break
- generated_lines.append("")
-
- for fragment in fragments:
- if fragment.kind == "prose":
- generated_lines.extend([line.content for line in fragment.lines])
- generated_lines.append("")
- elif fragment.kind == "code":
- line_min = fragment.lines[0].lineno
- line_max = fragment.lines[-1].lineno
- lines = [
- f".. literalinclude:: {self.arguments[0]}",
- f" :language: {language}",
- " :linenos:",
- " :lineno-match:",
- f" :lines: {line_min}-{line_max}",
- "",
- ]
- generated_lines.extend(lines)
- else:
- raise RuntimeError("Unknown fragment kind")
+ with open(filename) as source:
+ generated_lines = parser.parse_recipe_to_rest(
+ source,
+ filename=self.arguments[0],
+ rel_filename=rel_filename,
+ syntax=syntax,
+ repo_url_template=repo_url_template,
+ )
parsed = docutils.nodes.Element()
nested_parse_with_titles(
diff --git a/docs/source/ext/sphinx_recipe/sphinx_recipe/parser.py b/docs/source/ext/sphinx_recipe/sphinx_recipe/parser.py
new file mode 100644
index 0000000000..c13825f518
--- /dev/null
+++ b/docs/source/ext/sphinx_recipe/sphinx_recipe/parser.py
@@ -0,0 +1,279 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import typing
+from pathlib import Path
+
+
+class SourceLine(typing.NamedTuple):
+ """One line of a recipe together with its position in the source file."""
+
+ content: str
+ #: 1-indexed line number in the recipe file; drives the code-block line ranges.
+ lineno: int
+
+
+class SourceFragment(typing.NamedTuple):
+ """A run of consecutive code, prose, or output lines in a recipe."""
+
+ kind: typing.Literal["code", "prose", "stderr", "stdout"]
+ lines: list[SourceLine]
+
+
+class SourceSyntax(typing.NamedTuple):
+ """Language-specific configuration for parsing recipes."""
+
+ #: Language name to use for syntax highlighting.
+ pygments_language: str
+ #: Prefix for prose comments.
+ prose_prefix: str
+ #: Prefix that opens a stdout block.
+ stdout_prefix: str
+ #: Prefix that opens a stderr block.
+ stderr_prefix: str
+ #: Prefix for continuation lines of an open stdout/stderr block.
+ output_continuation_prefix: str
+
+
+class ParsedRecipe(typing.NamedTuple):
+ """Parsed recipe: fragments to render, captured output, and index metadata."""
+
+ fragments: list[SourceFragment]
+ stdout: list[str]
+ stderr: list[str]
+ category: str | None
+ keywords: list[str]
+
+
+#: Prepended to the Sphinx output to link to the source of the recipe.
+PREAMBLE = "Recipe source: `{name} <{url}>`_"
+#: Marker after which recipe content begins; earlier lines are not rendered.
+START = "RECIPE STARTS HERE"
+#: Marker (only read before START) that sets the category used in the index.
+CATEGORY_PREFIX = "RECIPE CATEGORY:"
+#: Marker (only read before START) that sets comma-separated index keywords.
+KEYWORDS_PREFIX = "RECIPE KEYWORDS:"
+
+
+_LANGUAGES = {
+ (".cc", ".cpp"): SourceSyntax(
+ pygments_language="cpp",
+ prose_prefix="///",
+ stdout_prefix="// Output:",
+ stderr_prefix="// Standard Error:",
+ output_continuation_prefix="//",
+ ),
+ (".go",): SourceSyntax(
+ pygments_language="go",
+ prose_prefix="///",
+ stdout_prefix="// Output:",
+ stderr_prefix="// Standard Error:",
+ output_continuation_prefix="//",
+ ),
+ (".java",): SourceSyntax(
+ pygments_language="java",
+ prose_prefix="///",
+ stdout_prefix="// Output:",
+ stderr_prefix="// Standard Error:",
+ output_continuation_prefix="//",
+ ),
+ (".py",): SourceSyntax(
+ pygments_language="python",
+ prose_prefix="#:",
+ stdout_prefix="# Output:",
+ stderr_prefix="# Standard Error:",
+ output_continuation_prefix="#",
+ ),
+}
+LANGUAGES = {ext: lang for exts, lang in _LANGUAGES.items() for ext in exts}
+
+
+def parse_recipe_to_fragments(
+    source: typing.Iterable[str],
+    *,
+    syntax: SourceSyntax,
+) -> ParsedRecipe:
+    """Split recipe source lines into prose/code fragments plus captured output.
+
+    Lines before the ``START`` marker are only scanned for the category and
+    keyword markers.  After it, each line is classified by its comment prefix
+    (see ``SourceSyntax``); stdout/stderr lines are collected separately and
+    code fragments left adjacent by that removal are merged.
+
+    ``source`` is any iterable of lines, with or without trailing newlines.
+    Returns a ``ParsedRecipe``.
+    """
+    fragments = []
+    category = None
+    keywords = []
+
+    fragment = []
+    fragment_type = None
+    # "before" --> ignore code lines (e.g. for a license header)
+    # "reading" --> parse code lines
+    state = "before"
+    for lineno, line in enumerate(source, start=1):
+        if state == "before":
+            if START in line:
+                state = "reading"
+            elif CATEGORY_PREFIX in line:
+                category = line.partition(CATEGORY_PREFIX)[2].strip()
+            elif KEYWORDS_PREFIX in line:
+                raw = line.partition(KEYWORDS_PREFIX)[2]
+                # Drop empty entries (e.g. from a trailing comma)
+                keywords = [kw.strip() for kw in raw.split(",") if kw.strip()]
+            continue
+
+        trimmed = line.lstrip()
+        if trimmed.startswith(syntax.prose_prefix):
+            line_type = "prose"
+            # Remove prefix and next whitespace
+            line = trimmed[len(syntax.prose_prefix) + 1 :]
+        elif trimmed.startswith(syntax.stdout_prefix):
+            line_type = "stdout"
+            line = trimmed[len(syntax.stdout_prefix) + 1 :]
+        elif trimmed.startswith(syntax.stderr_prefix):
+            line_type = "stderr"
+            line = trimmed[len(syntax.stderr_prefix) + 1 :]
+        elif fragment_type in ("stdout", "stderr") and trimmed.startswith(
+            syntax.output_continuation_prefix
+        ):
+            # Continuation lines stay in the currently open output block
+            line_type = fragment_type
+            line = trimmed[len(syntax.output_continuation_prefix) + 1 :]
+        else:
+            line_type = "code"
+
+        if line_type != fragment_type:
+            if fragment:
+                fragments.append(SourceFragment(kind=fragment_type, lines=fragment))
+            fragment = []
+            fragment_type = line_type
+
+        # Skip blank code lines (blank lines in reST are significant)
+        if line_type != "code" or line.strip():
+            # rstrip, not [:-1]: the final line may lack a trailing newline
+            fragment.append(SourceLine(content=line.rstrip("\n"), lineno=lineno))
+
+    if fragment:
+        fragments.append(SourceFragment(kind=fragment_type, lines=fragment))
+
+    # --- Split out output fragments, merge adjacent fragments
+    # We render output blocks at the end, so remove them here. Merging
+    # adjacent fragments avoids odd breaks in the source code.
+
+    stdout = []
+    stderr = []
+    new_fragments = []
+    for fragment in fragments:
+        if fragment.kind in ("stdout", "stderr"):
+            lines = fragment.lines
+            if lines and lines[0].content == "":
+                # Avoid blank line when using format like
+                # // Output:
+                # // theanswer = 42
+                lines = lines[1:]
+            sink = stdout if fragment.kind == "stdout" else stderr
+            sink.extend(line.content for line in lines)
+        elif (
+            new_fragments
+            and fragment.kind == "code"
+            and new_fragments[-1].kind == fragment.kind
+        ):
+            new_fragments[-1].lines.extend(fragment.lines)
+        else:
+            new_fragments.append(fragment)
+    return ParsedRecipe(
+        fragments=new_fragments,
+        stdout=stdout,
+        stderr=stderr,
+        category=category,
+        keywords=keywords,
+    )
+
+
+def parse_recipe_to_rest(
+    source: typing.Iterable[str],
+    *,
+    filename: str,
+    rel_filename: str,
+    syntax: SourceSyntax,
+    repo_url_template: str | None = None,
+) -> list[str]:
+    """Render a recipe as a list of reST lines.
+
+    The whole document is generated up front and parsed in one go so that
+    section hierarchy works properly.  Layout, in order:
+
+    - an ``index`` directive (only if both category and keywords are set);
+    - a link to the recipe source (only if ``repo_url_template`` is given);
+    - prose lines verbatim, and one ``literalinclude`` of ``filename`` per
+      code fragment, restricted to that fragment's line range;
+    - captured stdout, then stderr, as captioned ``text`` code blocks.
+
+    Raises ``RuntimeError`` on a fragment that is neither prose nor code.
+    """
+    parsed = parse_recipe_to_fragments(source, syntax=syntax)
+    rest: list[str] = []
+
+    if parsed.category and parsed.keywords:
+        rest.append(".. index::")
+        rest.extend(
+            f" pair: {parsed.category}; {keyword} (recipe)"
+            for keyword in parsed.keywords
+        )
+        rest.append("")
+
+    # Link to the source on GitHub
+    if repo_url_template is not None:
+        url = repo_url_template.format(rel_filename=rel_filename)
+        rest.append(PREAMBLE.format(name=Path(rel_filename).name, url=url))
+
+    # Paragraph break
+    rest.append("")
+
+    for fragment in parsed.fragments:
+        if fragment.kind == "prose":
+            rest.extend(line.content for line in fragment.lines)
+            rest.append("")
+        elif fragment.kind == "code":
+            line_min = fragment.lines[0].lineno
+            line_max = fragment.lines[-1].lineno
+            rest += [
+                f".. literalinclude:: {filename}",
+                f" :language: {syntax.pygments_language}",
+                " :linenos:",
+                " :lineno-match:",
+                f" :lines: {line_min}-{line_max}",
+                "",
+            ]
+        else:
+            raise RuntimeError(f"Unknown fragment kind {fragment.kind}")
+
+    outputs = (
+        (" :caption: stdout", parsed.stdout),
+        (" :caption: stderr", parsed.stderr),
+    )
+    for caption_option, output in outputs:
+        if not output:
+            continue
+        rest += [".. code-block:: text", caption_option, ""]
+        # reST escapes the content of a code-block directive
+        rest.extend(" " + line for line in output)
+        rest.append("")
+
+    return rest
diff --git a/docs/source/ext/sphinx_recipe/sphinx_recipe/update_output.py b/docs/source/ext/sphinx_recipe/sphinx_recipe/update_output.py
new file mode 100644
index 0000000000..1b998ac224
--- /dev/null
+++ b/docs/source/ext/sphinx_recipe/sphinx_recipe/update_output.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Regenerate .stdout.txt files from recipes for the test harness."""
+
+import argparse
+import sys
+from pathlib import Path
+
+from . import parser as recipe_parser
+
+
+def main() -> int:
+    """Sync ``<recipe>.stdout.txt`` files with the stdout marked up in recipes.
+
+    Returns 1 if any file was (re)written (so CI fails on stale output), else 0.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("recipes", nargs="+", type=Path, help="Recipe files to update")
+    args = parser.parse_args()
+
+    updated = False
+    for path in args.recipes:
+        syntax = recipe_parser.LANGUAGES.get(path.suffix)
+        if syntax is None:
+            # Usage error (exit 2) rather than a bare KeyError traceback
+            parser.error(f"{path}: unsupported recipe file extension")
+        with path.open("r") as source:
+            recipe = recipe_parser.parse_recipe_to_fragments(source, syntax=syntax)
+
+        stdout = [line for line in recipe.stdout if line]
+        if not stdout:
+            continue
+        expected = "\n".join(stdout)
+        target = path.with_suffix(path.suffix + ".stdout.txt")
+        if target.is_file() and target.read_text().strip() == expected.strip():
+            print(path, "is up to date")
+            continue
+        target.write_text(expected + "\n")
+        print(path, "updated")
+        updated = True
+
+    if updated:
+        print("----------------------------------------")
+        print("Some .stdout.txt files were updated.")
+        print("Please commit the result.")
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 92fd98c9a6..7fd42535da 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -209,6 +209,7 @@ Why ADBC?
faq
glossary
+ genindex
.. toctree::
:maxdepth: 1
diff --git a/docs/source/python/recipe/driver_manager_lowlevel.py b/docs/source/python/recipe/driver_manager_lowlevel.py
index e8137184d2..bbff7b9a1b 100644
--- a/docs/source/python/recipe/driver_manager_lowlevel.py
+++ b/docs/source/python/recipe/driver_manager_lowlevel.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: driver manager
+# RECIPE KEYWORDS: dynamic driver loading
# RECIPE STARTS HERE
#: While the DB-API_ bindings are recommended for general use, the low-level
#: bindings are also available. These mostly mirror the ADBC C API directly.
diff --git a/docs/source/python/recipe/driver_manager_prepare.py b/docs/source/python/recipe/driver_manager_prepare.py
index b99e944841..c780a07029 100644
--- a/docs/source/python/recipe/driver_manager_prepare.py
+++ b/docs/source/python/recipe/driver_manager_prepare.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: driver manager
+# RECIPE KEYWORDS: prepared statements
# RECIPE STARTS HERE
#: The DBAPI bindings prepare all statements before execution, because of this
#: part of the `DB-API specification`_:
diff --git a/docs/source/python/recipe/flightsql_dremio_connect.py b/docs/source/python/recipe/flightsql_dremio_connect.py
index 9ba5a9e9ee..a8bd1a62b1 100644
--- a/docs/source/python/recipe/flightsql_dremio_connect.py
+++ b/docs/source/python/recipe/flightsql_dremio_connect.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: Flight SQL
+# RECIPE KEYWORDS: connecting to Dremio
# RECIPE STARTS HERE
#: Dremio requires a username and password. To connect to a Flight SQL
diff --git a/docs/source/python/recipe/postgresql_authenticate.py b/docs/source/python/recipe/postgresql_authenticate.py
index d0ccd49ee9..12fc3aceda 100644
--- a/docs/source/python/recipe/postgresql_authenticate.py
+++ b/docs/source/python/recipe/postgresql_authenticate.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: authentication
# RECIPE STARTS HERE
#: To connect to a PostgreSQL database, the username and password must
#: be provided in the URI. For example,
@@ -36,6 +38,7 @@
with conn.cursor() as cur:
cur.execute("SELECT 1")
- assert cur.fetchone() == (1,)
+ print(cur.fetchone())
+ # Output: (1,)
conn.close()
diff --git a/docs/source/python/recipe/postgresql_authenticate.py.stdout.txt b/docs/source/python/recipe/postgresql_authenticate.py.stdout.txt
new file mode 100644
index 0000000000..3fa519830e
--- /dev/null
+++ b/docs/source/python/recipe/postgresql_authenticate.py.stdout.txt
@@ -0,0 +1 @@
+(1,)
diff --git a/docs/source/python/recipe/postgresql_create_append_table.py b/docs/source/python/recipe/postgresql_create_append_table.py
index 36e29b9386..d21722eafd 100644
--- a/docs/source/python/recipe/postgresql_create_append_table.py
+++ b/docs/source/python/recipe/postgresql_create_append_table.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: bulk append
# RECIPE STARTS HERE
#: ADBC allows creating and appending to database tables using Arrow
#: tables.
diff --git a/docs/source/python/recipe/postgresql_create_dataset_table.py b/docs/source/python/recipe/postgresql_create_dataset_table.py
index e26093a308..3aba1fb72d 100644
--- a/docs/source/python/recipe/postgresql_create_dataset_table.py
+++ b/docs/source/python/recipe/postgresql_create_dataset_table.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: bulk ingestion from PyArrow Dataset
# RECIPE STARTS HERE
#: ADBC makes it easy to load PyArrow datasets into your datastore.
diff --git a/docs/source/python/recipe/postgresql_create_temp_table.py b/docs/source/python/recipe/postgresql_create_temp_table.py
index 2d762b9a49..b9e6acca60 100644
--- a/docs/source/python/recipe/postgresql_create_temp_table.py
+++ b/docs/source/python/recipe/postgresql_create_temp_table.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: bulk ingestion to temporary table
# RECIPE STARTS HERE
#: ADBC allows creating and appending to temporary tables as well.
diff --git a/docs/source/python/recipe/postgresql_execute_bind.py b/docs/source/python/recipe/postgresql_execute_bind.py
index f6ad0c67c4..d4d782ab48 100644
--- a/docs/source/python/recipe/postgresql_execute_bind.py
+++ b/docs/source/python/recipe/postgresql_execute_bind.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: bind parameters
# RECIPE STARTS HERE
#: ADBC allows using Python and Arrow values as bind parameters.
diff --git a/docs/source/python/recipe/postgresql_get_query_schema.py b/docs/source/python/recipe/postgresql_get_query_schema.py
index 2568453924..5e9ccf15fd 100644
--- a/docs/source/python/recipe/postgresql_get_query_schema.py
+++ b/docs/source/python/recipe/postgresql_get_query_schema.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: get query result set schema
# RECIPE STARTS HERE
#: ADBC lets you get the schema of a result set, without executing the query.
diff --git a/docs/source/python/recipe/postgresql_get_table_schema.py b/docs/source/python/recipe/postgresql_get_table_schema.py
index 59034c0c11..f81ae2e96d 100644
--- a/docs/source/python/recipe/postgresql_get_table_schema.py
+++ b/docs/source/python/recipe/postgresql_get_table_schema.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: get table schema
# RECIPE STARTS HERE
#: ADBC lets you get the schema of a table as an Arrow schema.
diff --git a/docs/source/python/recipe/postgresql_list_catalogs.py b/docs/source/python/recipe/postgresql_list_catalogs.py
index 01203f002b..610674dcdc 100644
--- a/docs/source/python/recipe/postgresql_list_catalogs.py
+++ b/docs/source/python/recipe/postgresql_list_catalogs.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: query catalog
# RECIPE STARTS HERE
#: ADBC allows listing tables, catalogs, and schemas in the database.
diff --git a/docs/source/python/recipe/postgresql_pandas.py b/docs/source/python/recipe/postgresql_pandas.py
index 651d4effd0..eaf7973fba 100644
--- a/docs/source/python/recipe/postgresql_pandas.py
+++ b/docs/source/python/recipe/postgresql_pandas.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: pandas integration
# RECIPE STARTS HERE
#: ADBC is integrated into pandas_, a popular dataframe library. Pandas can
diff --git a/docs/source/python/recipe/postgresql_polars.py b/docs/source/python/recipe/postgresql_polars.py
index cf5525924c..3424adbadd 100644
--- a/docs/source/python/recipe/postgresql_polars.py
+++ b/docs/source/python/recipe/postgresql_polars.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: polars integration
# RECIPE STARTS HERE
#: ADBC can be used with Polars_, a dataframe library written in Rust. As per
diff --git a/docs/source/python/recipe/postgresql_pool.py b/docs/source/python/recipe/postgresql_pool.py
index c06e0869a0..c5e0764e8c 100644
--- a/docs/source/python/recipe/postgresql_pool.py
+++ b/docs/source/python/recipe/postgresql_pool.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+# RECIPE CATEGORY: PostgreSQL
+# RECIPE KEYWORDS: connection pooling
# RECIPE STARTS HERE
#: ADBC does not implement connection pooling, as this is not generally a
diff --git a/docs/source/tests/test_cookbook.py b/docs/source/tests/test_cookbook.py
index 9093ec1753..06b83f6b07 100644
--- a/docs/source/tests/test_cookbook.py
+++ b/docs/source/tests/test_cookbook.py
@@ -16,20 +16,79 @@
# under the License.
import importlib
+import os
+import subprocess
+import typing
from pathlib import Path
import pytest
+class Recipe(typing.NamedTuple):
+ py_source: Path | None
+ executable: Path | None
+ output: Path | None
+
+
def pytest_generate_tests(metafunc) -> None:
- root = (Path(__file__).parent.parent / "python/recipe/").resolve()
- recipes = root.rglob("*.py")
- metafunc.parametrize(
- "recipe", [pytest.param(path, id=path.stem) for path in recipes]
- )
+ params = []
+ for root in (
+ (Path(__file__).parent.parent / "cpp/recipe_driver/").resolve(),
+ (Path(__file__).parent.parent / "python/recipe/").resolve(),
+ ):
+ recipes = root.rglob("*.py")
+ for path in recipes:
+ output = path.with_suffix(path.suffix + ".stdout.txt")
+ if output.is_file():
+ recipe = Recipe(py_source=path, executable=None, output=output)
+ else:
+ recipe = Recipe(py_source=path, executable=None, output=None)
+ params.append(pytest.param(recipe, id=f"py_{path.stem}"))
+
+ # Find C++ examples with output
+ cpp_bin = os.environ.get("ADBC_CPP_RECIPE_BIN")
+ if cpp_bin:
+ cpp_bin = Path(cpp_bin).resolve()
+ recipes = (Path(__file__).parent.parent / "cpp/").resolve().rglob("*.cc")
+ for path in recipes:
+ output = path.with_suffix(path.suffix + ".stdout.txt")
+ if not output.is_file():
+ continue
+
+ name = f"recipe-{path.stem}"
+ executable = cpp_bin / name
+ if not executable.is_file():
+ raise ValueError(f"Not found: {executable} for {path}")
+
+ recipe = Recipe(py_source=None, executable=executable, output=output)
+ params.append(pytest.param(recipe, id=f"cpp_{path.stem}"))
+
+ metafunc.parametrize("recipe", params)
+
+
+def test_cookbook_recipe(recipe: Recipe, capsys: pytest.CaptureFixture) -> None:
+ if recipe.py_source:
+ spec = importlib.util.spec_from_file_location("__main__", recipe.py_source)
+ mod = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(mod)
+
+ if recipe.output:
+ with recipe.output.open("r") as source:
+ expected = [line for line in source.read().strip().splitlines() if line]
+
+ captured = [
+ line for line in capsys.readouterr().out.strip().splitlines() if line
+ ]
+ assert captured == expected
+ elif recipe.executable:
+ assert recipe.output is not None
+
+ with recipe.output.open("r") as source:
+ expected = [line for line in source.read().strip().splitlines() if line]
+ output = subprocess.check_output(recipe.executable, text=True)
+ captured = [line for line in output.strip().splitlines() if line]
-def test_cookbook_recipe(recipe: Path) -> None:
- spec = importlib.util.spec_from_file_location(f"cookbook.{recipe.stem}", recipe)
- mod = importlib.util.module_from_spec(spec)
- spec.loader.exec_module(mod)
+ assert captured == expected
+ else:
+ assert False