docs: add stdout/stderr and index support to recipe directive (#2495)

- Allow marking up stderr/stdout to be rendered in recipes
- Syntax follows Go's (`// Output:`)
- Allow adding keywords in recipes that will get added into the Sphinx index
- Add script that parses stderr/stdout and places them in files next to recipes for test harnesses to use
- Enforce up-to-date output in CI
- Update Python test harness to validate stdout
- Update C++ test harness to validate stdout

Fixes #2082.
apache · Feb 14, 2025 · 2daf226 · 2daf226
1 parent 7c88ff8
commit 2daf226
Show file tree

Hide file tree

Showing 33 changed files with 516 additions and 125 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+*.stdout.txt linguist-generated
 c/vendor/* linguist-vendored
 go/adbc/drivermgr/adbc.h linguist-generated
 go/adbc/drivermgr/adbc_driver_manager.cc linguist-generated

diff --git a/.github/workflows/native-unix.yml b/.github/workflows/native-unix.yml
@@ -710,7 +710,18 @@ jobs:
       - name: Test Recipes (C++)
         run: |
           ./ci/scripts/cpp_recipe.sh $(pwd) ~/local build/recipe
+      - name: Ensure recipes are up to date
+        run: |
+          pip install -e ./docs/source/ext/sphinx_recipe
+          # Exits 1 if any recipes were updated
+          python -m sphinx_recipe.update_output \
+            docs/source/cpp/recipe/*.cc \
+            docs/source/cpp/recipe_driver/driver_example.py \
+            docs/source/python/recipe/*.py
       - name: Test Recipes (Python)
         run: |
           docker compose up --detach --wait dremio dremio-init flightsql-sqlite-test postgres-test
+          export ADBC_CPP_RECIPE_BIN=~/local/bin
+          # Needed for the combined C++/Python driver example
+          export LD_LIBRARY_PATH=~/local/lib
           pytest -vvs docs/source/tests/
diff --git a/ci/scripts/cpp_recipe.sh b/ci/scripts/cpp_recipe.sh
@@ -33,6 +33,7 @@ test_recipe() {
     export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${install_dir}/lib"
     export GOEXPERIMENT=cgocheck2
 
+    rm -rf "${build_dir}"
     mkdir -p "${build_dir}"
     pushd "${build_dir}"
 
@@ -41,11 +42,12 @@ test_recipe() {
           ${ADBC_CMAKE_ARGS} \
           -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
           -DCMAKE_INSTALL_LIBDIR=lib \
+          -DCMAKE_INSTALL_PREFIX="${install_dir}" \
           -DCMAKE_PREFIX_PATH="${install_dir}" \
           -DADBC_DRIVER_EXAMPLE_BUILD_TESTS=ON
     set +x
 
-    cmake --build . -j
+    cmake --build . --target install -j
     ctest \
         --output-on-failure \
         --no-tests=error

diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
@@ -20,6 +20,7 @@ csharp/*.props
 dev/release/rat_exclude_files.txt
 docs/source/format/*.drawio
 docs/source/format/*.svg
+docs/source/*/*.stdout.txt
 filtered_rat.txt
 go/adbc/drivermgr/arrow-adbc/adbc.h
 go/adbc/drivermgr/adbc_driver_manager.cc

diff --git a/docs/source/cpp/driver_example.rst b/docs/source/cpp/driver_example.rst
@@ -20,13 +20,11 @@ Driver Example
 ==============
 
 .. recipe:: recipe_driver/driver_example.cc
-   :language: cpp
 
 Low-level testing
 =================
 
 .. recipe:: recipe_driver/driver_example_test.cc
-   :language: cpp
 
 High-level testing
 ==================

diff --git a/docs/source/cpp/quickstart.rst b/docs/source/cpp/quickstart.rst
@@ -20,4 +20,3 @@ Quickstart
 ==========
 
 .. recipe:: recipe/quickstart.cc
-   :language: cpp
diff --git a/docs/source/cpp/recipe/CMakeLists.txt b/docs/source/cpp/recipe/CMakeLists.txt
@@ -31,11 +31,13 @@ find_package(AdbcDriverManager REQUIRED)
 fetchcontent_declare(nanoarrow
                      GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
                      GIT_TAG apache-arrow-nanoarrow-0.2.0
-                     GIT_SHALLOW TRUE)
+                     GIT_SHALLOW TRUE
+                     EXCLUDE_FROM_ALL)
 fetchcontent_makeavailable(nanoarrow)
 
-add_executable(quickstart quickstart.cc)
-target_include_directories(quickstart SYSTEM PRIVATE ${nanoarrow_SOURCE_DIR}/dist)
-target_link_libraries(quickstart PRIVATE AdbcDriverManager::adbc_driver_manager_shared
-                                         nanoarrow)
-add_test(NAME quickstart COMMAND quickstart)
+add_executable(recipe-quickstart quickstart.cc)
+target_include_directories(recipe-quickstart SYSTEM PRIVATE ${nanoarrow_SOURCE_DIR}/dist)
+target_link_libraries(recipe-quickstart
+                      PRIVATE AdbcDriverManager::adbc_driver_manager_shared nanoarrow)
+add_test(NAME quickstart COMMAND recipe-quickstart)
+install(TARGETS recipe-quickstart)
diff --git a/docs/source/cpp/recipe/quickstart.cc b/docs/source/cpp/recipe/quickstart.cc
@@ -155,6 +155,7 @@ int main() {
   /// ahead of time, so this value will actually just be ``-1`` to
   /// indicate that the value is not known.
   std::cout << "Got " << rows_affected << " rows" << std::endl;
+  // Output: Got -1 rows
 
   /// We need an Arrow implementation to read the actual results.  We
   /// can use `Arrow C++`_ or `Nanoarrow`_ for that.  For simplicity,
@@ -172,7 +173,9 @@ int main() {
   /// Then we can use Nanoarrow to print it:
   char buf[1024] = {};
   ArrowSchemaToString(&schema, buf, sizeof(buf), /*recursive=*/1);
-  std::cout << buf << std::endl;
+  std::cout << "Result schema: " << buf << std::endl;
+  // Output:
+  // Result schema: struct<THEANSWER: int64>
 
   /// Now we can read the data.  The data comes as a stream of Arrow
   /// record batches.
@@ -197,8 +200,10 @@ int main() {
     }
     ArrowArrayViewReset(&view);
   }
+  // Output:
+  // Got a batch with 1 rows
+  // THEANSWER[0] = 42
 
-  std::cout << "Finished reading result set" << std::endl;
   stream.release(&stream);
 
   /// Cleanup

diff --git a/docs/source/cpp/recipe/quickstart.cc.stdout.txt b/docs/source/cpp/recipe/quickstart.cc.stdout.txt
diff --git a/docs/source/cpp/recipe_driver/CMakeLists.txt b/docs/source/cpp/recipe_driver/CMakeLists.txt
@@ -49,6 +49,8 @@ target_include_directories(driver_example PRIVATE ../../../../c ../../../../c/in
 target_link_libraries(driver_example PRIVATE adbc_driver_framework
                                              nanoarrow::nanoarrow_ipc)
 
+install(TARGETS driver_example)
+
 if(ADBC_DRIVER_EXAMPLE_BUILD_TESTS)
   fetchcontent_declare(googletest
                        URL https://github.com/google/googletest/archive/refs/tags/v1.15.1.tar.gz

diff --git a/docs/source/cpp/recipe_driver/driver_example.py b/docs/source/cpp/recipe_driver/driver_example.py
@@ -45,7 +45,8 @@ def connect(uri: str):
                 driver=str(driver_lib.resolve()), db_kwargs={"uri": uri}
             )
 
-    raise RuntimeError("Can't find driver shared object")
+    # Try to find it on the dynamic loader path
+    return dbapi.connect(driver="driver_example", db_kwargs={"uri": uri})
 
 
 #: Next, we can give our driver a go! The two pieces we implemented in the driver
@@ -63,5 +64,6 @@ def connect(uri: str):
         with con.cursor() as cur:
             cur.execute("SELECT * FROM example.arrows")
             print(cur.fetchall())
+            # Output: [(1,), (2,), (3,)]
 
         os.unlink(Path(__file__).parent / "example.arrows")
diff --git a/docs/source/cpp/recipe_driver/driver_example.py.stdout.txt b/docs/source/cpp/recipe_driver/driver_example.py.stdout.txt
diff --git a/docs/source/driver/flight_sql.rst b/docs/source/driver/flight_sql.rst
@@ -129,7 +129,6 @@ the :c:struct:`AdbcDatabase`.
       :sync: go
 
       .. recipe:: ../../../go/adbc/driver/flightsql/example_usage_test.go
-         :language: go
 
 Supported Features
 ==================

diff --git a/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py b/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py
@@ -15,134 +15,63 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""A directive for code recipes with a literate programming style."""
+"""A directive for code recipes with a literate-like programming style.
+
+1. Write code recipes as normal, self-contained source files.
+2. Add comments for prose containing reStructuredText markup.
+3. Use the ``recipe`` directive to include the code in your Sphinx
+   documentation. The directive will parse out the prose and render it as
+   actual documentation, with the code blocks interspersed.
+
+Effectively, this turns the code "inside out": code with embedded prose
+comments will become prose with embedded code blocks.  The actual code remains
+valid code and can be tested and run like usual.
+"""
 
-import typing
 from pathlib import Path
 
 import docutils
-from docutils.parsers.rst import directives
 from docutils.statemachine import StringList
 from sphinx.util.docutils import SphinxDirective
 from sphinx.util.nodes import nested_parse_with_titles
 from sphinx.util.typing import OptionSpec
 
 __all__ = ["setup"]
 
-
-class SourceLine(typing.NamedTuple):
-    content: str
-    lineno: int
-
-
-class SourceFragment(typing.NamedTuple):
-    kind: str
-    lines: list[SourceLine]
-
-
-PREAMBLE = "Recipe source: `{name} <{url}>`_"
+from . import parser
 
 
 class RecipeDirective(SphinxDirective):
     has_content = False
     required_arguments = 1
     optional_arguments = 0
-    option_spec: OptionSpec = {
-        "language": directives.unchanged_required,
-        "prose-prefix": directives.unchanged_required,
-    }
+    option_spec: OptionSpec = {}
 
     @staticmethod
-    def default_prose_prefix(language: str) -> str:
-        return {
-            "cpp": "///",
-            "go": "///",
-            "python": "#:",
-        }.get(language, "#:")
+    def source_language(filename: str) -> parser.SourceSyntax:
+        path = Path(filename)
+        language = parser.LANGUAGES.get(path.suffix)
+        if not language:
+            raise ValueError(f"Unknown language for file {filename}")
+        return language
 
     def run(self):
         rel_filename, filename = self.env.relfn2path(self.arguments[0])
+        # Ask Sphinx to rebuild when either the recipe or the directive are changed
         self.env.note_dependency(rel_filename)
         self.env.note_dependency(__file__)
 
-        language = self.options.get("language", "python")
-        prefix = self.options.get("prose-prefix", self.default_prose_prefix(language))
-
-        # --- Split the source into runs of prose or code
-
-        fragments = []
-
-        fragment = []
-        fragment_type = None
-        state = "before"
-        lineno = 1
-        for line in open(filename):
-            if state == "before":
-                if "RECIPE STARTS HERE" in line:
-                    state = "reading"
-            elif state == "reading":
-                if line.strip().startswith(prefix):
-                    line_type = "prose"
-                    # Remove prefix and next whitespace
-                    line = line.lstrip()[len(prefix) + 1 :]
-                else:
-                    line_type = "code"
-
-                if line_type != fragment_type:
-                    if fragment:
-                        fragments.append(
-                            SourceFragment(kind=fragment_type, lines=fragment)
-                        )
-                        fragment = []
-                    fragment_type = line_type
-
-                # Skip blank code lines
-                if line_type != "code" or line.strip():
-                    # Remove trailing newline
-                    fragment.append(SourceLine(content=line[:-1], lineno=lineno))
-
-            lineno += 1
-
-        if fragment:
-            fragments.append(SourceFragment(kind=fragment_type, lines=fragment))
-
-        # --- Generate the final reST as a whole and parse it
-        # That way, section hierarchy works properly
-
-        generated_lines = []
-
-        # Link to the source on GitHub
+        syntax = self.source_language(filename)
         repo_url_template = self.env.config.recipe_repo_url_template
-        if repo_url_template is not None:
-            repo_url = repo_url_template.format(rel_filename=rel_filename)
-            generated_lines.append(
-                PREAMBLE.format(
-                    name=Path(rel_filename).name,
-                    url=repo_url,
-                )
-            )
 
-        # Paragraph break
-        generated_lines.append("")
-
-        for fragment in fragments:
-            if fragment.kind == "prose":
-                generated_lines.extend([line.content for line in fragment.lines])
-                generated_lines.append("")
-            elif fragment.kind == "code":
-                line_min = fragment.lines[0].lineno
-                line_max = fragment.lines[-1].lineno
-                lines = [
-                    f".. literalinclude:: {self.arguments[0]}",
-                    f"   :language: {language}",
-                    "   :linenos:",
-                    "   :lineno-match:",
-                    f"   :lines: {line_min}-{line_max}",
-                    "",
-                ]
-                generated_lines.extend(lines)
-            else:
-                raise RuntimeError("Unknown fragment kind")
+        with open(filename) as source:
+            generated_lines = parser.parse_recipe_to_rest(
+                source,
+                filename=self.arguments[0],
+                rel_filename=rel_filename,
+                syntax=syntax,
+                repo_url_template=repo_url_template,
+            )
 
         parsed = docutils.nodes.Element()
         nested_parse_with_titles(
Original file line number	Diff line number	Diff line change
@@ -20,4 +20,3 @@ Quickstart
 ==========
 
 .. recipe:: recipe/quickstart.cc
-   :language: cpp