From 2daf2263bb1aad36f19267699ce9d1abf97b4ed4 Mon Sep 17 00:00:00 2001 From: David Li Date: Fri, 14 Feb 2025 02:14:42 -0500 Subject: [PATCH] docs: add stdout/stderr and index support to recipe directive (#2495) - Allow marking up stderr/stdout to be rendered in recipes - Syntax follows Go's (`// Output:`) - Allow adding keywords in recipes that will get added into the Sphinx index - Add script that parses stderr/stdout and places them in files next to recipes for test harnesses to use - Enforce up-to-date output in CI - Update Python test harness to validate stdout - Update C++ test harness to validate stdout Fixes #2082. --- .gitattributes | 1 + .github/workflows/native-unix.yml | 11 + ci/scripts/cpp_recipe.sh | 4 +- dev/release/rat_exclude_files.txt | 1 + docs/source/cpp/driver_example.rst | 2 - docs/source/cpp/quickstart.rst | 1 - docs/source/cpp/recipe/CMakeLists.txt | 14 +- docs/source/cpp/recipe/quickstart.cc | 9 +- .../cpp/recipe/quickstart.cc.stdout.txt | 4 + docs/source/cpp/recipe_driver/CMakeLists.txt | 2 + .../cpp/recipe_driver/driver_example.py | 4 +- .../driver_example.py.stdout.txt | 1 + docs/source/driver/flight_sql.rst | 1 - .../sphinx_recipe/sphinx_recipe/__init__.py | 131 ++------ .../ext/sphinx_recipe/sphinx_recipe/parser.py | 279 ++++++++++++++++++ .../sphinx_recipe/update_output.py | 66 +++++ docs/source/index.rst | 1 + .../python/recipe/driver_manager_lowlevel.py | 2 + .../python/recipe/driver_manager_prepare.py | 2 + .../python/recipe/flightsql_dremio_connect.py | 2 + .../python/recipe/postgresql_authenticate.py | 5 +- .../postgresql_authenticate.py.stdout.txt | 1 + .../recipe/postgresql_create_append_table.py | 2 + .../recipe/postgresql_create_dataset_table.py | 2 + .../recipe/postgresql_create_temp_table.py | 2 + .../python/recipe/postgresql_execute_bind.py | 2 + .../recipe/postgresql_get_query_schema.py | 2 + .../recipe/postgresql_get_table_schema.py | 2 + .../python/recipe/postgresql_list_catalogs.py | 2 + .../source/python/recipe/postgresql_pandas.py | 2 + .../source/python/recipe/postgresql_polars.py | 2 + docs/source/python/recipe/postgresql_pool.py | 2 + docs/source/tests/test_cookbook.py | 77 ++++- 33 files changed, 516 insertions(+), 125 deletions(-) create mode 100644 docs/source/cpp/recipe/quickstart.cc.stdout.txt create mode 100644 docs/source/cpp/recipe_driver/driver_example.py.stdout.txt create mode 100644 docs/source/ext/sphinx_recipe/sphinx_recipe/parser.py create mode 100644 docs/source/ext/sphinx_recipe/sphinx_recipe/update_output.py create mode 100644 docs/source/python/recipe/postgresql_authenticate.py.stdout.txt diff --git a/.gitattributes b/.gitattributes index f885677cbf..745833b12b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +*.stdout.txt linguist-generated c/vendor/* linguist-vendored go/adbc/drivermgr/adbc.h linguist-generated go/adbc/drivermgr/adbc_driver_manager.cc linguist-generated diff --git a/.github/workflows/native-unix.yml b/.github/workflows/native-unix.yml index 2d3e34e75b..1b08add18b 100644 --- a/.github/workflows/native-unix.yml +++ b/.github/workflows/native-unix.yml @@ -710,7 +710,18 @@ jobs: - name: Test Recipes (C++) run: | ./ci/scripts/cpp_recipe.sh $(pwd) ~/local build/recipe + - name: Ensure recipes are up to date + run: | + pip install -e ./docs/source/ext/sphinx_recipe + # Exits 1 if any recipes were updated + python -m sphinx_recipe.update_output \ + docs/source/cpp/recipe/*.cc \ + docs/source/cpp/recipe_driver/driver_example.py \ + docs/source/python/recipe/*.py - name: Test Recipes (Python) run: | docker compose up --detach --wait dremio dremio-init flightsql-sqlite-test postgres-test + export ADBC_CPP_RECIPE_BIN=~/local/bin + # Needed for the combined C++/Python driver example + export LD_LIBRARY_PATH=~/local/lib pytest -vvs docs/source/tests/ diff --git a/ci/scripts/cpp_recipe.sh b/ci/scripts/cpp_recipe.sh index 7309e7ee33..a041ae3bc4 100755 --- a/ci/scripts/cpp_recipe.sh +++ b/ci/scripts/cpp_recipe.sh @@ -33,6 +33,7 @@ test_recipe() { export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${install_dir}/lib" export GOEXPERIMENT=cgocheck2 + rm -rf "${build_dir}" mkdir -p "${build_dir}" pushd "${build_dir}" @@ -41,11 +42,12 @@ test_recipe() { ${ADBC_CMAKE_ARGS} \ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \ -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_INSTALL_PREFIX="${install_dir}" \ -DCMAKE_PREFIX_PATH="${install_dir}" \ -DADBC_DRIVER_EXAMPLE_BUILD_TESTS=ON set +x - cmake --build . -j + cmake --build . --target install -j ctest \ --output-on-failure \ --no-tests=error diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 7ffcfc13e2..54ec748354 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -20,6 +20,7 @@ csharp/*.props dev/release/rat_exclude_files.txt docs/source/format/*.drawio docs/source/format/*.svg +docs/source/*/*.stdout.txt filtered_rat.txt go/adbc/drivermgr/arrow-adbc/adbc.h go/adbc/drivermgr/adbc_driver_manager.cc diff --git a/docs/source/cpp/driver_example.rst b/docs/source/cpp/driver_example.rst index 0776b5e8ef..3730328e0f 100644 --- a/docs/source/cpp/driver_example.rst +++ b/docs/source/cpp/driver_example.rst @@ -20,13 +20,11 @@ Driver Example ============== .. recipe:: recipe_driver/driver_example.cc - :language: cpp Low-level testing ================= .. recipe:: recipe_driver/driver_example_test.cc - :language: cpp High-level testing ================== diff --git a/docs/source/cpp/quickstart.rst b/docs/source/cpp/quickstart.rst index a382f38790..f434014c55 100644 --- a/docs/source/cpp/quickstart.rst +++ b/docs/source/cpp/quickstart.rst @@ -20,4 +20,3 @@ Quickstart ========== .. recipe:: recipe/quickstart.cc - :language: cpp diff --git a/docs/source/cpp/recipe/CMakeLists.txt b/docs/source/cpp/recipe/CMakeLists.txt index 5cc1492bb5..70b4f82e51 100644 --- a/docs/source/cpp/recipe/CMakeLists.txt +++ b/docs/source/cpp/recipe/CMakeLists.txt @@ -31,11 +31,13 @@ find_package(AdbcDriverManager REQUIRED) fetchcontent_declare(nanoarrow GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git GIT_TAG apache-arrow-nanoarrow-0.2.0 - GIT_SHALLOW TRUE) + GIT_SHALLOW TRUE + EXCLUDE_FROM_ALL) fetchcontent_makeavailable(nanoarrow) -add_executable(quickstart quickstart.cc) -target_include_directories(quickstart SYSTEM PRIVATE ${nanoarrow_SOURCE_DIR}/dist) -target_link_libraries(quickstart PRIVATE AdbcDriverManager::adbc_driver_manager_shared - nanoarrow) -add_test(NAME quickstart COMMAND quickstart) +add_executable(recipe-quickstart quickstart.cc) +target_include_directories(recipe-quickstart SYSTEM PRIVATE ${nanoarrow_SOURCE_DIR}/dist) +target_link_libraries(recipe-quickstart + PRIVATE AdbcDriverManager::adbc_driver_manager_shared nanoarrow) +add_test(NAME quickstart COMMAND recipe-quickstart) +install(TARGETS recipe-quickstart) diff --git a/docs/source/cpp/recipe/quickstart.cc b/docs/source/cpp/recipe/quickstart.cc index b9ae384f01..4ce4b46617 100644 --- a/docs/source/cpp/recipe/quickstart.cc +++ b/docs/source/cpp/recipe/quickstart.cc @@ -155,6 +155,7 @@ int main() { /// ahead of time, so this value will actually just be ``-1`` to /// indicate that the value is not known. std::cout << "Got " << rows_affected << " rows" << std::endl; + // Output: Got -1 rows /// We need an Arrow implementation to read the actual results. We /// can use `Arrow C++`_ or `Nanoarrow`_ for that. For simplicity, @@ -172,7 +173,9 @@ int main() { /// Then we can use Nanoarrow to print it: char buf[1024] = {}; ArrowSchemaToString(&schema, buf, sizeof(buf), /*recursive=*/1); - std::cout << buf << std::endl; + std::cout << "Result schema: " << buf << std::endl; + // Output: + // Result schema: struct /// Now we can read the data. The data comes as a stream of Arrow /// record batches. @@ -197,8 +200,10 @@ int main() { } ArrowArrayViewReset(&view); } + // Output: + // Got a batch with 1 rows + // THEANSWER[0] = 42 - std::cout << "Finished reading result set" << std::endl; stream.release(&stream); /// Cleanup diff --git a/docs/source/cpp/recipe/quickstart.cc.stdout.txt b/docs/source/cpp/recipe/quickstart.cc.stdout.txt new file mode 100644 index 0000000000..8436d53cb8 --- /dev/null +++ b/docs/source/cpp/recipe/quickstart.cc.stdout.txt @@ -0,0 +1,4 @@ +Got -1 rows +Result schema: struct +Got a batch with 1 rows +THEANSWER[0] = 42 diff --git a/docs/source/cpp/recipe_driver/CMakeLists.txt b/docs/source/cpp/recipe_driver/CMakeLists.txt index 8e1159a855..39d6df8c51 100644 --- a/docs/source/cpp/recipe_driver/CMakeLists.txt +++ b/docs/source/cpp/recipe_driver/CMakeLists.txt @@ -49,6 +49,8 @@ target_include_directories(driver_example PRIVATE ../../../../c ../../../../c/in target_link_libraries(driver_example PRIVATE adbc_driver_framework nanoarrow::nanoarrow_ipc) +install(TARGETS driver_example) + if(ADBC_DRIVER_EXAMPLE_BUILD_TESTS) fetchcontent_declare(googletest URL https://github.com/google/googletest/archive/refs/tags/v1.15.1.tar.gz diff --git a/docs/source/cpp/recipe_driver/driver_example.py b/docs/source/cpp/recipe_driver/driver_example.py index 86743449a1..c6e5dbd4e4 100644 --- a/docs/source/cpp/recipe_driver/driver_example.py +++ b/docs/source/cpp/recipe_driver/driver_example.py @@ -45,7 +45,8 @@ def connect(uri: str): driver=str(driver_lib.resolve()), db_kwargs={"uri": uri} ) - raise RuntimeError("Can't find driver shared object") + # Try to find it on the dynamic loader path + return dbapi.connect(driver="driver_example", db_kwargs={"uri": uri}) #: Next, we can give our driver a go! The two pieces we implemented in the driver @@ -63,5 +64,6 @@ def connect(uri: str): with con.cursor() as cur: cur.execute("SELECT * FROM example.arrows") print(cur.fetchall()) + # Output: [(1,), (2,), (3,)] os.unlink(Path(__file__).parent / "example.arrows") diff --git a/docs/source/cpp/recipe_driver/driver_example.py.stdout.txt b/docs/source/cpp/recipe_driver/driver_example.py.stdout.txt new file mode 100644 index 0000000000..aeec2cdf53 --- /dev/null +++ b/docs/source/cpp/recipe_driver/driver_example.py.stdout.txt @@ -0,0 +1 @@ +[(1,), (2,), (3,)] diff --git a/docs/source/driver/flight_sql.rst b/docs/source/driver/flight_sql.rst index 0dde65bf35..983a6162c7 100644 --- a/docs/source/driver/flight_sql.rst +++ b/docs/source/driver/flight_sql.rst @@ -129,7 +129,6 @@ the :c:struct:`AdbcDatabase`. :sync: go .. recipe:: ../../../go/adbc/driver/flightsql/example_usage_test.go - :language: go Supported Features ================== diff --git a/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py b/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py index bd96d47437..73718c3c76 100644 --- a/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py +++ b/docs/source/ext/sphinx_recipe/sphinx_recipe/__init__.py @@ -15,13 +15,22 @@ # specific language governing permissions and limitations # under the License. -"""A directive for code recipes with a literate programming style.""" +"""A directive for code recipes with a literate-like programming style. + +1. Write code recipes as normal, self-contained source files. +2. Add comments for prose containing reStructuredText markup. +3. Use the ``recipe`` directive to include the code in your Sphinx + documentation. The directive will parse out the prose and render it as + actual documentation, with the code blocks interspersed. + +Effectively, this turns the code "inside out": code with embedded prose +comments will become prose with embedded code blocks. The actual code remains +valid code and can be tested and run like usual. +""" -import typing from pathlib import Path import docutils -from docutils.parsers.rst import directives from docutils.statemachine import StringList from sphinx.util.docutils import SphinxDirective from sphinx.util.nodes import nested_parse_with_titles @@ -29,120 +38,40 @@ __all__ = ["setup"] - -class SourceLine(typing.NamedTuple): - content: str - lineno: int - - -class SourceFragment(typing.NamedTuple): - kind: str - lines: list[SourceLine] - - -PREAMBLE = "Recipe source: `{name} <{url}>`_" +from . import parser class RecipeDirective(SphinxDirective): has_content = False required_arguments = 1 optional_arguments = 0 - option_spec: OptionSpec = { - "language": directives.unchanged_required, - "prose-prefix": directives.unchanged_required, - } + option_spec: OptionSpec = {} @staticmethod - def default_prose_prefix(language: str) -> str: - return { - "cpp": "///", - "go": "///", - "python": "#:", - }.get(language, "#:") + def source_language(filename: str) -> parser.SourceSyntax: + path = Path(filename) + language = parser.LANGUAGES.get(path.suffix) + if not language: + raise ValueError(f"Unknown language for file {filename}") + return language def run(self): rel_filename, filename = self.env.relfn2path(self.arguments[0]) + # Ask Sphinx to rebuild when either the recipe or the directive are changed self.env.note_dependency(rel_filename) self.env.note_dependency(__file__) - language = self.options.get("language", "python") - prefix = self.options.get("prose-prefix", self.default_prose_prefix(language)) - - # --- Split the source into runs of prose or code - - fragments = [] - - fragment = [] - fragment_type = None - state = "before" - lineno = 1 - for line in open(filename): - if state == "before": - if "RECIPE STARTS HERE" in line: - state = "reading" - elif state == "reading": - if line.strip().startswith(prefix): - line_type = "prose" - # Remove prefix and next whitespace - line = line.lstrip()[len(prefix) + 1 :] - else: - line_type = "code" - - if line_type != fragment_type: - if fragment: - fragments.append( - SourceFragment(kind=fragment_type, lines=fragment) - ) - fragment = [] - fragment_type = line_type - - # Skip blank code lines - if line_type != "code" or line.strip(): - # Remove trailing newline - fragment.append(SourceLine(content=line[:-1], lineno=lineno)) - - lineno += 1 - - if fragment: - fragments.append(SourceFragment(kind=fragment_type, lines=fragment)) - - # --- Generate the final reST as a whole and parse it - # That way, section hierarchy works properly - - generated_lines = [] - - # Link to the source on GitHub + syntax = self.source_language(filename) repo_url_template = self.env.config.recipe_repo_url_template - if repo_url_template is not None: - repo_url = repo_url_template.format(rel_filename=rel_filename) - generated_lines.append( - PREAMBLE.format( - name=Path(rel_filename).name, - url=repo_url, - ) - ) - # Paragraph break - generated_lines.append("") - - for fragment in fragments: - if fragment.kind == "prose": - generated_lines.extend([line.content for line in fragment.lines]) - generated_lines.append("") - elif fragment.kind == "code": - line_min = fragment.lines[0].lineno - line_max = fragment.lines[-1].lineno - lines = [ - f".. literalinclude:: {self.arguments[0]}", - f" :language: {language}", - " :linenos:", - " :lineno-match:", - f" :lines: {line_min}-{line_max}", - "", - ] - generated_lines.extend(lines) - else: - raise RuntimeError("Unknown fragment kind") + with open(filename) as source: + generated_lines = parser.parse_recipe_to_rest( + source, + filename=self.arguments[0], + rel_filename=rel_filename, + syntax=syntax, + repo_url_template=repo_url_template, + ) parsed = docutils.nodes.Element() nested_parse_with_titles( diff --git a/docs/source/ext/sphinx_recipe/sphinx_recipe/parser.py b/docs/source/ext/sphinx_recipe/sphinx_recipe/parser.py new file mode 100644 index 0000000000..c13825f518 --- /dev/null +++ b/docs/source/ext/sphinx_recipe/sphinx_recipe/parser.py @@ -0,0 +1,279 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import typing +from pathlib import Path + + +class SourceLine(typing.NamedTuple): + """A reference into the recipe source file.""" + + content: str + #: 1-indexed. Used for proper line numbers for code blocks. + lineno: int + + +class SourceFragment(typing.NamedTuple): + """A run of source or prose lines in a recipe.""" + + kind: typing.Literal["source", "prose", "stderr", "stdout"] + lines: list[SourceLine] + + +class SourceSyntax(typing.NamedTuple): + """Language-specific configuration for parsing recipes.""" + + #: Language name to use for syntax highlighting. + pygments_language: str + #: Prefix for prose comments. + prose_prefix: str + #: Prefix for output blocks. + stdout_prefix: str + #: Prefix for stderr blocks. + stderr_prefix: str + #: Prefix for continuation lines. + output_continuation_prefix: str + + +class ParsedRecipe(typing.NamedTuple): + """The result of parsing a recipe.""" + + fragments: list[SourceFragment] + stdout: list[str] + stderr: list[str] + category: str | None + keywords: list[str] + + +#: Prepended to the Sphinx output to link to the source of the recipe. +PREAMBLE = "Recipe source: `{name} <{url}>`_" +#: Indicates the start of the recipe content. +START = "RECIPE STARTS HERE" +#: Allows you to specify the category (used in the index). +CATEGORY_PREFIX = "RECIPE CATEGORY:" +#: Allows you to specify comma-separated keywords (used in the index). +KEYWORDS_PREFIX = "RECIPE KEYWORDS:" + + +_LANGUAGES = { + (".cc", ".cpp"): SourceSyntax( + pygments_language="cpp", + prose_prefix="///", + stdout_prefix="// Output:", + stderr_prefix="// Standard Error:", + output_continuation_prefix="//", + ), + (".go",): SourceSyntax( + pygments_language="go", + prose_prefix="///", + stdout_prefix="// Output:", + stderr_prefix="// Standard Error:", + output_continuation_prefix="//", + ), + (".java",): SourceSyntax( + pygments_language="java", + prose_prefix="///", + stdout_prefix="// Output:", + stderr_prefix="// Standard Error:", + output_continuation_prefix="//", + ), + (".py",): SourceSyntax( + pygments_language="python", + prose_prefix="#:", + stdout_prefix="# Output:", + stderr_prefix="# Standard Error:", + output_continuation_prefix="#", + ), +} +LANGUAGES = {ext: lang for exts, lang in _LANGUAGES.items() for ext in exts} + + +def parse_recipe_to_fragments( + source: typing.Iterable[str], + *, + syntax: SourceSyntax, +): + # --- Split the source into runs of prose or code + + fragments = [] + category = None + keywords = [] + + fragment = [] + fragment_type = None + # "before" --> ignore code lines (e.g. for a license header) + # "reading" --> parse code lines + state = "before" + lineno = 1 + for line in source: + if state == "before": + if START in line: + state = "reading" + elif CATEGORY_PREFIX in line: + index = line.find(CATEGORY_PREFIX) + category = line[index + len(CATEGORY_PREFIX) :].strip() + elif KEYWORDS_PREFIX in line: + index = line.find(KEYWORDS_PREFIX) + keywords = [ + keyword.strip() + for keyword in line[index + len(KEYWORDS_PREFIX) :] + .strip() + .split(",") + ] + elif state == "reading": + trimmed = line.lstrip() + if trimmed.startswith(syntax.prose_prefix): + line_type = "prose" + # Remove prefix and next whitespace + line = trimmed[len(syntax.prose_prefix) + 1 :] + elif trimmed.startswith(syntax.stdout_prefix): + line_type = "stdout" + line = trimmed[len(syntax.stdout_prefix) + 1 :] + elif trimmed.startswith(syntax.stderr_prefix): + line_type = "stderr" + line = trimmed[len(syntax.stderr_prefix) + 1 :] + elif fragment_type in ("stdout", "stderr") and trimmed.startswith( + syntax.output_continuation_prefix + ): + line = trimmed[len(syntax.output_continuation_prefix) + 1 :] + else: + line_type = "code" + + if line_type != fragment_type: + if fragment: + fragments.append(SourceFragment(kind=fragment_type, lines=fragment)) + fragment = [] + fragment_type = line_type + + # Skip blank code lines (blank lines in reST are significant) + if line_type != "code" or line.strip(): + # Remove trailing newline + fragment.append(SourceLine(content=line[:-1], lineno=lineno)) + + lineno += 1 + + if fragment: + fragments.append(SourceFragment(kind=fragment_type, lines=fragment)) + + # --- Split out output fragments, merge adjacent fragments + # We render output blocks at the end, so remove them here. Merging + # adjacent fragments avoids odd breaks in the source code. + + stdout = [] + stderr = [] + new_fragments = [] + for fragment in fragments: + if fragment.kind == "stdout": + lines = fragment.lines + if lines and lines[0].content == "": + # Avoid blank line when using format like + # // Output: + # // theanswer = 42 + lines = lines[1:] + stdout.extend(line.content for line in lines) + elif fragment.kind == "stderr": + stderr.extend(line.content for line in fragment.lines) + else: + if ( + new_fragments + and fragment.kind == "code" + and new_fragments[-1].kind == fragment.kind + ): + new_fragments[-1].lines.extend(fragment.lines) + else: + new_fragments.append(fragment) + fragments = new_fragments + return ParsedRecipe( + fragments=fragments, + stdout=stdout, + stderr=stderr, + category=category, + keywords=keywords, + ) + + +def parse_recipe_to_rest( + source: typing.Iterable[str], + *, + filename: str, + rel_filename: str, + syntax: SourceSyntax, + repo_url_template: str | None = None, +) -> list[str]: + parsed = parse_recipe_to_fragments(source, syntax=syntax) + + # --- Generate the final reST as a whole and parse it + # That way, section hierarchy works properly + + generated_lines = [] + + if parsed.category and parsed.keywords: + generated_lines.append(".. index::") + for keyword in parsed.keywords: + generated_lines.append(f" pair: {parsed.category}; {keyword} (recipe)") + generated_lines.append("") + + # Link to the source on GitHub + if repo_url_template is not None: + repo_url = repo_url_template.format(rel_filename=rel_filename) + generated_lines.append( + PREAMBLE.format( + name=Path(rel_filename).name, + url=repo_url, + ) + ) + + # Paragraph break + generated_lines.append("") + + for fragment in parsed.fragments: + if fragment.kind == "prose": + generated_lines.extend([line.content for line in fragment.lines]) + generated_lines.append("") + elif fragment.kind == "code": + line_min = fragment.lines[0].lineno + line_max = fragment.lines[-1].lineno + lines = [ + f".. literalinclude:: {filename}", + f" :language: {syntax.pygments_language}", + " :linenos:", + " :lineno-match:", + f" :lines: {line_min}-{line_max}", + "", + ] + generated_lines.extend(lines) + else: + raise RuntimeError(f"Unknown fragment kind {fragment.kind}") + + if parsed.stdout: + generated_lines.append(".. code-block:: text") + generated_lines.append(" :caption: stdout") + generated_lines.append("") + for line in parsed.stdout: + # reST escapes the content of a code-block directive + generated_lines.append(" " + line) + generated_lines.append("") + + if parsed.stderr: + generated_lines.append(".. code-block:: text") + generated_lines.append(" :caption: stderr") + generated_lines.append("") + for line in parsed.stderr: + generated_lines.append(" " + line) + generated_lines.append("") + + return generated_lines diff --git a/docs/source/ext/sphinx_recipe/sphinx_recipe/update_output.py b/docs/source/ext/sphinx_recipe/sphinx_recipe/update_output.py new file mode 100644 index 0000000000..1b998ac224 --- /dev/null +++ b/docs/source/ext/sphinx_recipe/sphinx_recipe/update_output.py @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Regenerate .stdout.txt files from recipes for the test harness.""" + +import argparse +import sys +from pathlib import Path + +from . import parser as recipe_parser + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument("recipes", nargs="+", type=Path, help="Recipe files to update") + + args = parser.parse_args() + + updated = False + for path in args.recipes: + syntax = recipe_parser.LANGUAGES[path.suffix] + with path.open("r") as source: + recipe = recipe_parser.parse_recipe_to_fragments(source, syntax=syntax) + + stdout = [line for line in recipe.stdout if line] + if not stdout: + continue + + target = path.with_suffix(path.suffix + ".stdout.txt") + if target.is_file(): + with target.open("r") as source: + if source.read().strip() == "\n".join(stdout).strip(): + print(path, "is up to date") + continue + + with target.open("w") as sink: + for line in stdout: + sink.write(line) + sink.write("\n") + print(path, "updated") + updated = True + + if updated: + print("----------------------------------------") + print("Some .stdout.txt files were updated.") + print("Please commit the result.") + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docs/source/index.rst b/docs/source/index.rst index 92fd98c9a6..7fd42535da 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -209,6 +209,7 @@ Why ADBC? faq glossary + genindex .. toctree:: :maxdepth: 1 diff --git a/docs/source/python/recipe/driver_manager_lowlevel.py b/docs/source/python/recipe/driver_manager_lowlevel.py index e8137184d2..bbff7b9a1b 100644 --- a/docs/source/python/recipe/driver_manager_lowlevel.py +++ b/docs/source/python/recipe/driver_manager_lowlevel.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: driver manager +# RECIPE KEYWORDS: dynamic driver loading # RECIPE STARTS HERE #: While the DB-API_ bindings are recommended for general use, the low-level #: bindings are also available. These mostly mirror the ADBC C API directly. diff --git a/docs/source/python/recipe/driver_manager_prepare.py b/docs/source/python/recipe/driver_manager_prepare.py index b99e944841..c780a07029 100644 --- a/docs/source/python/recipe/driver_manager_prepare.py +++ b/docs/source/python/recipe/driver_manager_prepare.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: driver manager +# RECIPE KEYWORDS: prepared statements # RECIPE STARTS HERE #: The DBAPI bindings prepare all statements before execution, because of this #: part of the `DB-API specification`_: diff --git a/docs/source/python/recipe/flightsql_dremio_connect.py b/docs/source/python/recipe/flightsql_dremio_connect.py index 9ba5a9e9ee..a8bd1a62b1 100644 --- a/docs/source/python/recipe/flightsql_dremio_connect.py +++ b/docs/source/python/recipe/flightsql_dremio_connect.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: Flight SQL +# RECIPE KEYWORDS: connecting to Dremio # RECIPE STARTS HERE #: Dremio requires a username and password. To connect to a Flight SQL diff --git a/docs/source/python/recipe/postgresql_authenticate.py b/docs/source/python/recipe/postgresql_authenticate.py index d0ccd49ee9..12fc3aceda 100644 --- a/docs/source/python/recipe/postgresql_authenticate.py +++ b/docs/source/python/recipe/postgresql_authenticate.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: authentication # RECIPE STARTS HERE #: To connect to a PostgreSQL database, the username and password must #: be provided in the URI. For example, @@ -36,6 +38,7 @@ with conn.cursor() as cur: cur.execute("SELECT 1") - assert cur.fetchone() == (1,) + print(cur.fetchone()) + # Output: (1,) conn.close() diff --git a/docs/source/python/recipe/postgresql_authenticate.py.stdout.txt b/docs/source/python/recipe/postgresql_authenticate.py.stdout.txt new file mode 100644 index 0000000000..3fa519830e --- /dev/null +++ b/docs/source/python/recipe/postgresql_authenticate.py.stdout.txt @@ -0,0 +1 @@ +(1,) diff --git a/docs/source/python/recipe/postgresql_create_append_table.py b/docs/source/python/recipe/postgresql_create_append_table.py index 36e29b9386..d21722eafd 100644 --- a/docs/source/python/recipe/postgresql_create_append_table.py +++ b/docs/source/python/recipe/postgresql_create_append_table.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: bulk append # RECIPE STARTS HERE #: ADBC allows creating and appending to database tables using Arrow #: tables. diff --git a/docs/source/python/recipe/postgresql_create_dataset_table.py b/docs/source/python/recipe/postgresql_create_dataset_table.py index e26093a308..3aba1fb72d 100644 --- a/docs/source/python/recipe/postgresql_create_dataset_table.py +++ b/docs/source/python/recipe/postgresql_create_dataset_table.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: bulk ingestion from PyArrow Dataset # RECIPE STARTS HERE #: ADBC makes it easy to load PyArrow datasets into your datastore. diff --git a/docs/source/python/recipe/postgresql_create_temp_table.py b/docs/source/python/recipe/postgresql_create_temp_table.py index 2d762b9a49..b9e6acca60 100644 --- a/docs/source/python/recipe/postgresql_create_temp_table.py +++ b/docs/source/python/recipe/postgresql_create_temp_table.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: bulk ingestion to temporary table # RECIPE STARTS HERE #: ADBC allows creating and appending to temporary tables as well. diff --git a/docs/source/python/recipe/postgresql_execute_bind.py b/docs/source/python/recipe/postgresql_execute_bind.py index f6ad0c67c4..d4d782ab48 100644 --- a/docs/source/python/recipe/postgresql_execute_bind.py +++ b/docs/source/python/recipe/postgresql_execute_bind.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: bind parameters # RECIPE STARTS HERE #: ADBC allows using Python and Arrow values as bind parameters. diff --git a/docs/source/python/recipe/postgresql_get_query_schema.py b/docs/source/python/recipe/postgresql_get_query_schema.py index 2568453924..5e9ccf15fd 100644 --- a/docs/source/python/recipe/postgresql_get_query_schema.py +++ b/docs/source/python/recipe/postgresql_get_query_schema.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: get query result set schema # RECIPE STARTS HERE #: ADBC lets you get the schema of a result set, without executing the query. diff --git a/docs/source/python/recipe/postgresql_get_table_schema.py b/docs/source/python/recipe/postgresql_get_table_schema.py index 59034c0c11..f81ae2e96d 100644 --- a/docs/source/python/recipe/postgresql_get_table_schema.py +++ b/docs/source/python/recipe/postgresql_get_table_schema.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: get table schema # RECIPE STARTS HERE #: ADBC lets you get the schema of a table as an Arrow schema. diff --git a/docs/source/python/recipe/postgresql_list_catalogs.py b/docs/source/python/recipe/postgresql_list_catalogs.py index 01203f002b..610674dcdc 100644 --- a/docs/source/python/recipe/postgresql_list_catalogs.py +++ b/docs/source/python/recipe/postgresql_list_catalogs.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: query catalog # RECIPE STARTS HERE #: ADBC allows listing tables, catalogs, and schemas in the database. diff --git a/docs/source/python/recipe/postgresql_pandas.py b/docs/source/python/recipe/postgresql_pandas.py index 651d4effd0..eaf7973fba 100644 --- a/docs/source/python/recipe/postgresql_pandas.py +++ b/docs/source/python/recipe/postgresql_pandas.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: pandas integration # RECIPE STARTS HERE #: ADBC is integrated into pandas_, a popular dataframe library. Pandas can diff --git a/docs/source/python/recipe/postgresql_polars.py b/docs/source/python/recipe/postgresql_polars.py index cf5525924c..3424adbadd 100644 --- a/docs/source/python/recipe/postgresql_polars.py +++ b/docs/source/python/recipe/postgresql_polars.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: polars integration # RECIPE STARTS HERE #: ADBC can be used with Polars_, a dataframe library written in Rust. As per diff --git a/docs/source/python/recipe/postgresql_pool.py b/docs/source/python/recipe/postgresql_pool.py index c06e0869a0..c5e0764e8c 100644 --- a/docs/source/python/recipe/postgresql_pool.py +++ b/docs/source/python/recipe/postgresql_pool.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# RECIPE CATEGORY: PostgreSQL +# RECIPE KEYWORDS: connection pooling # RECIPE STARTS HERE #: ADBC does not implement connection pooling, as this is not generally a diff --git a/docs/source/tests/test_cookbook.py b/docs/source/tests/test_cookbook.py index 9093ec1753..06b83f6b07 100644 --- a/docs/source/tests/test_cookbook.py +++ b/docs/source/tests/test_cookbook.py @@ -16,20 +16,79 @@ # under the License. import importlib +import os +import subprocess +import typing from pathlib import Path import pytest +class Recipe(typing.NamedTuple): + py_source: Path | None + executable: Path | None + output: Path | None + + def pytest_generate_tests(metafunc) -> None: - root = (Path(__file__).parent.parent / "python/recipe/").resolve() - recipes = root.rglob("*.py") - metafunc.parametrize( - "recipe", [pytest.param(path, id=path.stem) for path in recipes] - ) + params = [] + for root in ( + (Path(__file__).parent.parent / "cpp/recipe_driver/").resolve(), + (Path(__file__).parent.parent / "python/recipe/").resolve(), + ): + recipes = root.rglob("*.py") + for path in recipes: + output = path.with_suffix(path.suffix + ".stdout.txt") + if output.is_file(): + recipe = Recipe(py_source=path, executable=None, output=output) + else: + recipe = Recipe(py_source=path, executable=None, output=None) + params.append(pytest.param(recipe, id=f"py_{path.stem}")) + + # Find C++ examples with output + cpp_bin = os.environ.get("ADBC_CPP_RECIPE_BIN") + if cpp_bin: + cpp_bin = Path(cpp_bin).resolve() + recipes = (Path(__file__).parent.parent / "cpp/").resolve().rglob("*.cc") + for path in recipes: + output = path.with_suffix(path.suffix + ".stdout.txt") + if not output.is_file(): + continue + + name = f"recipe-{path.stem}" + executable = cpp_bin / name + if not executable.is_file(): + raise ValueError(f"Not found: {executable} for {path}") + + recipe = Recipe(py_source=None, executable=executable, output=output) + params.append(pytest.param(recipe, id=f"cpp_{path.stem}")) + + metafunc.parametrize("recipe", params) + + +def test_cookbook_recipe(recipe: Recipe, capsys: pytest.CaptureFixture) -> None: + if recipe.py_source: + spec = importlib.util.spec_from_file_location("__main__", recipe.py_source) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + + if recipe.output: + with recipe.output.open("r") as source: + expected = [line for line in source.read().strip().splitlines() if line] + + captured = [ + line for line in capsys.readouterr().out.strip().splitlines() if line + ] + assert captured == expected + elif recipe.executable: + assert recipe.output is not None + + with recipe.output.open("r") as source: + expected = [line for line in source.read().strip().splitlines() if line] + output = subprocess.check_output(recipe.executable, text=True) + captured = [line for line in output.strip().splitlines() if line] -def test_cookbook_recipe(recipe: Path) -> None: - spec = importlib.util.spec_from_file_location(f"cookbook.{recipe.stem}", recipe) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) + assert captured == expected + else: + assert False