From 9053ce48bd95ddae1ec2c15767fa34fa45eb9b44 Mon Sep 17 00:00:00 2001
From: David Li
Date: Sun, 16 Feb 2025 20:20:38 -0500
Subject: [PATCH] docs: add SQLite cookbook example for batch size/inference
(#2523)
Related to #581.
---
docs/source/driver/sqlite.rst | 2 +
docs/source/python/recipe/index.rst | 1 +
docs/source/python/recipe/sqlite.rst | 25 ++++++
.../source/python/recipe/sqlite_batch_rows.py | 82 +++++++++++++++++++
.../recipe/sqlite_batch_rows.py.stdout.txt | 2 +
5 files changed, 112 insertions(+)
create mode 100644 docs/source/python/recipe/sqlite.rst
create mode 100644 docs/source/python/recipe/sqlite_batch_rows.py
create mode 100644 docs/source/python/recipe/sqlite_batch_rows.py.stdout.txt
diff --git a/docs/source/driver/sqlite.rst b/docs/source/driver/sqlite.rst
index 64a8453052..fc6b413845 100644
--- a/docs/source/driver/sqlite.rst
+++ b/docs/source/driver/sqlite.rst
@@ -104,6 +104,8 @@ shared across all connections.
with adbc_driver_sqlite.dbapi.connect() as conn:
pass
+ For more examples, see :doc:`../python/recipe/sqlite`.
+
.. tab-item:: R
:sync: r
diff --git a/docs/source/python/recipe/index.rst b/docs/source/python/recipe/index.rst
index 3645e0aa62..deff425ba2 100644
--- a/docs/source/python/recipe/index.rst
+++ b/docs/source/python/recipe/index.rst
@@ -28,3 +28,4 @@ Python.
driver_manager
flight_sql
postgresql
+ sqlite
diff --git a/docs/source/python/recipe/sqlite.rst b/docs/source/python/recipe/sqlite.rst
new file mode 100644
index 0000000000..5b9f1f795f
--- /dev/null
+++ b/docs/source/python/recipe/sqlite.rst
@@ -0,0 +1,25 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+..
+.. http://www.apache.org/licenses/LICENSE-2.0
+..
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+==============
+SQLite Recipes
+==============
+
+Change the batch size of the result set
+=======================================
+
+.. recipe:: sqlite_batch_rows.py
diff --git a/docs/source/python/recipe/sqlite_batch_rows.py b/docs/source/python/recipe/sqlite_batch_rows.py
new file mode 100644
index 0000000000..c880f3a9d3
--- /dev/null
+++ b/docs/source/python/recipe/sqlite_batch_rows.py
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# RECIPE CATEGORY: SQLite
+# RECIPE KEYWORDS: batch size, type inference
+# RECIPE STARTS HERE
+
+#: The ADBC SQLite driver allows control over the size of batches in result
+#: sets. Because the driver performs type inference, this also controls how
+#: many rows the driver will look at to figure out each column's type. If you
+#: know your result set begins with many NULL rows, consider increasing the
+#: batch size so that the driver can infer the correct types.
+
+import adbc_driver_sqlite.dbapi
+
+conn = adbc_driver_sqlite.dbapi.connect()
+
+#: First we'll set up a demo table with 1024 NULL values, then three strings.
+
+with conn.cursor() as cur:
+ cur.execute("CREATE TABLE demo (val TEXT)")
+
+ cur.execute(
+ """
+ WITH RECURSIVE series(n) AS (
+ SELECT 1
+ UNION ALL
+ SELECT n + 1
+ FROM series
+ WHERE n + 1 <= 1024
+ )
+ INSERT INTO demo (val)
+ SELECT NULL
+ FROM series
+ """
+ )
+
+ cur.execute("INSERT INTO demo VALUES ('foo'), ('bar'), ('baz')")
+
+#: If we query the table naively, we'll get an error. The driver infers the
+#: column type from the first batch of 1024 rows, but every value there is
+#: NULL, so it falls back to the default type of int64. That type cannot hold
+#: the strings that it then encounters in the next batch.
+
+with conn.cursor() as cur:
+ try:
+ cur.execute("SELECT * FROM demo")
+ print(cur.fetchallarrow().schema)
+ except OSError as e:
+ print(e)
+ # Output:
+ # [SQLite] Type mismatch in column 0: expected INT64 but got STRING/BINARY
+ else:
+ raise RuntimeError("Expected an error")
+
+#: We can tell the driver to increase the batch size (and hence look at more
+#: rows). With 2048 rows per batch, inference also sees the three strings.
+
+with conn.cursor() as cur:
+ cur.adbc_statement.set_options(
+ **{
+ adbc_driver_sqlite.StatementOptions.BATCH_ROWS.value: 2048,
+ }
+ )
+ cur.execute("SELECT * FROM demo")
+ print(cur.fetchallarrow().schema)
+ # Output:
+ # val: string
diff --git a/docs/source/python/recipe/sqlite_batch_rows.py.stdout.txt b/docs/source/python/recipe/sqlite_batch_rows.py.stdout.txt
new file mode 100644
index 0000000000..33fc458a34
--- /dev/null
+++ b/docs/source/python/recipe/sqlite_batch_rows.py.stdout.txt
@@ -0,0 +1,2 @@
+[SQLite] Type mismatch in column 0: expected INT64 but got STRING/BINARY
+val: string