diff --git a/news/changelog-1.5.md b/news/changelog-1.5.md
index b83d9ac899..211557aeb1 100644
--- a/news/changelog-1.5.md
+++ b/news/changelog-1.5.md
@@ -65,6 +65,7 @@ All changes included in 1.5:
 - ([#8919](https://github.com/quarto-dev/quarto-cli/issues/8919)): Ensure enough backticks in `quarto convert` from `.ipynb` to `.qmd` files.
 - ([#8998](https://github.com/quarto-dev/quarto-cli/issues/8998)): Interpret slide separation markers `---` correctly when creating the `.ipynb` intermediate notebook from a `.qmd` file.
 - ([#9133](https://github.com/quarto-dev/quarto-cli/issues/9133)): Fix issue with Jupyter engine when using paths containing special characters.
+- ([#9255](https://github.com/quarto-dev/quarto-cli/issues/9255)): Support notebook cells whose `source` field is a single `string` rather than an array of strings.
 
 ## Website Listings
 
diff --git a/src/command/convert/jupyter.ts b/src/command/convert/jupyter.ts
index 2a99e314c6..0fe28cab82 100644
--- a/src/command/convert/jupyter.ts
+++ b/src/command/convert/jupyter.ts
@@ -26,6 +26,10 @@ import { partitionCellOptions } from "../../core/lib/partition-cell-options.ts";
 import { Metadata } from "../../config/types.ts";
 import { jupyterKernelspec } from "../../core/jupyter/kernels.ts";
 import { fixupFrontMatter } from "../../core/jupyter/jupyter-fixups.ts";
+import {
+  jupyterCellSrcAsLines,
+  jupyterCellSrcAsStr,
+} from "../../core/jupyter/jupyter-shared.ts";
 
 export async function markdownToJupyterNotebook(
   file: string,
@@ -67,7 +71,9 @@ export async function jupyterNotebookToMarkdown(
         case "raw":
           // see if this is the front matter
           if (frontMatter === undefined) {
-            frontMatter = partitionYamlFrontMatter(cell.source.join(""))?.yaml;
+            frontMatter = partitionYamlFrontMatter(
+              jupyterCellSrcAsStr(cell),
+            )?.yaml;
             if (!frontMatter) {
               md.push(...mdFromRawCell(cellWithOptions));
             }
@@ -145,8 +151,11 @@ async function mdFromCodeCell(
   }
 
   // determine the largest number of backticks in the cell
+
   const maxBackticks = Math.max(
-    ...cell.source.map((line) => line.match(/^`+/g)?.[0].length || 0),
+    ...jupyterCellSrcAsLines(cell).map((line) =>
+      line.match(/^`+/g)?.[0].length || 0
+    ),
     2,
   );
   const backticks = "`".repeat(maxBackticks + 1);
@@ -155,7 +164,10 @@ async function mdFromCodeCell(
   const md: string[] = [backticks + "{" + language + "}\n"];
 
   // partition
-  const { yaml, source } = await partitionCellOptions(language, cell.source);
+  const { yaml, source } = await partitionCellOptions(
+    language,
+    jupyterCellSrcAsLines(cell),
+  );
   const options = yaml ? yaml as JupyterCellOptions : {};
 
   if (!includeIds) {
diff --git a/src/core/jupyter/jupyter-filters.ts b/src/core/jupyter/jupyter-filters.ts
index 9919be6ec3..d84e40b23a 100644
--- a/src/core/jupyter/jupyter-filters.ts
+++ b/src/core/jupyter/jupyter-filters.ts
@@ -1,9 +1,8 @@
 /*
-* jupyter-filters.ts
-*
-* Copyright (C) 2020-2022 Posit Software, PBC
-*
-*/
+ * jupyter-filters.ts
+ *
+ * Copyright (C) 2020-2022 Posit Software, PBC
+ */
 
 import { existsSync } from "fs/exists.ts";
 import { basename, dirname, isAbsolute, join } from "../../deno_ral/path.ts";
@@ -19,6 +18,7 @@ import {
 } from "./filtered-notebook-cache.ts";
 import { fixupFrontMatter } from "./jupyter-fixups.ts";
 import { JupyterNotebook } from "./types.ts";
+import { jupyterCellSrcAsStr } from "./jupyter-shared.ts";
 
 export async function markdownFromNotebookFile(file: string, format?: Format) {
   // read file with any filters
@@ -36,7 +36,7 @@ export function markdownFromNotebookJSON(nb: JupyterNotebook) {
 
   const markdown = nb.cells.reduce((md, cell) => {
     if (["markdown", "raw"].includes(cell.cell_type)) {
-      return md + "\n" + cell.source.join("") + "\n";
+      return md + "\n" + jupyterCellSrcAsStr(cell) + "\n";
     } else {
       return md;
     }
diff --git a/src/core/jupyter/jupyter-fixups.ts b/src/core/jupyter/jupyter-fixups.ts
index ab6c413c51..c6f207bf88 100644
--- a/src/core/jupyter/jupyter-fixups.ts
+++ b/src/core/jupyter/jupyter-fixups.ts
@@ -13,6 +13,10 @@ import { lines } from "../lib/text.ts";
 import { markdownWithExtractedHeading } from "../pandoc/pandoc-partition.ts";
 import { partitionYamlFrontMatter, readYamlFromMarkdown } from "../yaml.ts";
 import { JupyterNotebook, JupyterOutput } from "./types.ts";
+import {
+  jupyterCellSrcAsLines,
+  jupyterCellSrcAsStr,
+} from "./jupyter-shared.ts";
 
 export function fixupStreams(nb: JupyterNotebook): JupyterNotebook {
   for (const cell of nb.cells) {
@@ -155,7 +159,8 @@ export function fixupFrontMatter(nb: JupyterNotebook): JupyterNotebook {
   let partitioned: { yaml: string; markdown: string } | undefined;
   const frontMatterCellIndex = nb.cells.findIndex((cell) => {
     if (cell.cell_type === "raw" || cell.cell_type === "markdown") {
-      partitioned = partitionYamlFrontMatter(cell.source.join("")) || undefined;
+      partitioned = partitionYamlFrontMatter(jupyterCellSrcAsStr(cell)) ||
+        undefined;
       if (partitioned) {
         cell.cell_type = "raw";
         return true;
@@ -179,7 +184,7 @@ export function fixupFrontMatter(nb: JupyterNotebook): JupyterNotebook {
     if (cell.cell_type === "markdown") {
       const { lines, headingText, contentBeforeHeading } =
         markdownWithExtractedHeading(
-          nbLines(cell.source).join(""),
+          nbLines(jupyterCellSrcAsLines(cell)).join(""),
         );
       if (headingText && !contentBeforeHeading) {
         title = headingText;
diff --git a/src/core/jupyter/jupyter-shared.ts b/src/core/jupyter/jupyter-shared.ts
index 84a5084bc2..4fb3084b07 100644
--- a/src/core/jupyter/jupyter-shared.ts
+++ b/src/core/jupyter/jupyter-shared.ts
@@ -12,8 +12,21 @@ import { pathWithForwardSlashes } from "../path.ts";
 
 import { pythonExecForCaps } from "./exec.ts";
 import { jupyterKernelspecs } from "./kernels.ts";
-import { JupyterCapabilities, JupyterKernelspec } from "./types.ts";
+import {
+  JupyterCapabilities,
+  JupyterCell,
+  JupyterKernelspec,
+} from "./types.ts";
 import { isEnvDir } from "./capabilities.ts";
+import { lines } from "../lib/text.ts";
+
+// Returns the cell's source as one string: a string is passed through, an array is joined with "".
+export const jupyterCellSrcAsStr = ({ source }: JupyterCell) =>
+  typeof source !== "string" ? source.join("") : source;
+
+// Returns the cell's source as an array: a string is split via lines(), an array is returned as-is.
+export const jupyterCellSrcAsLines = ({ source }: JupyterCell) =>
+  typeof source !== "string" ? source : lines(source);
 
 export async function jupyterCapabilitiesMessage(
   caps: JupyterCapabilities,
diff --git a/src/core/jupyter/jupyter.ts b/src/core/jupyter/jupyter.ts
index 9002f4d842..d39801011a 100644
--- a/src/core/jupyter/jupyter.ts
+++ b/src/core/jupyter/jupyter.ts
@@ -169,6 +169,10 @@ import {
   resolveUserExpressions,
   userExpressionsFromCell,
 } from "./jupyter-inline.ts";
+import {
+  jupyterCellSrcAsLines,
+  jupyterCellSrcAsStr,
+} from "./jupyter-shared.ts";
 
 export const kQuartoMimeType = "quarto_mimetype";
 export const kQuartoOutputOrder = "quarto_order";
@@ -349,7 +353,7 @@ export async function quartoMdToJupyter(
       }
       if (cell_type === "raw" && frontMatter) {
         // delete 'jupyter' metadata since we've already transferred it
-        const yaml = readYamlFromMarkdown(cell.source.join(""));
+        const yaml = readYamlFromMarkdown(jupyterCellSrcAsStr(cell));
         if (yaml.jupyter) {
           delete yaml.jupyter;
           // write the cell only if there is metadata to write
@@ -371,9 +375,12 @@ export async function quartoMdToJupyter(
         }
       } else if (cell_type === "code") {
         // see if there is embedded metadata we should forward into the cell metadata
+        const cellSrcLines = typeof cell.source === "string"
+          ? lines(cell.source)
+          : cell.source;
         const { yaml, source } = partitionCellOptions(
           kernelspec.language.toLowerCase(),
-          cell.source,
+          cellSrcLines,
         );
         if (yaml && !Array.isArray(yaml) && typeof yaml === "object") {
           // use label as id if necessary
@@ -412,7 +419,10 @@ export async function quartoMdToJupyter(
       }
 
       // if the source is empty then don't add it
-      cell.source = trimEmptyLines(cell.source);
+      const cellSrcLines = typeof cell.source === "string"
+        ? lines(cell.source)
+        : cell.source;
+      cell.source = trimEmptyLines(cellSrcLines);
       if (cell.source.length > 0) {
         nb.cells.push(cell);
       }
@@ -777,7 +787,9 @@ export async function jupyterToMarkdown(
           // If this is the front matter cell, don't wrap it in
           // a cell envelope, as it need to be remain discoverable
           if (frontMatter === undefined) {
-            frontMatter = partitionYamlFrontMatter(cell.source.join(""))?.yaml;
+            frontMatter = partitionYamlFrontMatter(
+              jupyterCellSrcAsStr(cell),
+            )?.yaml;
             if (frontMatter) {
               markdownOptions.preserveCellMetadata = false;
             }
@@ -858,7 +870,7 @@ export function jupyterCellWithOptions(
 ): JupyterCellWithOptions {
   const { yaml, optionsSource, source } = partitionCellOptions(
     language,
-    cell.source,
+    jupyterCellSrcAsLines(cell),
   );
 
   // read any options defined in cell metadata
@@ -1026,7 +1038,7 @@ export function mdFromRawCell(
 
   const mimeType = cell.metadata?.[kCellRawMimeType];
   if (mimeType) {
-    const rawOutput = mdRawOutput(mimeType, cell.source);
+    const rawOutput = mdRawOutput(mimeType, jupyterCellSrcAsLines(cell));
     if (rawOutput) {
       return rawCellEnvelope(cell.id, rawOutput);
     }
@@ -1402,7 +1414,7 @@ async function mdFromCodeCell(
   if (includeCode(cell, options) || options.preserveCodeCellYaml) {
     const fenced = echoFenced(cell, options);
     const ticks = "`".repeat(
-      Math.max(countTicks(cell.source) + 1, fenced ? 4 : 3),
+      Math.max(countTicks(jupyterCellSrcAsLines(cell)) + 1, fenced ? 4 : 3),
     );
 
     md.push(ticks + " {");
diff --git a/src/core/jupyter/types.ts b/src/core/jupyter/types.ts
index 2320c13747..4d273ae117 100644
--- a/src/core/jupyter/types.ts
+++ b/src/core/jupyter/types.ts
@@ -104,7 +104,7 @@ export interface JupyterCell {
   cell_type: "markdown" | "code" | "raw";
   execution_count?: null | number;
   metadata: JupyterCellMetadata;
-  source: string[];
+  source: string | string[];
   attachments?: Record<string, Record<string, string>>;
   outputs?: JupyterOutput[];
 }
diff --git a/tests/docs/smoke-all/2024/04/02/9255.ipynb b/tests/docs/smoke-all/2024/04/02/9255.ipynb
new file mode 100644
index 0000000000..874aa69e97
--- /dev/null
+++ b/tests/docs/smoke-all/2024/04/02/9255.ipynb
@@ -0,0 +1 @@
+{"metadata": {"kernelspec": {"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3"}, "language_info": {"name": "python", "version": "3.7.10", "mimetype": "text/x-python", "codemirror_mode": {"name": "ipython", "version": 3}, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py"}}, "nbformat_minor": 5, "nbformat": 4, "cells": [{"cell_type": "markdown", "source": "# Dense Array Basics", "metadata": {}, "id": "98873b91"}, {"cell_type": "markdown", "source": "In this tutorial you will learn how to:\n* create a dense array\n* inspect the array schema\n* write to and read from the array\n* write and read array metadata\n* create arrays with multiple attributes and var-sized attributes\n* treat dense arrays as dataframes and even run SQL queries", "metadata": {}, "id": "ebf09493"}, {"cell_type": "markdown", "source": "## Necessary Libraries\n\nYou need to install [TileDB-Py](https://github.com/TileDB-Inc/TileDB-Py), the Python wrapper of [TileDB Embedded](https://github.com/TileDB-Inc/TileDB), as follows:\n\n```bash\n# Pip:\n$ pip install tiledb\n\n# Or Conda:\n$ conda install -c conda-forge tiledb-py\n```\n\nYou'll also need to install pandas and numpy\n\n```bash\n# Pip:\n$ pip install numpy pandas\n\n# Or Conda:\n$ conda install numpy pandas\n```\n\nNote that the TileDB core is a C++ library. To boost performance when integrating with pandas, we use Apache Arrow to achieve zero-copy when returning results from TileDB into pandas dataframes. You need to **install pyarrow** to take advantage of this optimization.\n\n```bash\n# Pip:\n$ pip install pyarrow\n\n# Or Conda:\n$ conda install -c conda-forge pyarrow\n```\n\nOne of the cool things about TileDB is that it offers a powerful integration with embedded MariaDB. This allows for execution of arbitrary SQL queries directly on TileDB arrays (both dense and sparse). 
We took appropriate care to push the fast slicing, column subselecting and column conditions of the query down to TileDB, leaving the rest of the SQL execution to MariaDB.\n\nTo install this capability, run:\n```bash\nconda install -c conda-forge libtiledb-sql-py\n```", "metadata": {"tags": []}, "id": "4a292233-655e-454c-b42b-70eebe4f63e7"}, {"cell_type": "markdown", "source": "## Setup\n\nWe first start by importing the libraries we will use in this tutorial (ignore any thrown mysql errors/warnings, they are benign).", "metadata": {}, "id": "738744d2-e6b2-48a3-84bc-c34536b09d7c"}, {"cell_type": "code", "source": "import pandas as pd, numpy as np\nimport shutil, urllib.request, os.path\nimport tiledb, tiledb.sql\n\nprint(\"TileDB core version: {}\".format(tiledb.libtiledb.version()))\nprint(\"TileDB-Py version: {}\".format(tiledb.version()))\ndb = tiledb.sql.connect()\nprint(\"TileDB-SQL-Py version: {}\".format(pd.read_sql(\"SELECT PLUGIN_AUTH_VERSION FROM information_schema.PLUGINS WHERE PLUGIN_NAME='mytile'\", con=db)['PLUGIN_AUTH_VERSION'][0]))", "metadata": {"trusted": true}, "execution_count": 1, "outputs": [{"name": "stdout", "text": "TileDB core version: (2, 8, 2)\nTileDB-Py version: (0, 14, 5)\nTileDB-SQL-Py version: 0.14.0\n", "output_type": "stream"}, {"name": "stderr", "text": "Got ERROR: \"Could not open mysql.plugin table: \"Table 'mysql.plugin' doesn't exist\". Some plugins may be not loaded\" errno: 2000\nGot ERROR: \"Can't open and lock privilege tables: Table 'mysql.servers' doesn't exist\" errno: 2000\nGot ERROR: \"Can't open the mysql.func table. Please run mysql_upgrade to create it.\" errno: 2000\n", "output_type": "stream"}], "id": "a296d765-afa1-416c-b020-4003f1cd6342"}, {"cell_type": "markdown", "source": "Before we start, we create the TileDB context passing a **configuration parameter** around memory allocation during read queries that will be explained in a later tutorial. 
That needs to be set at the *very beginning* of the code and before any other TileDB function is called.", "metadata": {}, "id": "83e8ac3a-e9f8-4db8-96da-50a75ca20f69"}, {"cell_type": "code", "source": "# Sets the buffer size parameter to 50MB. TileDB will allocate 50MB per attribute at the\n# start of the query, but if the result is larger, TileDB will automatically expand \n# the buffers to fit the whole result.\ncfg = tiledb.Ctx().config()\ncfg.update(\n  {\n    'py.init_buffer_bytes': 1024**2 * 50\n  }\n)\ntiledb.default_ctx(cfg)", "metadata": {"trusted": true}, "execution_count": 2, "outputs": [{"execution_count": 2, "output_type": "execute_result", "data": {"text/plain": "tiledb.Ctx() [see Ctx.config() for configuration]"}, "metadata": {}}], "id": "50c64345-88c5-4040-8d4a-8da75ace117a"}, {"cell_type": "markdown", "source": "Below are the names of the arrays we will create. If you are running this on TileDB Cloud, you should use `~/` to store everything in the home directory of your dedicated EBS volume.", "metadata": {}, "id": "d87e66e8-0bfd-4487-a139-6874c78ccb5a"}, {"cell_type": "code", "source": "array_dense_1 = os.path.expanduser(\"~/array_dense_1\")\narray_dense_2 = os.path.expanduser(\"~/array_dense_2\")\narray_dense_3 = os.path.expanduser(\"~/array_dense_3\")", "metadata": {"trusted": true}, "execution_count": 3, "outputs": [], "id": "72f510ff-c2ad-4bc0-84de-02d0c4fa178d"}, {"cell_type": "markdown", "source": "Remove the arrays if they already exist.", "metadata": {}, "id": "c3c4d0a8-b529-40b4-8e7c-9972318925b7"}, {"cell_type": "code", "source": "if os.path.exists(array_dense_1):\n    shutil.rmtree(array_dense_1)\nif os.path.exists(array_dense_2):\n    shutil.rmtree(array_dense_2)\nif os.path.exists(array_dense_3):\n    shutil.rmtree(array_dense_3)", "metadata": {"trusted": true}, "execution_count": 4, "outputs": [], "id": "389ae239-edae-4ba7-a6f0-40dd2f23fa4a"}, {"cell_type": "markdown", "source": "## A simple 2D dense array", "metadata": {}, "id": 
"6efad429"}, {"cell_type": "markdown", "source": "We will create a 2D dense array, with dimensions `d1` and `d2` and domains `[1,4]`. The array will also have a single integer attribute `a`.", "metadata": {}, "id": "5a0ef1a5"}, {"cell_type": "code", "source": "# Create the two dimensions\nd1 = tiledb.Dim(name=\"d1\", domain=(1, 4), tile=2, dtype=np.int32)\nd2 = tiledb.Dim(name=\"d2\", domain=(1, 4), tile=2, dtype=np.int32)\n\n# Create a domain using the two dimensions\ndom1 = tiledb.Domain(d1, d2)\n\n# Create an attribute\na = tiledb.Attr(name=\"a\", dtype=np.int32)\n\n# Create the array schema, setting `sparse=False` to indicate a dense array\nschema1 = tiledb.ArraySchema(domain=dom1, sparse=False, attrs=[a])\n\n# Create the array on disk (it will initially be empty)\ntiledb.Array.create(array_dense_1, schema1)", "metadata": {"trusted": true}, "execution_count": 5, "outputs": [], "id": "9ce48a99"}, {"cell_type": "markdown", "source": "Let's view the contents of the array so far (we use the `tree` package):", "metadata": {}, "id": "a92a56d4"}, {"cell_type": "code", "source": "!tree $array_dense_1", "metadata": {"trusted": true}, "execution_count": 6, "outputs": [{"name": "stdout", "text": "\u001b[01;34m/home/jovyan/array_dense_1\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__commits\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__fragment_meta\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__fragments\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__meta\u001b[0m\n\u2514\u2500\u2500 \u001b[01;34m__schema\u001b[0m\n    \u2514\u2500\u2500 \u001b[00m__1653659358842_1653659358842_7e48216466f74d2eafe0b1d55adbbfe5\u001b[0m\n\n5 directories, 1 file\n", "output_type": "stream"}], "id": "26b39a6f"}, {"cell_type": "markdown", "source": "The array does not contain any data yet. 
It only has an array schema file inside the `__schema` folder that describes the array (e.g., the number of dimensions, their names and types, etc).", "metadata": {}, "id": "16fdb0f1"}, {"cell_type": "markdown", "source": "To inspect the array schema, simply run:", "metadata": {}, "id": "0bd2ca7a"}, {"cell_type": "code", "source": "# Read the array schema\nschema = tiledb.ArraySchema.load(array_dense_1)\nschema", "metadata": {"trusted": true}, "execution_count": 7, "outputs": [{"execution_count": 7, "output_type": "execute_result", "data": {"text/plain": "ArraySchema(\n  domain=Domain(*[\n    Dim(name='d1', domain=(1, 4), tile=2, dtype='int32'),\n    Dim(name='d2', domain=(1, 4), tile=2, dtype='int32'),\n  ]),\n  attrs=[\n    Attr(name='a', dtype='int32', var=False, nullable=False),\n  ],\n  cell_order='row-major',\n  tile_order='row-major',\n  capacity=10000,\n  sparse=False,\n)", "text/html": "<table><tr><th>Domain</th></tr><tr><td><table><tr><th>Name</th><th>Domain</th><th>Tile</th><th>Data Type</th><th>Is Var-length</th><th>Filters</th></tr><tr><td>d1</td><td>(1, 4)</td><td>2</td><td>int32</td><td>False</td><td>-</td></tr><tr><td>d2</td><td>(1, 4)</td><td>2</td><td>int32</td><td>False</td><td>-</td></tr></table></td></tr><tr><th>Attributes</th></tr><tr><td><table><tr><th>Name</th><th>Data Type</th><th>Is Var-Len</th><th>Is Nullable</th><th>Filters</th></tr><tr><td>a</td><td>int32</td><td>False</td><td>False</td><td>-</td></tr></table></td></tr><tr><th>Cell Order</th></tr><tr><td>row-major</td></tr><tr><th>Tile Order</th></tr><tr><td>row-major</td></tr><tr><th>Capacity</th></tr><tr><td>10000</td></tr><tr><th>Sparse</th></tr><tr><td>False</td></tr></table>"}, "metadata": {}}], "id": "bdcf6e9e"}, {"cell_type": "markdown", "source": "Now let's write some data to the array, using a 2D numpy array:", "metadata": {}, "id": "44b28dfc"}, {"cell_type": "code", "source": "# Prepare some data in a numpy array\ndata = np.array([\n    [1, 2, 3, 4],\n    [5, 6, 7, 8],\n    
[9, 10, 11, 12],\n    [13, 14, 15, 16]], dtype=np.int32)\ndata", "metadata": {"trusted": true}, "execution_count": 8, "outputs": [{"execution_count": 8, "output_type": "execute_result", "data": {"text/plain": "array([[ 1,  2,  3,  4],\n       [ 5,  6,  7,  8],\n       [ 9, 10, 11, 12],\n       [13, 14, 15, 16]], dtype=int32)"}, "metadata": {}}], "id": "a21d0b5a"}, {"cell_type": "code", "source": "# Open the array in write mode and write to the whole array domain\nwith tiledb.open(array_dense_1, 'w') as A:\n    A[:] = data", "metadata": {"trusted": true}, "execution_count": 9, "outputs": [], "id": "35e1ccf2"}, {"cell_type": "markdown", "source": "Let's inspect the array again:", "metadata": {}, "id": "cb90c18e"}, {"cell_type": "code", "source": "!tree $array_dense_1", "metadata": {"trusted": true}, "execution_count": 10, "outputs": [{"name": "stdout", "text": "\u001b[01;34m/home/jovyan/array_dense_1\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__commits\u001b[0m\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 \u001b[00m__1653659359536_1653659359536_a7686fa0ed674c19841b413520b136a8_12.wrt\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__fragment_meta\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__fragments\u001b[0m\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 \u001b[01;34m__1653659359536_1653659359536_a7686fa0ed674c19841b413520b136a8_12\u001b[0m\n\u2502\u00a0\u00a0     \u251c\u2500\u2500 \u001b[00ma0.tdb\u001b[0m\n\u2502\u00a0\u00a0     \u2514\u2500\u2500 \u001b[00m__fragment_metadata.tdb\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__meta\u001b[0m\n\u2514\u2500\u2500 \u001b[01;34m__schema\u001b[0m\n    \u2514\u2500\u2500 \u001b[00m__1653659358842_1653659358842_7e48216466f74d2eafe0b1d55adbbfe5\u001b[0m\n\n6 directories, 4 files\n", "output_type": "stream"}], "id": "afbf06f2"}, {"cell_type": "markdown", "source": "Now there is a fragment directory in the `fragments` folder and a commit file with the same name and suffix `.wrt` in the `commits` folder. 
", "metadata": {}, "id": "0db38c15"}, {"cell_type": "markdown", "source": "Let's read the array:", "metadata": {}, "id": "4700fcb5"}, {"cell_type": "code", "source": "# Open the array in read mode and read the whole array\nA = tiledb.open(array_dense_1, 'r')", "metadata": {"trusted": true}, "execution_count": 11, "outputs": [], "id": "79d7c4a7-e675-435b-a7fd-bd5c7a2d951a"}, {"cell_type": "code", "source": "print(A[:])        # dictionary of 2D numpy arrays\nprint(A[:]['a'])   # numpy array", "metadata": {"trusted": true}, "execution_count": 12, "outputs": [{"name": "stdout", "text": "OrderedDict([('a', array([[ 1,  2,  3,  4],\n       [ 5,  6,  7,  8],\n       [ 9, 10, 11, 12],\n       [13, 14, 15, 16]], dtype=int32))])\n[[ 1  2  3  4]\n [ 5  6  7  8]\n [ 9 10 11 12]\n [13 14 15 16]]\n", "output_type": "stream"}], "id": "bc650555"}, {"cell_type": "markdown", "source": "Note that the result of `A[:]` is a dictionary containing a 2D numpy array per attribute.", "metadata": {}, "id": "72ac7d72-dc7f-43ca-a8ea-2d58319f9cb7"}, {"cell_type": "markdown", "source": "We can also efficiently slice a portion of the array (very useful when the arrays are too big to fit in main memory):", "metadata": {}, "id": "5654c8fb"}, {"cell_type": "code", "source": "print(A[1:3, 1:2][\"a\"])", "metadata": {"trusted": true}, "execution_count": 13, "outputs": [{"name": "stdout", "text": "[[1]\n [5]]\n", "output_type": "stream"}], "id": "cb4920cb"}, {"cell_type": "markdown", "source": "We can even slice a **multi-rage subarray** (note that `multi_index` uses *closed ranges*).", "metadata": {}, "id": "a8cab392"}, {"cell_type": "code", "source": "print(A.multi_index[[slice(1,2), 4], slice(1,3)][\"a\"])", "metadata": {"trusted": true}, "execution_count": 14, "outputs": [{"name": "stdout", "text": "[[ 1  2  3]\n [ 5  6  7]\n [13 14 15]]\n", "output_type": "stream"}], "id": "5785d39b"}, {"cell_type": "markdown", "source": "Remember to close the array.", "metadata": {}, "id": 
"44b3b1b7-4201-47fa-a908-0a9ce455cb80"}, {"cell_type": "code", "source": "A.close()", "metadata": {"trusted": true}, "execution_count": 15, "outputs": [], "id": "7616b00d-a5c9-4b75-b198-862a0c8416a0"}, {"cell_type": "markdown", "source": "## Array metadata", "metadata": {}, "id": "749a0cf9"}, {"cell_type": "markdown", "source": "You can also attach any **key-value** metadata to an array:", "metadata": {}, "id": "bc10117c"}, {"cell_type": "code", "source": "# Open the array for writing\nwith tiledb.open(array_dense_1, \"w\") as A:\n    A.meta[\"author\"] = \"Stavros\"\n    A.meta[\"volume\"] = 2.1\n    # multiple values of the same type may be written as a tuple:\n    A.meta[\"tuple_int\"] = (1,2,3,4)", "metadata": {"trusted": true}, "execution_count": 16, "outputs": [], "id": "58e40524"}, {"cell_type": "markdown", "source": "Let's inspect the array again:", "metadata": {}, "id": "f9e492c9"}, {"cell_type": "code", "source": "!tree $array_dense_1", "metadata": {"trusted": true}, "execution_count": 17, "outputs": [{"name": "stdout", "text": "\u001b[01;34m/home/jovyan/array_dense_1\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__commits\u001b[0m\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 \u001b[00m__1653659359536_1653659359536_a7686fa0ed674c19841b413520b136a8_12.wrt\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__fragment_meta\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__fragments\u001b[0m\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 \u001b[01;34m__1653659359536_1653659359536_a7686fa0ed674c19841b413520b136a8_12\u001b[0m\n\u2502\u00a0\u00a0     \u251c\u2500\u2500 \u001b[00ma0.tdb\u001b[0m\n\u2502\u00a0\u00a0     \u2514\u2500\u2500 \u001b[00m__fragment_metadata.tdb\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__meta\u001b[0m\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 \u001b[00m__1653659360170_1653659360170_e1f3fc5ddb074268933d7714b4c772bd\u001b[0m\n\u2514\u2500\u2500 \u001b[01;34m__schema\u001b[0m\n    \u2514\u2500\u2500 
\u001b[00m__1653659358842_1653659358842_7e48216466f74d2eafe0b1d55adbbfe5\u001b[0m\n\n6 directories, 5 files\n", "output_type": "stream"}], "id": "7c8b1d2e"}, {"cell_type": "markdown", "source": "Now observe that there is a new file inside the `__meta` folder, which contains the key-value pairs we have written.", "metadata": {}, "id": "1425c3d9"}, {"cell_type": "markdown", "source": "Let's read those values back from the array:", "metadata": {}, "id": "afb7bf99"}, {"cell_type": "code", "source": "# Open the array for reading\nwith tiledb.open(array_dense_1, \"r\") as A:\n    # print values from specific keys\n    print(A.meta[\"author\"])\n    print(A.meta[\"volume\"])\n    print(A.meta[\"tuple_int\"])\n    \n    # print all keys:\n    print(A.meta.keys())", "metadata": {"trusted": true}, "execution_count": 18, "outputs": [{"name": "stdout", "text": "Stavros\n2.1\n(1, 2, 3, 4)\n['author', 'tuple_int', 'volume']\n", "output_type": "stream"}], "id": "f392cf3b"}, {"cell_type": "markdown", "source": "## Storing multiple attributes", "metadata": {}, "id": "a795c98c"}, {"cell_type": "markdown", "source": "TileDB allows you to store more than one values in each cell, potentially of different types. This means that the array can have more than one attributes. 
TileDB is a **\"columnar\"** format, in that it stores the values of each attribute in a separate file, allowing for better compression and faster attribute subselection.", "metadata": {}, "id": "35f20071"}, {"cell_type": "markdown", "source": "Let's create the same 2D array, but now with an extra `char` attribute.", "metadata": {}, "id": "19617751"}, {"cell_type": "code", "source": "# Create the two dimensions\nd1 = tiledb.Dim(name=\"d1\", domain=(1, 4), tile=2, dtype=np.int32)\nd2 = tiledb.Dim(name=\"d2\", domain=(1, 4), tile=2, dtype=np.int32)\n\n# Create a domain using the two dimensions\ndom2 = tiledb.Domain(d1, d2)\n\n# Create two attributes\na1 = tiledb.Attr(name=\"a1\", dtype=np.int32)\na2 = tiledb.Attr(name=\"a2\", dtype=\"S1\")\n\n# Create the array schema, setting `sparse=False` to indicate a dense array\nschema2 = tiledb.ArraySchema(domain=dom2, sparse=False, attrs=[a1, a2])\n\n# Create the array on disk (it will initially be empty)\ntiledb.Array.create(array_dense_2, schema2)", "metadata": {"trusted": true}, "execution_count": 19, "outputs": [], "id": "52f1a000"}, {"cell_type": "markdown", "source": "Let's inspect the array schema to see the two attributes listed:", "metadata": {}, "id": "5685ff32"}, {"cell_type": "code", "source": "# Read the array schema\nschema = tiledb.ArraySchema.load(array_dense_2)\nschema", "metadata": {"trusted": true}, "execution_count": 20, "outputs": [{"execution_count": 20, "output_type": "execute_result", "data": {"text/plain": "ArraySchema(\n  domain=Domain(*[\n    Dim(name='d1', domain=(1, 4), tile=2, dtype='int32'),\n    Dim(name='d2', domain=(1, 4), tile=2, dtype='int32'),\n  ]),\n  attrs=[\n    Attr(name='a1', dtype='int32', var=False, nullable=False),\n    Attr(name='a2', dtype='|S1', var=False, nullable=False),\n  ],\n  cell_order='row-major',\n  tile_order='row-major',\n  capacity=10000,\n  sparse=False,\n)", "text/html": 
"<table><tr><th>Domain</th></tr><tr><td><table><tr><th>Name</th><th>Domain</th><th>Tile</th><th>Data Type</th><th>Is Var-length</th><th>Filters</th></tr><tr><td>d1</td><td>(1, 4)</td><td>2</td><td>int32</td><td>False</td><td>-</td></tr><tr><td>d2</td><td>(1, 4)</td><td>2</td><td>int32</td><td>False</td><td>-</td></tr></table></td></tr><tr><th>Attributes</th></tr><tr><td><table><tr><th>Name</th><th>Data Type</th><th>Is Var-Len</th><th>Is Nullable</th><th>Filters</th></tr><tr><td>a1</td><td>int32</td><td>False</td><td>False</td><td>-</td></tr><tr><td>a2</td><td>|S1</td><td>False</td><td>False</td><td>-</td></tr></table></td></tr><tr><th>Cell Order</th></tr><tr><td>row-major</td></tr><tr><th>Tile Order</th></tr><tr><td>row-major</td></tr><tr><th>Capacity</th></tr><tr><td>10000</td></tr><tr><th>Sparse</th></tr><tr><td>False</td></tr></table>"}, "metadata": {}}], "id": "a4a86c52"}, {"cell_type": "markdown", "source": "Now let's prepare and write some data to the array:", "metadata": {}, "id": "afbba0c8"}, {"cell_type": "code", "source": "# Prepare some data for the first attribute\na1_data = np.array([\n    [1, 2, 3, 4],\n    [5, 6, 7, 8],\n    [9, 10, 11, 12],\n    [13, 14, 15, 16]], dtype=np.int32)\na1_data", "metadata": {"trusted": true}, "execution_count": 21, "outputs": [{"execution_count": 21, "output_type": "execute_result", "data": {"text/plain": "array([[ 1,  2,  3,  4],\n       [ 5,  6,  7,  8],\n       [ 9, 10, 11, 12],\n       [13, 14, 15, 16]], dtype=int32)"}, "metadata": {}}], "id": "872b4157"}, {"cell_type": "code", "source": "# Prepare some data for the second attribute\na2_data = np.array([\n    ['a', 'b', 'c', 'd'],\n    ['e', 'f', 'g', 'h'],\n    ['i', 'j', 'k', 'l'],\n    ['m', 'n', 'o', 'p']], dtype=\"S1\")\na2_data", "metadata": {"trusted": true}, "execution_count": 22, "outputs": [{"execution_count": 22, "output_type": "execute_result", "data": {"text/plain": "array([[b'a', b'b', b'c', b'd'],\n       [b'e', b'f', b'g', b'h'],\n       [b'i', b'j', 
b'k', b'l'],\n       [b'm', b'n', b'o', b'p']], dtype='|S1')"}, "metadata": {}}], "id": "4d128758"}, {"cell_type": "code", "source": "# Write an ordered dictionary, passing one numpy array per attribute\nwith tiledb.open(array_dense_2, 'w') as A:\n    A[:] = {'a1': a1_data, 'a2': a2_data}", "metadata": {"trusted": true}, "execution_count": 23, "outputs": [], "id": "f9ad008e"}, {"cell_type": "markdown", "source": "Let's read all the data back:", "metadata": {}, "id": "ffe1ee62"}, {"cell_type": "code", "source": "A = tiledb.open(array_dense_2, 'r')", "metadata": {"trusted": true}, "execution_count": 24, "outputs": [], "id": "97fc8cec-331a-4522-b403-c0109cb6c45b"}, {"cell_type": "code", "source": "print(A[:][\"a2\"])", "metadata": {"trusted": true}, "execution_count": 25, "outputs": [{"name": "stdout", "text": "[[b'a' b'b' b'c' b'd']\n [b'e' b'f' b'g' b'h']\n [b'i' b'j' b'k' b'l']\n [b'm' b'n' b'o' b'p']]\n", "output_type": "stream"}], "id": "0439d3b1"}, {"cell_type": "markdown", "source": "The above always reads **all** attributes for the given slice. If you wish to read a subset of attributes, there is a faster way that allows fetching only the desired data:", "metadata": {}, "id": "0e4b3083"}, {"cell_type": "code", "source": "print(A.query(attrs=['a1'])[:])\nprint(A.query(attrs=['a2'])[:])", "metadata": {"trusted": true}, "execution_count": 26, "outputs": [{"name": "stdout", "text": "OrderedDict([('a1', array([[ 1,  2,  3,  4],\n       [ 5,  6,  7,  8],\n       [ 9, 10, 11, 12],\n       [13, 14, 15, 16]], dtype=int32))])\nOrderedDict([('a2', array([[b'a', b'b', b'c', b'd'],\n       [b'e', b'f', b'g', b'h'],\n       [b'i', b'j', b'k', b'l'],\n       [b'm', b'n', b'o', b'p']], dtype='|S1'))])\n", "output_type": "stream"}], "id": "370472f5"}, {"cell_type": "markdown", "source": "## Variable-length attributes", "metadata": {}, "id": "42b0b17e"}, {"cell_type": "markdown", "source": "TileDB supports also variable-length attributes, such as strings. 
Here is the same 2D array we used above, but now having a single string attribute.", "metadata": {"tags": []}, "id": "d1381bf9"}, {"cell_type": "code", "source": "# Create the two dimensions\nd1 = tiledb.Dim(name=\"d1\", domain=(1, 4), tile=2, dtype=np.int32)\nd2 = tiledb.Dim(name=\"d2\", domain=(1, 4), tile=2, dtype=np.int32)\n\n# Create a domain using the two dimensions\ndom3 = tiledb.Domain(d1, d2)\n\n# Create a string attribute\na = tiledb.Attr(name=\"a\", dtype=\"S0\")\n\n# Create the array schema, setting `sparse=False` to indicate a dense array\nschema3 = tiledb.ArraySchema(domain=dom3, sparse=False, attrs=[a])\n\n# Create the array on disk (it will initially be empty)\ntiledb.Array.create(array_dense_3, schema3)", "metadata": {"trusted": true}, "execution_count": 27, "outputs": [], "id": "c6301b3a"}, {"cell_type": "markdown", "source": "Let's inspect the array schema:", "metadata": {}, "id": "40067d6b"}, {"cell_type": "code", "source": "# Read the array schema\nschema = tiledb.ArraySchema.load(array_dense_3)\nschema", "metadata": {"trusted": true}, "execution_count": 28, "outputs": [{"execution_count": 28, "output_type": "execute_result", "data": {"text/plain": "ArraySchema(\n  domain=Domain(*[\n    Dim(name='d1', domain=(1, 4), tile=2, dtype='int32'),\n    Dim(name='d2', domain=(1, 4), tile=2, dtype='int32'),\n  ]),\n  attrs=[\n    Attr(name='a', dtype='|S0', var=True, nullable=False),\n  ],\n  cell_order='row-major',\n  tile_order='row-major',\n  capacity=10000,\n  sparse=False,\n)", "text/html": "<table><tr><th>Domain</th></tr><tr><td><table><tr><th>Name</th><th>Domain</th><th>Tile</th><th>Data Type</th><th>Is Var-length</th><th>Filters</th></tr><tr><td>d1</td><td>(1, 4)</td><td>2</td><td>int32</td><td>False</td><td>-</td></tr><tr><td>d2</td><td>(1, 4)</td><td>2</td><td>int32</td><td>False</td><td>-</td></tr></table></td></tr><tr><th>Attributes</th></tr><tr><td><table><tr><th>Name</th><th>Data Type</th><th>Is Var-Len</th><th>Is 
Nullable</th><th>Filters</th></tr><tr><td>a</td><td>|S0</td><td>True</td><td>False</td><td>-</td></tr></table></td></tr><tr><th>Cell Order</th></tr><tr><td>row-major</td></tr><tr><th>Tile Order</th></tr><tr><td>row-major</td></tr><tr><th>Capacity</th></tr><tr><td>10000</td></tr><tr><th>Sparse</th></tr><tr><td>False</td></tr></table>"}, "metadata": {}}], "id": "38da4914"}, {"cell_type": "markdown", "source": "Here is how we write to arrays with string attributes:", "metadata": {}, "id": "0a33117b"}, {"cell_type": "code", "source": "# Prepare some data\na_data = np.array([\n    [\"a\", \"bb\", \"ccc\", \"dddd\"],\n    [\"e\", \"ff\", \"ggg\", \"hhhh\"],\n    [\"i\", \"jj\", \"kkk\", \"llll\"],\n    [\"m\", \"nn\", \"ooo\", \"pppp\"]], dtype=object)\na_data", "metadata": {"trusted": true}, "execution_count": 29, "outputs": [{"execution_count": 29, "output_type": "execute_result", "data": {"text/plain": "array([['a', 'bb', 'ccc', 'dddd'],\n       ['e', 'ff', 'ggg', 'hhhh'],\n       ['i', 'jj', 'kkk', 'llll'],\n       ['m', 'nn', 'ooo', 'pppp']], dtype=object)"}, "metadata": {}}], "id": "c07c193b"}, {"cell_type": "code", "source": "# Write to the array\nwith tiledb.open(array_dense_3, 'w') as A:\n    A[:] = a_data", "metadata": {"trusted": true}, "execution_count": 30, "outputs": [], "id": "5099b9b0"}, {"cell_type": "markdown", "source": "Let's inspect the array:", "metadata": {}, "id": "97e826a1"}, {"cell_type": "code", "source": "!tree $array_dense_3", "metadata": {"trusted": true}, "execution_count": 31, "outputs": [{"name": "stdout", "text": "\u001b[01;34m/home/jovyan/array_dense_3\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__commits\u001b[0m\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 \u001b[00m__1653659360844_1653659360844_f605f4f1e3d845309f43407c0cebf186_12.wrt\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__fragment_meta\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__fragments\u001b[0m\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 
\u001b[01;34m__1653659360844_1653659360844_f605f4f1e3d845309f43407c0cebf186_12\u001b[0m\n\u2502\u00a0\u00a0     \u251c\u2500\u2500 \u001b[00ma0.tdb\u001b[0m\n\u2502\u00a0\u00a0     \u251c\u2500\u2500 \u001b[00ma0_var.tdb\u001b[0m\n\u2502\u00a0\u00a0     \u2514\u2500\u2500 \u001b[00m__fragment_metadata.tdb\u001b[0m\n\u251c\u2500\u2500 \u001b[01;34m__meta\u001b[0m\n\u2514\u2500\u2500 \u001b[01;34m__schema\u001b[0m\n    \u2514\u2500\u2500 \u001b[00m__1653659360821_1653659360821_c09aade4a9374e0ba8980b4bb2e0795b\u001b[0m\n\n6 directories, 5 files\n", "output_type": "stream"}], "id": "fd2ba5e8"}, {"cell_type": "markdown", "source": "Observe that TileDB stores **two files** for a variable-length attribute inside the written fragment folder: `a0_var.tdb` that stores the actual variable-length cell values, and `a0.tdb` that stores the starting offset of each of the cell values for fast identification retrieval.", "metadata": {}, "id": "6151ae4b"}, {"cell_type": "markdown", "source": "Now let's perform a couple of read queries:", "metadata": {}, "id": "2998ad8b"}, {"cell_type": "code", "source": "with tiledb.open(array_dense_3, 'r') as A:\n    print(A[:][\"a\"]) # whole array\n    print(A[1:3, 1:2][\"a\"]) # slice", "metadata": {"trusted": true}, "execution_count": 32, "outputs": [{"name": "stdout", "text": "[[b'a' b'bb' b'ccc' b'dddd']\n [b'e' b'ff' b'ggg' b'hhhh']\n [b'i' b'jj' b'kkk' b'llll']\n [b'm' b'nn' b'ooo' b'pppp']]\n[[b'a']\n [b'e']]\n", "output_type": "stream"}], "id": "abb57619"}, {"cell_type": "markdown", "source": "## Arrays as dataframes", "metadata": {"tags": []}, "id": "0a647315"}, {"cell_type": "markdown", "source": "Arrays are essentially dataframes where dimensions are special (indexed) columns that allow very fast slicing. 
Revisiting the very first array we created, we can also slice it via the `df` object that returns the results in a `pandas` dataframe:", "metadata": {}, "id": "f1a1e062"}, {"cell_type": "code", "source": "A = tiledb.open(array_dense_1, 'r')", "metadata": {"trusted": true}, "execution_count": 33, "outputs": [], "id": "e2a5f2e8-0d50-4869-8af5-7572d72d45e2"}, {"cell_type": "code", "source": "A.df[:]        # whole array", "metadata": {"trusted": true}, "execution_count": 34, "outputs": [{"execution_count": 34, "output_type": "execute_result", "data": {"text/plain": "    d1  d2   a\n0    1   1   1\n1    1   2   2\n2    1   3   3\n3    1   4   4\n4    2   1   5\n5    2   2   6\n6    2   3   7\n7    2   4   8\n8    3   1   9\n9    3   2  10\n10   3   3  11\n11   3   4  12\n12   4   1  13\n13   4   2  14\n14   4   3  15\n15   4   4  16", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>d1</th>\n      <th>d2</th>\n      <th>a</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>3</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>4</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2</td>\n      <td>1</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>2</td>\n      <td>2</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>2</td>\n      <td>3</td>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      
<td>2</td>\n      <td>4</td>\n      <td>8</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>3</td>\n      <td>1</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>3</td>\n      <td>2</td>\n      <td>10</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>3</td>\n      <td>3</td>\n      <td>11</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>3</td>\n      <td>4</td>\n      <td>12</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>4</td>\n      <td>1</td>\n      <td>13</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>4</td>\n      <td>2</td>\n      <td>14</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>4</td>\n      <td>3</td>\n      <td>15</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>4</td>\n      <td>4</td>\n      <td>16</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}], "id": "9873cb76"}, {"cell_type": "code", "source": "A.df[1:3, 1:2] # slice", "metadata": {"trusted": true}, "execution_count": 35, "outputs": [{"execution_count": 35, "output_type": "execute_result", "data": {"text/plain": "   d1  d2   a\n0   1   1   1\n1   1   2   2\n2   2   1   5\n3   2   2   6\n4   3   1   9\n5   3   2  10", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>d1</th>\n      <th>d2</th>\n      <th>a</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2</td>\n      <td>1</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2</td>\n      
<td>2</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>3</td>\n      <td>1</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>3</td>\n      <td>2</td>\n      <td>10</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}], "id": "e9f163b2-454a-45c2-b384-d50d68f09f72"}, {"cell_type": "markdown", "source": "We can also subselect on the dimensions and attributes via the `query` object:", "metadata": {}, "id": "53ef212a"}, {"cell_type": "code", "source": "A.query(attrs=[], dims=[\"d1\"]).df[:]        # whole array", "metadata": {"trusted": true}, "execution_count": 36, "outputs": [{"execution_count": 36, "output_type": "execute_result", "data": {"text/plain": "    d1\n0    1\n1    1\n2    1\n3    1\n4    2\n5    2\n6    2\n7    2\n8    3\n9    3\n10   3\n11   3\n12   4\n13   4\n14   4\n15   4", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>d1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>12</th>\n   
   <td>4</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>4</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}], "id": "b0047d17-0784-4cc0-8ebd-ae1d04c5a9dc"}, {"cell_type": "code", "source": "A.query(attrs=[], dims=[\"d1\"]).df[1:3, 1:2] # slice", "metadata": {"trusted": true}, "execution_count": 37, "outputs": [{"execution_count": 37, "output_type": "execute_result", "data": {"text/plain": "   d1\n0   1\n1   1\n2   2\n3   2\n4   3\n5   3", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>d1</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>3</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}], "id": "bba1735f"}, {"cell_type": "markdown", "source": "Note that the ranges inside the `df` object are **inclusive**.", "metadata": {}, "id": "b50cc955"}, {"cell_type": "code", "source": "pd.read_sql(sql=f\"select * from `{array_dense_1}`\", con=db)", "metadata": {"trusted": true}, "execution_count": 38, "outputs": [{"execution_count": 38, "output_type": "execute_result", "data": {"text/plain": "    d1  d2   a\n0    1   1   1\n1    1   2   2\n2    1   3   3\n3    1   4   4\n4    2   1   5\n5    2   2   6\n6    2   3   7\n7    2   4   8\n8    3   1   9\n9    3   2  10\n10   3   3  11\n11   3  
 4  12\n12   4   1  13\n13   4   2  14\n14   4   3  15\n15   4   4  16", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>d1</th>\n      <th>d2</th>\n      <th>a</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>3</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>4</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2</td>\n      <td>1</td>\n      <td>5</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>2</td>\n      <td>2</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>2</td>\n      <td>3</td>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>2</td>\n      <td>4</td>\n      <td>8</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>3</td>\n      <td>1</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>3</td>\n      <td>2</td>\n      <td>10</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>3</td>\n      <td>3</td>\n      <td>11</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>3</td>\n      <td>4</td>\n      <td>12</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>4</td>\n      <td>1</td>\n      <td>13</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>4</td>\n      <td>2</td>\n      <td>14</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>4</td>\n      <td>3</td>\n      <td>15</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>4</td>\n      
<td>4</td>\n      <td>16</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}], "id": "775f5ce5"}, {"cell_type": "code", "source": "pd.read_sql(sql=f\"select a from `{array_dense_1}` where d2 >= 2\", con=db)", "metadata": {"trusted": true}, "execution_count": 39, "outputs": [{"execution_count": 39, "output_type": "execute_result", "data": {"text/plain": "     a\n0    2\n1    3\n2    4\n3    6\n4    7\n5    8\n6   10\n7   11\n8   12\n9   14\n10  15\n11  16", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>a</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>7</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>8</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>10</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>11</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>12</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>14</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>15</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>16</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}], "id": "8d8dc83b"}, {"cell_type": "markdown", "source": "Remember to close the array.", "metadata": {}, "id": "1ccefd72-e369-4bbb-b393-a3964ae33e0a"}, {"cell_type": "code", "source": "A.close()", "metadata": {"trusted": true}, "execution_count": 40, "outputs": [], "id": "b6a43a46-17d8-4661-8d86-9b55b605c013"}, {"cell_type": "code", "source": "# Optionally 
delete the created arrays\n# if os.path.exists(array_dense_1):\n#    shutil.rmtree(array_dense_1)\n# if os.path.exists(array_dense_2):\n#    shutil.rmtree(array_dense_2)\n# if os.path.exists(array_dense_3):\n#    shutil.rmtree(array_dense_3)", "metadata": {"trusted": true}, "execution_count": 41, "outputs": [], "id": "1ba65831"}]}
\ No newline at end of file