From ab8500817fe90b2dc6a896fa15c4e6746c141f92 Mon Sep 17 00:00:00 2001 From: jsj Date: Sun, 1 Dec 2024 20:33:51 +0100 Subject: [PATCH 1/4] dispose notebookutils --- src/msfabricutils/fabric_duckdb_connection.py | 52 +++++++++++++------ 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/src/msfabricutils/fabric_duckdb_connection.py b/src/msfabricutils/fabric_duckdb_connection.py index 0a91007..19f9394 100644 --- a/src/msfabricutils/fabric_duckdb_connection.py +++ b/src/msfabricutils/fabric_duckdb_connection.py @@ -328,7 +328,14 @@ def _create_or_replace_fabric_lakehouse_secret(self, catalog_name: str) -> None: def _register_lakehouse_tables( self, workspace_name: str, workspace_id: str, lakehouse_id: str, lakehouse_name: str ) -> None: - tables = notebookutils.lakehouse.listTables(lakehouse_name, workspace_id) # noqa: F821 + from sempy import fabric + + client = fabric.FabricRestClient() + + response = client.get(f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables") + response.raise_for_status() + + tables = response.json()["data"] if not tables: table_information = { @@ -401,33 +408,46 @@ def register_workspace_lakehouses(self, workspace_id: str, lakehouses: str | lis ... lakehouses=['sales_lakehouse', 'marketing_lakehouse'] ... ) ``` - """ + """ + + if isinstance(lakehouses, str): + lakehouses = [lakehouses] + + from sempy import fabric + workspaces = fabric.list_workspaces() workspace_name = workspaces[workspaces.Id == workspace_id]["Name"].iat[0] - if isinstance(lakehouses, str): - lakehouses = [lakehouses] + client = fabric.FabricRestClient() - for lakehouse in lakehouses: - lakehouse_properties = notebookutils.lakehouse.getWithProperties( # noqa: F821 - lakehouse, workspace_id - ) + response = client.get(f"v1/workspaces/{workspace_id}/lakehouses") + response.raise_for_status() + lakehouse_properties = response["value"] - is_schema_enabled = ( - lakehouse_properties.get("properties").get("defaultSchema") is not None - ) - lakehouse_id = lakehouse_properties.get("id") + selected_lakehouses = [ + lakehouse + for lakehouse in lakehouse_properties + if lakehouse["displayName"] in lakehouses + ] + + for lakehouse in selected_lakehouses: + is_schema_enabled = lakehouse.get("properties").get("defaultSchema") is not None + + lakehouse_name = lakehouse.get("displayName") + lakehouse_id = lakehouse.get("id") if is_schema_enabled: raise Exception(f""" - The lakehouse `{lakehouse}` is using the schema-enabled preview feature.\n + The lakehouse `{lakehouse_name}` is using the schema-enabled preview feature.\n This utility class does support schema-enabled lakehouses (yet). """) - self._attach_lakehouse(workspace_name, lakehouse) - self._create_or_replace_fabric_lakehouse_secret(f"{workspace_name}{lakehouse}") - self._register_lakehouse_tables(workspace_name, workspace_id, lakehouse_id, lakehouse) + self._attach_lakehouse(workspace_name, lakehouse_name) + self._create_or_replace_fabric_lakehouse_secret(f"{workspace_name}{lakehouse_name}") + self._register_lakehouse_tables( + workspace_name, workspace_id, lakehouse_id, lakehouse_name + ) def print_lakehouse_catalog(self): """Print a hierarchical view of all registered lakehouses, schemas, and tables. From 7ec067742d678cd0bb0e4e394b953f9e5458580b Mon Sep 17 00:00:00 2001 From: jsj Date: Sun, 1 Dec 2024 20:34:13 +0100 Subject: [PATCH 2/4] up version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d3d582c..d7eaff1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "msfabricutils" -version = "0.1.2" +version = "0.1.3" description = "A Python library exposes additional functionality to work with Python Notebooks in Microsoft Fabric." authors = [ { name = "Jimmy Jensen" }, From 449031d96d73ed94aec1b3e760e8a6ab7d8c9d3e Mon Sep 17 00:00:00 2001 From: jsj Date: Sun, 1 Dec 2024 20:35:08 +0100 Subject: [PATCH 3/4] ruff format --- src/msfabricutils/fabric_duckdb_connection.py | 37 ++++++++++--------- .../helpers/separator_indices.py | 6 +-- tests/helpers/test_separator_indices.py | 13 +++---- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/msfabricutils/fabric_duckdb_connection.py b/src/msfabricutils/fabric_duckdb_connection.py index 19f9394..f776828 100644 --- a/src/msfabricutils/fabric_duckdb_connection.py +++ b/src/msfabricutils/fabric_duckdb_connection.py @@ -9,12 +9,12 @@ from sqlglot import exp # Avoid import errors outside Fabric environments -try: +try: from sempy import fabric - import notebookutils except ModuleNotFoundError: pass + class FabricDuckDBConnection: """A DuckDB connection wrapper for Microsoft Fabric Lakehouses. @@ -28,7 +28,7 @@ class FabricDuckDBConnection: - Delta Lake table support Args: - access_token (str): The Microsoft Fabric access token for authentication. + access_token (str): The Microsoft Fabric access token for authentication. In a notebook, use `notebookutils.credentials.getToken('storage')`. config (dict, optional): DuckDB configuration options. Defaults to {}. @@ -37,7 +37,7 @@ class FabricDuckDBConnection: >>> # Initialize connection >>> access_token = notebookutils.credentials.getToken('storage') >>> conn = FabricDuckDBConnection(access_token=access_token) - >>> + >>> >>> # Register lakehouses from different workspaces >>> conn.register_workspace_lakehouses( ... workspace_id='12345678-1234-5678-1234-567812345678', @@ -46,23 +46,24 @@ class FabricDuckDBConnection: >>> conn.register_workspace_lakehouses( ... workspace_id='87654321-8765-4321-8765-432187654321', ... lakehouses=['marketing'] - ... ) - >>> + ... ) + >>> >>> # Query across workspaces using fully qualified names >>> df = conn.sql(''' - ... SELECT + ... SELECT ... c.customer_id, ... c.name, ... c.region, ... s.segment, ... s.lifetime_value ... FROM sales_workspace.sales.main.customers c - ... JOIN marketing_workspace.marketing.main.customer_segments s + ... JOIN marketing_workspace.marketing.main.customer_segments s ... ON c.customer_id = s.customer_id ... WHERE c.region = 'EMEA' ... ''').df() ``` - """ + """ + def __init__(self, access_token: str, config: dict = {}): self._registered_tables = [] self._connection = duckdb.connect(config=config) @@ -95,11 +96,11 @@ def refresh_access_token(self, access_token: str): ```python >>> # Initialize connection >>> conn = FabricDuckDBConnection(access_token='old_token') - >>> + >>> >>> # When token expires, refresh it >>> new_token = notebookutils.credentials.getToken('storage') - >>> conn.refresh_access_token(new_token) - ``` + >>> conn.refresh_access_token(new_token) + ``` """ self._access_token = access_token @@ -123,7 +124,7 @@ def _preprocess_sql_query(self, query: str): Raises: Exception: If table references are ambiguous or tables don't exist - """ + """ parsed = sqlglot.parse_one(sql=query, read="duckdb") tables_in_query = parsed.find_all(exp.Table) replace_by = {} @@ -276,7 +277,7 @@ def _modify_input_query(self, args, kwargs): Returns: tuple: Modified (args, kwargs) - """ + """ modified_args = list(args) if "query" in kwargs: @@ -295,7 +296,7 @@ def _preprocess_input(self, query: str) -> str: Returns: str: The preprocessed SQL statements - """ + """ query_separator_indices = _separator_indices(query, ";") query_separator_indices = [0] + [idx + 1 for idx in query_separator_indices] @@ -395,13 +396,13 @@ def register_workspace_lakehouses(self, workspace_id: str, lakehouses: str | lis >>> # Initialize connection with access token >>> access_token = notebookutils.credentials.getToken('storage') >>> conn = FabricDuckDBConnection(access_token=access_token) - >>> + >>> >>> # Register a single lakehouse >>> conn.register_workspace_lakehouses( ... workspace_id='12345678-1234-5678-1234-567812345678', ... lakehouses='sales_lakehouse' ... ) - >>> + >>> >>> # Register multiple lakehouses >>> conn.register_workspace_lakehouses( ... workspace_id='12345678-1234-5678-1234-567812345678', @@ -500,7 +501,7 @@ def write( workspace_id: str = None, workspace_name: str = None, *args: Any, - **kwargs: Any + **kwargs: Any, ): """Write a DataFrame to a Fabric Lakehouse table. diff --git a/src/msfabricutils/helpers/separator_indices.py b/src/msfabricutils/helpers/separator_indices.py index de1ef2f..3fc53cd 100644 --- a/src/msfabricutils/helpers/separator_indices.py +++ b/src/msfabricutils/helpers/separator_indices.py @@ -1,5 +1,3 @@ - - def _separator_indices(string: str, separator: str): """Find indices of a separator character in a string, ignoring separators inside quotes. @@ -23,9 +21,9 @@ def _separator_indices(string: str, separator: str): inside_double_quotes = not inside_double_quotes elif char == "'" and not inside_double_quotes: inside_single_quotes = not inside_single_quotes - elif (inside_double_quotes or inside_single_quotes): + elif inside_double_quotes or inside_single_quotes: continue elif char == separator: indices.append(idx) - return indices \ No newline at end of file + return indices diff --git a/tests/helpers/test_separator_indices.py b/tests/helpers/test_separator_indices.py index d45a76c..dd38f0e 100644 --- a/tests/helpers/test_separator_indices.py +++ b/tests/helpers/test_separator_indices.py @@ -2,21 +2,20 @@ def test_seperator_indices_simple_string(): - chars = "text.to.be.tested" - assert _separator_indices(chars, ".") == [4,7,10] + assert _separator_indices(chars, ".") == [4, 7, 10] -def test_seperator_indices_string_with_quotes(): +def test_seperator_indices_string_with_quotes(): chars = "'te.xt'.to.be.tested" - assert _separator_indices(chars, ".") == [7,10,13] + assert _separator_indices(chars, ".") == [7, 10, 13] -def test_seperator_indices_string_with_both_quotes(): +def test_seperator_indices_string_with_both_quotes(): chars = """'te"."xt'.to.be.tested""" assert _separator_indices(chars, ".") == [9, 12, 15] -def test_seperator_indices_only_dots(): +def test_seperator_indices_only_dots(): chars = "..." - assert _separator_indices(chars, ".") == [0, 1, 2] \ No newline at end of file + assert _separator_indices(chars, ".") == [0, 1, 2] From 2747181f294e47d8c724562f3a21e544b50d72ba Mon Sep 17 00:00:00 2001 From: jsj Date: Sun, 1 Dec 2024 20:37:15 +0100 Subject: [PATCH 4/4] noqa --- src/msfabricutils/fabric_duckdb_connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/msfabricutils/fabric_duckdb_connection.py b/src/msfabricutils/fabric_duckdb_connection.py index f776828..e6f36d8 100644 --- a/src/msfabricutils/fabric_duckdb_connection.py +++ b/src/msfabricutils/fabric_duckdb_connection.py @@ -10,7 +10,7 @@ # Avoid import errors outside Fabric environments try: - from sempy import fabric + from sempy import fabric #noqa: F401 except ModuleNotFoundError: pass