From 8897af6fff309da8518c88643905222a91597a3a Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Wed, 12 Feb 2025 17:36:55 -0600 Subject: [PATCH 1/3] Allow for sampling of seeds --- core/dbt/context/providers.py | 14 +++- .../sample_mode/test_sample_mode.py | 67 +++++++++++++++++++ tests/unit/context/test_providers.py | 44 ++++++++++-- 3 files changed, 118 insertions(+), 7 deletions(-) diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index 46b57b140a8..874f4738d28 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -28,7 +28,13 @@ get_adapter_package_names, get_adapter_type_names, ) -from dbt.artifacts.resources import NodeConfig, NodeVersion, RefArgs, SourceConfig +from dbt.artifacts.resources import ( + NodeConfig, + NodeVersion, + RefArgs, + SeedConfig, + SourceConfig, +) from dbt.clients.jinja import ( MacroGenerator, MacroStack, @@ -247,7 +253,11 @@ def resolve_event_time_filter(self, target: ManifestNode) -> Optional[EventTimeF # Only do event time filtering if the base node has the necessary event time configs if ( - (isinstance(target.config, NodeConfig) or isinstance(target.config, SourceConfig)) + ( + isinstance(target.config, NodeConfig) + or isinstance(target.config, SourceConfig) + or isinstance(target.config, SeedConfig) + ) and target.config.event_time and isinstance(self.model, ModelNode) ): diff --git a/tests/functional/sample_mode/test_sample_mode.py b/tests/functional/sample_mode/test_sample_mode.py index e4883e55287..cffb0f2af4e 100644 --- a/tests/functional/sample_mode/test_sample_mode.py +++ b/tests/functional/sample_mode/test_sample_mode.py @@ -38,6 +38,19 @@ select 6 as id, TIMESTAMP '2025-01-06 12:32:00-0' as event_time """ +input_seed_csv = """id,event_time +1,'2020-01-01 01:25:00-0' +2,'2025-01-02 13:47:00-0' +3,'2025-01-03 01:32:00-0' +""" + +seed_properties_yml = """ +seeds: + - name: input_seed + config: + event_time: event_time +""" + sample_mode_model_sql = """ {{ 
config(materialized='table', event_time='event_time') }} @@ -48,6 +61,12 @@ SELECT * FROM {{ ref("input_model") }} """ +sample_input_seed_sql = """ +{{ config(materialized='table') }} + +SELECT * FROM {{ ref("input_seed") }} +""" + sample_microbatch_model_sql = """ {{ config(materialized='incremental', incremental_strategy='microbatch', event_time='event_time', batch_size='day', lookback=3, begin='2024-12-25', unique_key='id')}} @@ -368,3 +387,51 @@ def test_incremental_model_sample( relation_name="sample_incremental_merge", expected_row_count=expected_rows, ) + + +class TestSampleSeedRefs(BaseSampleMode): + @pytest.fixture(scope="class") + def seeds(self): + return { + "input_seed.csv": input_seed_csv, + "properties.yml": seed_properties_yml, + } + + @pytest.fixture(scope="class") + def models(self): + return { + "sample_input_seed.sql": sample_input_seed_sql, + } + + @pytest.mark.parametrize( + "sample_mode_available,run_sample_mode,expected_row_count", + [ + (True, True, 2), + (True, False, 3), + (False, True, 3), + (False, False, 3), + ], + ) + @freezegun.freeze_time("2025-01-03T02:03:0Z") + def test_sample_mode( + self, + project, + mocker: MockerFixture, + sample_mode_available: bool, + run_sample_mode: bool, + expected_row_count: int, + ): + run_args = ["run"] + if run_sample_mode: + run_args.append("--sample=1 day") + + if sample_mode_available: + mocker.patch.dict(os.environ, {"DBT_EXPERIMENTAL_SAMPLE_MODE": "1"}) + + _ = run_dbt(["seed"]) + _ = run_dbt(run_args) + self.assert_row_count( + project=project, + relation_name="sample_input_seed", + expected_row_count=expected_row_count, + ) diff --git a/tests/unit/context/test_providers.py b/tests/unit/context/test_providers.py index f4ce723cdcf..68c59a53bf2 100644 --- a/tests/unit/context/test_providers.py +++ b/tests/unit/context/test_providers.py @@ -1,7 +1,7 @@ import os from argparse import Namespace from datetime import datetime -from typing import Optional +from typing import Any, Optional from unittest 
import mock import pytest @@ -9,7 +9,7 @@ from pytest_mock import MockerFixture from dbt.adapters.base import BaseRelation -from dbt.artifacts.resources import NodeConfig, Quoting +from dbt.artifacts.resources import NodeConfig, Quoting, SeedConfig from dbt.artifacts.resources.types import BatchSize from dbt.context.providers import ( BaseResolver, @@ -46,7 +46,7 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): assert resolver.resolve_limit == expected_resolve_limit @pytest.mark.parametrize( - "use_microbatch_batches,materialized,incremental_strategy,sample_mode_available,sample,resolver_model_node,expect_filter", + "use_microbatch_batches,materialized,incremental_strategy,sample_mode_available,sample,resolver_model_node,target_type,expect_filter", [ # Microbatch model without sample ( @@ -56,6 +56,7 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): True, None, True, + NodeConfig, True, ), # Microbatch model with sample @@ -69,6 +70,7 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): end=datetime(2025, 1, 1, tzinfo=pytz.UTC), ), True, + NodeConfig, True, ), # Normal model with sample @@ -82,6 +84,7 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): end=datetime(2025, 1, 1, tzinfo=pytz.UTC), ), True, + NodeConfig, True, ), # Incremental merge model with sample @@ -95,6 +98,7 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): end=datetime(2025, 1, 1, tzinfo=pytz.UTC), ), True, + NodeConfig, True, ), # Normal model with sample, but sample mode not available @@ -108,6 +112,7 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): end=datetime(2025, 1, 1, tzinfo=pytz.UTC), ), True, + NodeConfig, False, ), # Sample, but not model node @@ -121,6 +126,7 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): end=datetime(2025, 1, 1, tzinfo=pytz.UTC), ), False, + NodeConfig, False, ), # Microbatch, but not model node @@ 
-131,6 +137,7 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): False, None, False, + NodeConfig, False, ), # Mircrobatch model, but not using batches @@ -141,6 +148,7 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): False, None, True, + NodeConfig, False, ), # Non microbatch model, but supposed to use batches @@ -151,10 +159,35 @@ def test_resolve_limit(self, resolver, empty, expected_resolve_limit): False, None, True, + NodeConfig, False, ), # Incremental merge - (True, "incremental", "merge", False, None, True, False), + (True, "incremental", "merge", False, None, True, NodeConfig, False), + # Target seed node, with sample + ( + False, + "table", + None, + True, + SampleWindow.from_relative_string("2 days"), + True, + SeedConfig, + True, + ), + # Target seed node, with sample, but sample mode not available + ( + False, + "table", + None, + False, + SampleWindow.from_relative_string("2 days"), + True, + SeedConfig, + False, + ), + # Target seed node, without sample, but sample mode available + (False, "table", None, True, None, True, SeedConfig, False), ], ) def test_resolve_event_time_filter( @@ -167,11 +200,12 @@ def test_resolve_event_time_filter( sample_mode_available: bool, sample: Optional[SampleWindow], resolver_model_node: bool, + target_type: Any, expect_filter: bool, ) -> None: # Target mocking target = mock.Mock() - target.config = mock.MagicMock(NodeConfig) + target.config = mock.MagicMock(target_type) target.config.event_time = "created_at" # Declare whether sample mode is available From 9780f220dcc59b1a3c8a5ebabbf01811fcdceb98 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Wed, 12 Feb 2025 17:37:55 -0600 Subject: [PATCH 2/3] Add changie doc --- .changes/unreleased/Features-20250212-173743.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Features-20250212-173743.yaml diff --git a/.changes/unreleased/Features-20250212-173743.yaml
b/.changes/unreleased/Features-20250212-173743.yaml new file mode 100644 index 00000000000..bb9a14f5e75 --- /dev/null +++ b/.changes/unreleased/Features-20250212-173743.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Allow for sampling of ref'd seeds +time: 2025-02-12T17:37:43.554156-06:00 +custom: + Author: QMalcolm + Issue: "11300" From cc48853d9cd95942d0fc66520e226bad2ece99a1 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Thu, 13 Feb 2025 16:32:16 -0600 Subject: [PATCH 3/3] Fix seed column types for `TestSampleSeedRefs` tests --- tests/functional/sample_mode/test_sample_mode.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/functional/sample_mode/test_sample_mode.py b/tests/functional/sample_mode/test_sample_mode.py index cffb0f2af4e..14df94eb6e8 100644 --- a/tests/functional/sample_mode/test_sample_mode.py +++ b/tests/functional/sample_mode/test_sample_mode.py @@ -49,6 +49,8 @@ - name: input_seed config: event_time: event_time + column_types: + event_time: timestamp """ sample_mode_model_sql = """