diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml deleted file mode 100644 index fad19d6a..00000000 --- a/.github/workflows/cd.yaml +++ /dev/null @@ -1,77 +0,0 @@ -name: CD - -on: - push: - branches: - - master - -env: - GCP_PROJECT_ID: ${{ secrets.GKE_PROJECT }} - GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }} - GH_PAT: ${{ secrets.GH_PAT }} - GKE_CLUSTER: ${{ secrets.GKE_CLUSTER_NAME }} - GKE_ZONE: ${{ secrets.GKE_CLUSTER_ZONE }} - IMAGE_NAME: ghcr.io/prefeitura-rio/dbt-rpc - - -jobs: - build-container: - name: Build, publish and register - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.x - - - name: Create credentials directory - run: mkdir -p credentials - - - name: Mount credentials - env: - DBT_CREDENTIALS_DEV: ${{ secrets.DBT_CREDENTIALS_DEV }} - run: echo "$DBT_CREDENTIALS_DEV" > credentials/dev.json - - - name: Install dbt - run: pip install dbt-core dbt-bigquery setuptools - - - name: Compile dbt models - run: dbt compile --profiles-dir . --profile default --target compile - - - name: Setup Google Cloud CLI - uses: google-github-actions/setup-gcloud@v0.2.1 - with: - service_account_key: ${{ secrets.GKE_SA_KEY }} - project_id: ${{ secrets.GKE_PROJECT }} - export_default_credentials: true - - - name: Get GKE credentials - uses: google-github-actions/get-gke-credentials@v0.2.1 - with: - cluster_name: ${{ env.GKE_CLUSTER }} - location: ${{ env.GKE_ZONE }} - credentials: ${{ secrets.GKE_SA_KEY }} - - - name: Build and publish image - run: | - docker build -t $IMAGE_NAME:$GITHUB_SHA . 
- echo $GH_PAT | docker login ghcr.io -u gabriel-milan --password-stdin - docker push $IMAGE_NAME:$GITHUB_SHA - - - name: Set up Kustomize - run: |- - curl -sfLo kustomize https://github.com/kubernetes-sigs/kustomize/releases/download/v3.1.0/kustomize_3.1.0_linux_amd64 - chmod u+x ./kustomize - - - name: Deploy - run: |- - ./kustomize edit set image gcr.io/PROJECT_ID/IMAGE_DBT:TAG=$IMAGE_NAME:$GITHUB_SHA - ./kustomize build . | kubectl apply -n prefect-agent-rj-sms -f - - ./kustomize build . | kubectl apply -n prefect-agent-rj-sms-dev -f - - kubectl rollout status -w -n prefect-agent-rj-sms deployment/dbt-rpc-dev - kubectl rollout status -w -n prefect-agent-rj-sms deployment/dbt-rpc-prod - kubectl rollout status -w -n prefect-agent-rj-sms-dev deployment/dbt-rpc-dev - kubectl rollout status -w -n prefect-agent-rj-sms-dev deployment/dbt-rpc-prod diff --git a/.github/workflows/dbt-compile.yaml b/.github/workflows/dbt-compile.yaml index c6f765ce..5955ca62 100644 --- a/.github/workflows/dbt-compile.yaml +++ b/.github/workflows/dbt-compile.yaml @@ -1,6 +1,6 @@ name: DBT Compile -on: [pull_request, push] +on: [pull_request] jobs: dbt-compile: @@ -21,11 +21,14 @@ jobs: - name: Mount credentials env: DBT_CREDENTIALS_DEV: ${{ secrets.DBT_CREDENTIALS_DEV }} - run: echo "$DBT_CREDENTIALS_DEV" > credentials/dev.json + run: echo "$DBT_CREDENTIALS_DEV" > /tmp/credentials.json - name: Install dbt run: pip install dbt-core dbt-bigquery setuptools + - name: Install DBT Dependencies + run: dbt deps + - name: Compile dbt models - run: dbt compile --profiles-dir . --profile default --target compile + run: dbt compile --profiles-dir . 
--profile default --target dev diff --git a/.github/workflows/lint_docker.yaml b/.github/workflows/lint_docker.yaml deleted file mode 100644 index 9db59bfb..00000000 --- a/.github/workflows/lint_docker.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: Lint Dockerfile - -on: - pull_request: - -jobs: - docker_lint: - name: Lint Dockerfile - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Run hadolint - uses: reviewdog/action-hadolint@v1 - with: - reporter: github-pr-review diff --git a/.github/workflows/metadata_automation.yaml b/.github/workflows/metadata_automation.yaml deleted file mode 100644 index f8801bc8..00000000 --- a/.github/workflows/metadata_automation.yaml +++ /dev/null @@ -1,44 +0,0 @@ -name: Metadata Automation - -on: - push: - branches: - - "*" - -env: - GKE_SA_KEY: ${{ secrets.GKE_SA_KEY }} - -jobs: - update-metadata: - name: Update metadata from Google Sheets - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Setup Python version - uses: actions/setup-python@v2 - with: - python-version: "3.9" - - - name: Install Python dependencies for actions - run: |- - pip install -r requirements-actions.txt - - - name: Checkout to the head ref - run: |- - git fetch --depth=1 - git checkout ${{ github.head_ref }} - - - name: Fetch updated metadata from API - run: |- - python3 .github/workflows/scripts/fetch_metadata.py - - - name: Update DBT schemas - run: |- - python3 .github/workflows/scripts/metadata_to_dbt_schema.py - - - name: Commit changed files - uses: stefanzweifel/git-auto-commit-action@v4 - with: - commit_message: "Update metadata" diff --git a/.github/workflows/query_automation.yaml b/.github/workflows/query_automation.yaml deleted file mode 100644 index 6e827558..00000000 --- a/.github/workflows/query_automation.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: Query Automation - -on: - pull_request_target: - types: - - labeled -env: - GKE_SA_KEY: ${{ secrets.GKE_SA_KEY }} - -jobs: - create-queries: 
- name: Create a basic treated query from Google Sheets - runs-on: ubuntu-latest - if: github.event.label.name == 'create-basic-query' - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Setup Python version - uses: actions/setup-python@v2 - with: - python-version: "3.9" - - - name: Install Python dependencies for actions - run: |- - pip install -r requirements-actions.txt - - - name: Checkout to the head ref - run: |- - git fetch --depth=1 - git checkout ${{ github.head_ref }} - - - name: Update query - run: |- - python3 .github/workflows/scripts/query-automation.py - - - name: Commit changed files - uses: stefanzweifel/git-auto-commit-action@v4 - with: - commit_message: 'Update query' diff --git a/.github/workflows/sql-linter.yaml b/.github/workflows/sql-linter.yaml new file mode 100644 index 00000000..5f16a1b2 --- /dev/null +++ b/.github/workflows/sql-linter.yaml @@ -0,0 +1,96 @@ +name: SQLFluff + +on: + - pull_request + +jobs: + lint-models: + runs-on: ubuntu-latest + steps: + - name: checkout + uses: actions/checkout@v2 + + - name: Install Python + uses: "actions/setup-python@v2" + with: + python-version: "3.10" + + - name: Install poetry + uses: abatilo/actions-poetry@v2 + with: + poetry-version: '1.7.1' + + - name: Setup a local virtual environment (if no poetry.toml file) + run: | + poetry config virtualenvs.create true --local + poetry config virtualenvs.in-project true --local + + - name: Install the project dependencies + run: poetry install --no-interaction + + - name: Add Poetry virtualenv to PATH + run: | + echo "$(poetry env info --path)/bin" >> $GITHUB_PATH + + - name: Install DBT Dependencies + run: "dbt deps" + + - name: Get changed files + id: get_file_changes + uses: trilom/file-changes-action@v1.2.4 + with: + output: ' ' + + - name: Display changed files + run: | + echo "Modified files: ${{ steps.get_file_changes.outputs.files_modified }}" + echo "Added files: ${{ steps.get_file_changes.outputs.files_added }}" + + - name: Get 
changed .sql files in /models to lint + id: get_files_to_lint + shell: bash -l {0} + run: | + # Write the file list built in the $() brackets to $GITHUB_OUTPUT for later steps (::set-output is deprecated) + echo "lintees=$( + # Issue where grep regular expressions don't work as expected on the + # Github Actions shell, check models/ folder + echo \ + $(echo ${{ steps.get_file_changes.outputs.files_modified }} | + tr -s ' ' '\n' | + grep -E '^models.*[.]sql$' | + tr -s '\n' ' ') \ + $(echo ${{ steps.get_file_changes.outputs.files_added }} | + tr -s ' ' '\n' | + grep -E '^models.*[.]sql$' | + tr -s '\n' ' ') + )" >> "$GITHUB_OUTPUT" + + - name: Lint each SQL file individually + run: | + # Lint each changed SQL file; store a failing exit code in $GITHUB_ENV, since shell variables do not reach the next step + for file in ${{ steps.get_file_changes.outputs.files_modified }} ${{ steps.get_file_changes.outputs.files_added }}; do + if [[ $file == *.sql ]]; then + echo "Linting $file" + sqlfluff lint "$file" || echo "exit_code=$?" >> "$GITHUB_ENV" + fi + done + shell: bash + + - name: Fail if any file had a linting error + run: | + if [[ -n "$exit_code" ]]; then + exit $exit_code + fi + + - name: Lint dbt models + id: sqlfluff_json + if: steps.get_files_to_lint.outputs.lintees != '' + shell: bash -l {0} + run: sqlfluff lint ./models/ --format github-annotation --annotation-level failure --nofail > annotations.json + + - name: Annotate + uses: yuzutech/annotations-action@v0.3.0 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + title: "SQLFluff Lint" + input: "./annotations.json" \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..154825a1 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,7 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-added-large-files # prevents adding large files + - id: detect-private-key # detects private keys + - id: fix-byte-order-marker # fixes BOM \ No newline at end of file diff --git a/.sqlfluff b/.sqlfluff new file mode 100644 index 00000000..aa86eec6 --- /dev/null +++ 
b/.sqlfluff @@ -0,0 +1,54 @@ +[sqlfluff] + +dialect = bigquery + +templater = dbt + +runaway_limit = 10 + +max_line_length = 120 + +indent_unit = space +large_file_skip_byte_limit = 30000 + +[sqlfluff:indentation] + +tab_space_size = 4 + +[sqlfluff:layout:type:comma] + +spacing_before = touch + +line_position = trailing + +[sqlfluff:rules:capitalisation.keywords] + +capitalisation_policy = upper + +[sqlfluff:rules:aliasing.table] + +aliasing = explicit + +[sqlfluff:rules:aliasing.column] + +aliasing = explicit + +[sqlfluff:rules:aliasing.expression] + +allow_scalar = False + +[sqlfluff:rules:capitalisation.identifiers] + +extended_capitalisation_policy = lower + +[sqlfluff:rules:capitalisation.functions] + +capitalisation_policy = lower + +[sqlfluff:rules:capitalisation.literals] + +capitalisation_policy = lower + +[sqlfluff:rules:ambiguous.column_references] # Number in group by + +group_by_and_order_by_style = implicit \ No newline at end of file diff --git a/macros/padronize_cep.sql b/macros/padronize_cep.sql new file mode 100644 index 00000000..aadce660 --- /dev/null +++ b/macros/padronize_cep.sql @@ -0,0 +1,33 @@ +{% macro padronize_cep(cep_column) %} + + case + -- Caso tenha 13 dígitos, remover os 5 primeiros se os 5 posteriores forem + -- iguais + when + length({{ cep_column }}) = 13 + and substr(trim(replace({{ cep_column }}, '-', '')), 1, 5) + = substr(trim(replace({{ cep_column }}, '-', '')), 6, 5) + then substr(trim(replace({{ cep_column }}, '-', '')), 6, 13) + when + length({{ cep_column }}) = 13 + and substr(trim(replace({{ cep_column }}, '-', '')), 1, 5) + != substr(trim(replace({{ cep_column }}, '-', '')), 6, 5) + then null + + -- Caso número de dígitos seja 6 ou 7, fixar os 5 primeiros e preencher com 0 + -- à esquerda até ter 8 dígitos + when length({{ cep_column }}) in (6, 7) + then + concat( + substr(trim(replace({{ cep_column }}, '-', '')), 1, 5), + lpad(substr(trim(replace({{ cep_column }}, '-', '')), 6, 2), 3, '0') + ) + + -- Caso tenha 5 ou 
menos dígitos, preencher com 0 à direita até ter 8 dígitos + when length({{ cep_column }}) <= 5 + then rpad(trim(replace({{ cep_column }}, '-', '')), 8, '0') + + -- Caso não se encaixe em nenhuma das condições, manter o cep_column original + else trim(replace({{ cep_column }}, '-', '')) + end +{% endmacro %} diff --git a/macros/padronize_telefone.sql b/macros/padronize_telefone.sql new file mode 100644 index 00000000..4dc408be --- /dev/null +++ b/macros/padronize_telefone.sql @@ -0,0 +1,30 @@ +{% macro padronize_telefone(telefone_column) %} + case + when + length(trim({{ telefone_column }})) = 0 + or trim({{ telefone_column }}) in ('NONE', 'NULL', '0', "()", "") + or trim({{ telefone_column }}) like '00%' + or trim({{ telefone_column }}) like '000%' + or trim({{ telefone_column }}) like '0000%' + or regexp_contains(trim({{ telefone_column }}), r'^(0+|1+|2+|3+|4+|5+|6+|7+|8+|9+)$') -- Remove repeated digits (RE2 has no backreferences, so each digit is listed explicitly) + or regexp_contains(trim({{ telefone_column }}), r'E\+\d+') -- Remove scientific notation + or regexp_contains(trim({{ telefone_column }}), r'[a-zA-Z]') -- Remove numbers that contain letters + then null + else + regexp_replace( + regexp_replace( + regexp_replace( + regexp_replace( + trim({{ telefone_column }}), '^0', '' -- Remove leading 0 + ), + '[()]', + '' -- Remove parentheses + ), + '-', + '' -- Remove hyphens + ), + ' ', -- Remove blank spaces + '' + ) + end +{% endmacro %} diff --git a/macros/validade_cns.sql b/macros/validade_cns.sql new file mode 100644 index 00000000..070f16be --- /dev/null +++ b/macros/validade_cns.sql @@ -0,0 +1,75 @@ +{% macro validate_cns(cns_column) %} + -- CNS validation based on https://integracao.esusab.ufsc.br/v211/docs/algoritmo_CNS.html + CASE + WHEN LENGTH(TRIM({{ cns_column }})) != 15 THEN FALSE + + -- Validation for CNS starting with 1 or 2 + WHEN SAFE_CAST(SUBSTR({{ cns_column }}, 1, 1) AS INT64) IN (1, 2) THEN ( + SELECT + cns = CASE + WHEN 11 - resto = 10 THEN pis || '001' || CAST(dv AS STRING) + ELSE pis || '000' || CAST(dv AS STRING) + 
END + FROM ( + SELECT + *, + CASE + WHEN 11 - resto = 11 THEN 0 + WHEN 11 - resto = 10 THEN + 11 - MOD((d1 * 15 + d2 * 14 + d3 * 13 + d4 * 12 + d5 * 11 + + d6 * 10 + d7 * 9 + d8 * 8 + d9 * 7 + d10 * 6 + d11 * 5 + 2), 11) + ELSE 11 - resto + END AS dv + FROM ( + SELECT + *, + MOD((d1 * 15 + d2 * 14 + d3 * 13 + d4 * 12 + d5 * 11 + + d6 * 10 + d7 * 9 + d8 * 8 + d9 * 7 + d10 * 6 + d11 * 5), 11) AS resto + FROM ( + SELECT + {{ cns_column }} AS cns, + SUBSTR({{ cns_column }}, 1, 11) AS pis, + SAFE_CAST(SUBSTR({{ cns_column }}, 1, 1) AS INT64) AS d1, + SAFE_CAST(SUBSTR({{ cns_column }}, 2, 1) AS INT64) AS d2, + SAFE_CAST(SUBSTR({{ cns_column }}, 3, 1) AS INT64) AS d3, + SAFE_CAST(SUBSTR({{ cns_column }}, 4, 1) AS INT64) AS d4, + SAFE_CAST(SUBSTR({{ cns_column }}, 5, 1) AS INT64) AS d5, + SAFE_CAST(SUBSTR({{ cns_column }}, 6, 1) AS INT64) AS d6, + SAFE_CAST(SUBSTR({{ cns_column }}, 7, 1) AS INT64) AS d7, + SAFE_CAST(SUBSTR({{ cns_column }}, 8, 1) AS INT64) AS d8, + SAFE_CAST(SUBSTR({{ cns_column }}, 9, 1) AS INT64) AS d9, + SAFE_CAST(SUBSTR({{ cns_column }}, 10, 1) AS INT64) AS d10, + SAFE_CAST(SUBSTR({{ cns_column }}, 11, 1) AS INT64) AS d11 + ) + ) + ) + ) + + -- Validation for CNS starting with 7, 8, or 9 + WHEN SAFE_CAST(SUBSTR({{ cns_column }}, 1, 1) AS INT64) IN (7, 8, 9) THEN ( + SELECT + MOD((d1 * 15 + d2 * 14 + d3 * 13 + d4 * 12 + d5 * 11 + + d6 * 10 + d7 * 9 + d8 * 8 + d9 * 7 + d10 * 6 + + d11 * 5 + d12 * 4 + d13 * 3 + d14 * 2 + d15 * 1), 11) = 0 + FROM ( + SELECT + {{ cns_column }} AS cns, + SAFE_CAST(SUBSTR({{ cns_column }}, 1, 1) AS INT64) AS d1, + SAFE_CAST(SUBSTR({{ cns_column }}, 2, 1) AS INT64) AS d2, + SAFE_CAST(SUBSTR({{ cns_column }}, 3, 1) AS INT64) AS d3, + SAFE_CAST(SUBSTR({{ cns_column }}, 4, 1) AS INT64) AS d4, + SAFE_CAST(SUBSTR({{ cns_column }}, 5, 1) AS INT64) AS d5, + SAFE_CAST(SUBSTR({{ cns_column }}, 6, 1) AS INT64) AS d6, + SAFE_CAST(SUBSTR({{ cns_column }}, 7, 1) AS INT64) AS d7, + SAFE_CAST(SUBSTR({{ cns_column }}, 8, 1) AS INT64) AS d8, + 
SAFE_CAST(SUBSTR({{ cns_column }}, 9, 1) AS INT64) AS d9, + SAFE_CAST(SUBSTR({{ cns_column }}, 10, 1) AS INT64) AS d10, + SAFE_CAST(SUBSTR({{ cns_column }}, 11, 1) AS INT64) AS d11, + SAFE_CAST(SUBSTR({{ cns_column }}, 12, 1) AS INT64) AS d12, + SAFE_CAST(SUBSTR({{ cns_column }}, 13, 1) AS INT64) AS d13, + SAFE_CAST(SUBSTR({{ cns_column }}, 14, 1) AS INT64) AS d14, + SAFE_CAST(SUBSTR({{ cns_column }}, 15, 1) AS INT64) AS d15 + ) + ) + ELSE FALSE + END{% endmacro %} \ No newline at end of file diff --git a/models/intermediate/historico_clinico/obito/int_historico_clinico__obito__vitai.sql b/models/intermediate/historico_clinico/obito/int_historico_clinico__obito__vitai.sql index 7f2f187a..8a027631 100644 --- a/models/intermediate/historico_clinico/obito/int_historico_clinico__obito__vitai.sql +++ b/models/intermediate/historico_clinico/obito/int_historico_clinico__obito__vitai.sql @@ -16,9 +16,60 @@ alta_internacao as ( select * from {{ref('raw_prontuario_vitai__resumo_alta')}} ), +boletins_consulta as ( + select + boletim_r.gid, + boletim_r.cpf, + boletim_r.cns, + boletim_r.alta_data + from {{ ref("raw_prontuario_vitai__boletim") }} as boletim_r + left join + {{ ref("raw_prontuario_vitai__atendimento") }} as atendimento + on boletim_r.gid = atendimento.gid_boletim + where atendimento.gid_boletim is not null and {{process_null('boletim_r.internacao_data')}} is null +), +boletins_internacao as ( + select + boletim_r.gid, + boletim_r.cpf, + boletim_r.cns, + boletim_r.alta_data + from {{ ref("raw_prontuario_vitai__boletim") }} as boletim_r + left join {{ ref("raw_prontuario_vitai__internacao") }} internacao + on boletim_r.gid = internacao.gid_boletim + where + internacao.gid_boletim is not null + and {{process_null('boletim_r.internacao_data')}} is not null +), +boletins_exames as ( + select + boletim_r.gid, + boletim_r.cpf, + boletim_r.cns, + boletim_r.alta_data + from {{ ref("raw_prontuario_vitai__boletim") }} as boletim_r + left join {{ 
ref("raw_prontuario_vitai__exame") }} as exame_table + on boletim_r.gid = exame_table.gid_boletim + left join{{ ref("raw_prontuario_vitai__atendimento") }} as atendimento + on boletim_r.gid = atendimento.gid_boletim + where + exame_table.gid_boletim is not null + and atendimento.gid_boletim is null + and {{process_null('boletim_r.internacao_data')}} is null +), boletim as ( - select * from - {{ref('raw_prontuario_vitai__boletim')}} + select distinct + gid, + cpf, + cns, + alta_data + from ( + select * from boletins_consulta + union all + select * from boletins_internacao + union all + select * from boletins_exames + ) ), obitos as ( select @@ -60,22 +111,32 @@ ultimo_boletim_vitai as ( from boletim group by 1 ), -ultimo_boletim_vitacare as( +boletins_vitacare as( select cpf, - max( - extract( - date from datahora_fim + case + when + eh_coleta = 'True' then false + when + eh_coleta != 'True' and + ( + json_extract_scalar(condicao_json, "$.cod_cid10") is not null + or soap_subjetivo_motivo is not null + or soap_plano_observacoes is not null ) - ) as ultima_entrada - from {{ref('raw_prontuario_vitacare__atendimento')}} - group by 1 + then false + else true + end as flag__episodio_sem_informacao, + datahora_fim + from {{ref('raw_prontuario_vitacare__atendimento')}},unnest(json_extract_array(condicoes)) as condicao_json ), ultimo_boletim as( select cpf, max(ultima_entrada) as ultima_entrada from ( - select * - from ultimo_boletim_vitacare + select cpf,max(extract(date from datahora_fim)) as ultima_entrada + from boletins_vitacare + where flag__episodio_sem_informacao=false + group by 1 union all select * from ultimo_boletim_vitai @@ -101,4 +162,5 @@ select array_agg(distinct gid_boletim_obito ignore nulls) as gid_boletim_obito from obitos_flags where tem_boletim_pos_obito = 0 +and cpf is not null group by 1 \ No newline at end of file diff --git a/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__smsrio.sql 
b/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__smsrio.sql index 02dcb739..966b166d 100644 --- a/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__smsrio.sql +++ b/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__smsrio.sql @@ -2,280 +2,337 @@ config( alias="paciente_smsrio", materialized="table", - schema="intermediario_historico_clinico" + schema="intermediario_historico_clinico", ) }} --- This code integrates patient data from SMSRIO: --- rj-sms.brutos_plataforma_smsrio.paciente (SMSRIO) +-- This code integrates patient data from smsrio: +-- rj-sms.brutos_plataforma_smsrio.paciente (smsrio) -- The goal is to consolidate information such as registration data, -- contact, address and medical record into a single view. - -- Declaration of the variable to filter by CPF (optional) -- DECLARE cpf_filter STRING DEFAULT ""; +-- smsrio: Patient base table +with + smsrio_tb as ( + select + {{ remove_accents_upper("cpf") }} as cpf, + {{ validate_cpf(remove_accents_upper("cpf")) }} as cpf_valido_indicador, + {{ remove_accents_upper("cns_lista") }} as cns, + {{ remove_accents_upper("nome") }} as nome, + {{ remove_accents_upper("telefone_lista") }} as telefones, + {{ remove_accents_upper("email") }} as email, + {{ padronize_cep(remove_accents_upper("endereco_cep")) }} as cep, + {{ remove_accents_upper("endereco_tipo_logradouro") }} as tipo_logradouro, + {{ remove_accents_upper("endereco_logradouro") }} as logradouro, + {{ remove_accents_upper("endereco_numero") }} as numero, + {{ remove_accents_upper("endereco_complemento") }} as complemento, + {{ remove_accents_upper("endereco_bairro") }} as bairro, + {{ remove_accents_upper("endereco_municipio_codigo") }} as cidade, + {{ remove_accents_upper("endereco_uf") }} as estado, + {{ remove_accents_upper("cpf") }} as id_paciente, + cast(null as string) as nome_social, + {{ remove_accents_upper("sexo") }} as genero, + {{ 
remove_accents_upper("raca_cor") }} as raca, + {{ remove_accents_upper("nome_mae") }} as mae_nome, + {{ remove_accents_upper("nome_pai") }} as pai_nome, + date(data_nascimento) as data_nascimento, + date(data_obito) as obito_data, + {{ remove_accents_upper("obito") }} as obito_indicador, + updated_at, + cast(null as string) as id_cnes + from {{ ref("raw_plataforma_smsrio__paciente") }} -- `rj-sms-dev`.`brutos_plataforma_smsrio`.`paciente` + where {{ validate_cpf("cpf") }} + ), --- SMSRIO: Patient base table -WITH smsrio_tb AS ( - SELECT - {{remove_accents_upper('cpf')}} AS cpf, - {{ validate_cpf(remove_accents_upper('cpf')) }} AS cpf_valido_indicador, - {{remove_accents_upper('cns_lista')}} AS cns, - {{remove_accents_upper('nome')}} AS nome, - {{remove_accents_upper('telefone_lista')}} AS telefones, - {{remove_accents_upper('email')}} AS email, - {{remove_accents_upper('endereco_cep')}} AS cep, - {{remove_accents_upper('endereco_tipo_logradouro')}} AS tipo_logradouro, - {{remove_accents_upper('endereco_logradouro')}} AS logradouro, - {{remove_accents_upper('endereco_numero')}} AS numero, - {{remove_accents_upper('endereco_complemento')}} AS complemento, - {{remove_accents_upper('endereco_bairro')}} AS bairro, - {{remove_accents_upper('endereco_municipio_codigo')}} AS cidade, - {{remove_accents_upper('endereco_uf')}} AS estado, - {{remove_accents_upper('cpf')}} AS id_paciente, - CAST(NULL AS STRING) AS nome_social, - {{remove_accents_upper('sexo')}} AS genero, - {{remove_accents_upper('raca_cor')}} AS raca, - {{remove_accents_upper('nome_mae')}} AS mae_nome, - {{remove_accents_upper('nome_pai')}} AS pai_nome, - DATE(data_nascimento) AS data_nascimento, - DATE(data_obito) AS obito_data, - {{remove_accents_upper('obito')}} AS obito_indicador, - updated_at, - CAST(NULL AS STRING) AS id_cnes - FROM {{ref("raw_plataforma_smsrio__paciente")}} -- `rj-sms-dev`.`brutos_plataforma_smsrio`.`paciente` - WHERE {{validate_cpf('cpf')}} -), + -- CNS + smsrio_cns_ranked as ( + 
select + cpf, + case when trim(cns) in ('NONE') then null else trim(cns) end as cns, + row_number() over (partition by cpf order by updated_at desc) as rank, + from + ( + select cpf, cns, updated_at + from + smsrio_tb, + unnest( + split( + replace(replace(replace(cns, '[', ''), ']', ''), '"', ''), + ',' + ) + ) as cns + ) + group by cpf, cns, updated_at + ), --- CNS -smsrio_cns_ranked AS ( - SELECT - cpf, - CASE - WHEN TRIM(cns) IN ('NONE') THEN NULL - ELSE TRIM(cns) - END AS cns, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank, - FROM ( - SELECT - cpf, - cns, - updated_at - FROM smsrio_tb, - UNNEST(SPLIT(REPLACE(REPLACE(REPLACE(cns, '[', ''), ']', ''), '"', ''), ',')) AS cns - ) - GROUP BY cpf, cns, updated_at - -), - --- CNS Dados -cns_dedup AS ( - SELECT - cpf, - cns, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank - FROM( - SELECT + -- CNS Dados + cns_dedup as ( + select cpf, cns, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, cns ORDER BY merge_order, rank ASC) AS dedup_rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank + from + ( + select + cpf, + cns, + rank, + merge_order, + row_number() over ( + partition by cpf, cns order by merge_order, rank asc + ) as dedup_rank, + from (select cpf, cns, rank, 2 as merge_order from smsrio_cns_ranked) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), + cns_validated AS ( + SELECT + cns, + {{validate_cns('cns')}} AS cns_valido_indicador, FROM ( - SELECT - cpf, - cns, - rank, - 2 AS merge_order - FROM smsrio_cns_ranked + SELECT DISTINCT cns FROM cns_dedup ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), + ), -cns_dados AS ( - SELECT - cpf, - ARRAY_AGG( - STRUCT( - cns, - rank - ) - ) AS cns - FROM cns_dedup - GROUP BY cpf -), - - --- CONTATO TELEPHONE -smsrio_contato_telefone AS ( - SELECT - cpf, 
- tipo, - CASE - WHEN TRIM(valor) IN ("NONE", "NULL", "") THEN NULL - ELSE valor - END AS valor, - rank - FROM ( - SELECT + cns_dados AS ( + SELECT cpf, - 'telefone' AS tipo, - TRIM(telefones) AS valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank - FROM ( - SELECT - cpf, - telefones, - updated_at - FROM smsrio_tb, - UNNEST(SPLIT(REPLACE(REPLACE(REPLACE(telefones, '[', ''), ']', ''), '"', ''), ',')) AS telefones - ) - GROUP BY - cpf, telefones, updated_at - ) - WHERE NOT (TRIM(valor) IN ("NONE", "NULL", "") AND (rank >= 2)) -), + ARRAY_AGG( + STRUCT( + cd.cns, + cv.cns_valido_indicador, + cd.rank + ) + ) AS cns + FROM cns_dedup cd + JOIN cns_validated cv + ON cd.cns = cv.cns + GROUP BY cpf + ), --- CONTATO SMSRIO: Extracts and ranks email -smsrio_contato_email AS ( - SELECT - cpf, - tipo, - CASE - WHEN TRIM(valor) IN ("NONE", "NULL", "") THEN NULL - ELSE valor - END AS valor, - rank - FROM ( - SELECT + -- CONTATO TELEPHONE + smsrio_contato_telefone as ( + select cpf, - 'email' AS tipo, - email AS valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank - FROM smsrio_tb - GROUP BY - cpf, email, updated_at - ) - WHERE NOT (TRIM(valor) IN ("NONE", "NULL", "") AND (rank >= 2)) -), + tipo, + valor_original, + case + when length(valor) in (10, 11) + then substr(valor, 1, 2) -- Keep only the first 2 digits (DDD) + else null + end as ddd, + case + when length(valor) in (8, 9) + then valor -- For numbers with 8 or 9 digits, keep the original value + when length(valor) = 10 + then substr(valor, 3, 8) -- Keep only the last 8 digits (discard the first 2) + when length(valor) = 11 + then substr(valor, 3, 9) -- Keep only the last 9 digits (discard the first 2) + else null + end as valor, + case + when length(valor) = 8 + then 'fixo' + when length(valor) = 9 + then 'celular' + when length(valor) = 10 + then 'ddd_fixo' + when length(valor) = 11 + then 'ddd_celular' + else null + end as valor_tipo, + length(valor) as len, + rank + 
from + ( + select + cpf, + 'telefone' as tipo, + telefones as valor_original, + {{ padronize_telefone("telefones") }} as valor, + row_number() over ( + partition by cpf order by updated_at desc + ) as rank + from + ( + select cpf, telefones, updated_at + from + smsrio_tb, + unnest( + split( + replace( + replace(replace(telefones, '[', ''), ']', ''), + '"', + '' + ), + ',' + ) + ) as telefones + ) + group by cpf, telefones, updated_at + ) + where not (trim(valor) in ("NONE", "NULL", "") and (rank >= 2)) + ), + -- CONTATO smsrio: Extracts and ranks email + smsrio_contato_email as ( + select + cpf, + tipo, + case + when trim(valor) in ("NONE", "NULL", "") then null else valor + end as valor, + rank + from + ( + select + cpf, + 'email' as tipo, + email as valor, + row_number() over ( + partition by cpf order by updated_at desc + ) as rank + from smsrio_tb + group by cpf, email, updated_at + ) + where not (trim(valor) in ("NONE", "NULL", "") and (rank >= 2)) + ), -telefone_dedup AS ( - SELECT - cpf, - valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - sistema - FROM ( - SELECT + telefone_dedup as ( + select cpf, + valor_original, + ddd, valor, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, valor ORDER BY merge_order, rank ASC) AS dedup_rank, + valor_tipo, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank, sistema - FROM ( - SELECT - cpf, - valor, - rank, - "SMSRIO" AS sistema, - 2 AS merge_order - FROM smsrio_contato_telefone - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), + from + ( + select + cpf, + valor_original, + ddd, + valor, + valor_tipo, + rank, + merge_order, + row_number() over ( + partition by cpf, valor order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select + cpf, + valor_original, + ddd, + valor, + valor_tipo, + rank, + "smsrio" as sistema, + 2 as merge_order + from 
smsrio_contato_telefone + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), -email_dedup AS ( - SELECT - cpf, - valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - sistema - FROM ( - SELECT + -- EMAIL DEDUP: keeps one row per (cpf, valor) read from smsrio_contato_email and re-ranks per cpf + email_dedup as ( + select cpf, valor, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, valor ORDER BY merge_order, rank ASC) AS dedup_rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank, sistema - FROM ( - SELECT - cpf, - valor, - rank, - "SMSRIO" AS sistema, - 2 AS merge_order - FROM smsrio_contato_email - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), - -contato_dados AS ( - SELECT - COALESCE(t.cpf, e.cpf) AS cpf, - STRUCT( - ARRAY_AGG(STRUCT(t.valor, t.sistema,t.rank)) AS telefone, - ARRAY_AGG(STRUCT(e.valor, e.sistema, e.rank)) AS email - ) AS contato - FROM telefone_dedup t - FULL OUTER JOIN email_dedup e - ON t.cpf = e.cpf - GROUP BY COALESCE(t.cpf, e.cpf) -), + from + ( + select + cpf, + valor, + rank, + merge_order, + row_number() over ( + partition by cpf, valor order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select cpf, valor, rank, "smsrio" as sistema, 2 as merge_order + from smsrio_contato_email + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), + contato_dados as ( + select + coalesce(t.cpf, e.cpf) as cpf, + struct( + array_agg( + struct( + t.valor_original, + t.ddd, + t.valor, + t.valor_tipo, + lower(t.sistema) as sistema, + t.rank + ) + ) as telefone, + array_agg( + struct(lower(e.valor) as valor, lower(e.sistema) as sistema, e.rank) + ) as email + ) as contato + from telefone_dedup t + full outer join email_dedup e on t.cpf = e.cpf + group by coalesce(t.cpf, e.cpf) + ), --- ENDEREÇO -smsrio_endereco AS ( - SELECT - cpf, - cep, - CASE - WHEN tipo_logradouro IN
("NONE","") THEN NULL - ELSE tipo_logradouro - END AS tipo_logradouro, - logradouro, - numero, - complemento, - bairro, - CASE - WHEN cidade IN ("NONE","") THEN NULL - ELSE cidade - END AS cidade, - estado, - CAST(updated_at AS STRING) AS datahora_ultima_atualizacao, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank - FROM smsrio_tb - GROUP BY - cpf, cep, tipo_logradouro, logradouro, numero, complemento, bairro, cidade, estado, updated_at -), + -- ENDEREÇO + smsrio_endereco as ( + select + cpf, + cep, + case + when tipo_logradouro in ("NONE", "") then null else tipo_logradouro + end as tipo_logradouro, + logradouro, + numero, + complemento, + bairro, + case when cidade in ("NONE", "") then null else cidade end as cidade, + estado, + cast(updated_at as string) as datahora_ultima_atualizacao, + row_number() over (partition by cpf order by updated_at desc) as rank + from smsrio_tb + group by + cpf, + cep, + tipo_logradouro, + logradouro, + numero, + complemento, + bairro, + cidade, + estado, + updated_at + ), -endereco_dedup AS ( - SELECT - cpf, - cep, - tipo_logradouro, - logradouro, - numero, - complemento, - bairro, - cidade, - estado, - datahora_ultima_atualizacao, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - sistema - FROM ( - SELECT + endereco_dedup as ( + select cpf, cep, tipo_logradouro, @@ -286,211 +343,253 @@ endereco_dedup AS ( cidade, estado, datahora_ultima_atualizacao, - merge_order, - rank, - ROW_NUMBER() OVER (PARTITION BY cpf, datahora_ultima_atualizacao ORDER BY merge_order, rank ASC) AS dedup_rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank, sistema - FROM ( - SELECT - cpf, - cep, - tipo_logradouro, - logradouro, - numero, - complemento, - bairro, - cidade, - estado, - datahora_ultima_atualizacao, - rank, - "SMSRIO" AS sistema, - 2 AS merge_order - FROM smsrio_endereco - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER 
BY merge_order ASC, rank ASC -), - -endereco_dados AS ( - SELECT - cpf, - ARRAY_AGG(STRUCT( - cep, - tipo_logradouro, - logradouro, - numero, - complemento, - bairro, - cidade, - estado, - datahora_ultima_atualizacao, - sistema, - rank - )) AS endereco - FROM endereco_dedup - GROUP BY cpf -), + from + ( + select + cpf, + cep, + tipo_logradouro, + logradouro, + numero, + complemento, + bairro, + cidade, + estado, + datahora_ultima_atualizacao, + merge_order, + rank, + row_number() over ( + partition by cpf, datahora_ultima_atualizacao + order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select + cpf, + cep, + tipo_logradouro, + logradouro, + numero, + complemento, + bairro, + cidade, + estado, + datahora_ultima_atualizacao, + rank, + "smsrio" as sistema, + 2 as merge_order + from smsrio_endereco + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), --- PRONTUARIO -smsrio_prontuario AS ( - SELECT - cpf, - 'SMSRIO' AS sistema, - id_cnes, - id_paciente, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank - FROM smsrio_tb - GROUP BY - cpf, id_cnes,id_paciente, updated_at -), - -prontuario_dedup AS ( - SELECT - cpf, - sistema, - id_cnes, - id_paciente, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank - FROM ( - SELECT + endereco_dados as ( + select cpf, - sistema, - id_cnes, - id_paciente, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, id_cnes, id_paciente ORDER BY merge_order, rank ASC) AS dedup_rank - FROM ( - SELECT - vi.cpf, - "SMSRIO" AS sistema, - id_cnes, - id_paciente, - rank, - 2 AS merge_order - FROM smsrio_prontuario vi - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), + array_agg( + struct( + cep, + lower(tipo_logradouro) as tipo_logradouro, + {{ proper_br("logradouro") }} as logradouro, + numero, + lower(complemento) as complemento, + {{ proper_br("bairro") 
}} as bairro, + {{ proper_br("cidade") }} as cidade, + lower(estado) as estado, + timestamp( + datahora_ultima_atualizacao + ) as datahora_ultima_atualizacao, + lower(sistema) as sistema, + rank + ) + ) as endereco + from endereco_dedup + group by cpf + ), -prontuario_dados AS ( - SELECT - cpf, - ARRAY_AGG(STRUCT( - sistema, - id_cnes, - id_paciente, - rank - )) AS prontuario - FROM prontuario_dedup - GROUP BY cpf -), + -- PRONTUARIO + smsrio_prontuario as ( + select + cpf, + 'smsrio' as sistema, + id_cnes, + id_paciente, + row_number() over (partition by cpf order by updated_at desc) as rank + from smsrio_tb + group by cpf, id_cnes, id_paciente, updated_at + ), + prontuario_dedup as ( + select + cpf, + sistema, + id_cnes, + id_paciente, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank + from + ( + select + cpf, + sistema, + id_cnes, + id_paciente, + rank, + merge_order, + row_number() over ( + partition by cpf, id_cnes, id_paciente + order by merge_order, rank asc + ) as dedup_rank + from + ( + select + vi.cpf, + "smsrio" as sistema, + id_cnes, + id_paciente, + rank, + 2 as merge_order + from smsrio_prontuario vi + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), --- PACIENTE DADOS -smsrio_paciente AS ( - SELECT - cpf, - cpf_valido_indicador, - {{proper_br('nome')}} AS nome, - {{proper_br('nome_social')}} AS nome_social, - data_nascimento, - CASE - WHEN genero = "1" THEN INITCAP("MASCULINO") - WHEN genero = "2" THEN INITCAP("FEMININO") - ELSE NULL - END AS genero, - CASE - WHEN raca IN ("NONE", "None", "NAO INFORMADO", "SEM INFORMACAO") THEN NULL - ELSE INITCAP(raca) - END AS raca, - CASE - WHEN obito_indicador = "0" THEN FALSE - WHEN obito_indicador = "1" THEN TRUE - ELSE NULL - END AS obito_indicador, - obito_data, - CASE - WHEN mae_nome IN ("NONE") THEN NULL - ELSE mae_nome - END AS mae_nome, - pai_nome, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at) AS 
rank - FROM smsrio_tb - GROUP BY - cpf, nome,nome_social, cpf, data_nascimento, genero, obito_indicador, obito_data, mae_nome, pai_nome, updated_at,cpf_valido_indicador, - CASE - WHEN raca IN ("NONE", "None", "NAO INFORMADO", "SEM INFORMACAO") THEN NULL - ELSE INITCAP(raca) - END -), + prontuario_dados as ( + select + cpf, + array_agg( + struct(lower(sistema) as sistema, id_cnes, id_paciente, rank) + ) as prontuario + from prontuario_dedup + group by cpf + ), -paciente_metadados AS ( - SELECT - cpf, - STRUCT( - -- count the distinct values for each field - COUNT(DISTINCT nome) AS qtd_nomes, - COUNT(DISTINCT nome_social) AS qtd_nomes_sociais, - COUNT(DISTINCT data_nascimento) AS qtd_datas_nascimento, - COUNT(DISTINCT genero) AS qtd_generos, - COUNT(DISTINCT raca) AS qtd_racas, - COUNT(DISTINCT obito_indicador) AS qtd_obitos_indicadores, - COUNT(DISTINCT obito_data) AS qtd_datas_obitos, - COUNT(DISTINCT mae_nome) AS qtd_maes_nomes, - COUNT(DISTINCT pai_nome) AS qtd_pais_nomes, - COUNT(DISTINCT cpf_valido_indicador) AS qtd_cpfs_validos, - "SMSRIO" AS sistema - ) AS metadados - FROM smsrio_paciente - GROUP BY cpf -), + -- PACIENTE DADOS + smsrio_paciente as ( + select + cpf, + cpf_valido_indicador, + {{ proper_br("nome") }} as nome, + {{ proper_br("nome_social") }} as nome_social, + data_nascimento, + case + when genero = "1" + then initcap("MASCULINO") + when genero = "2" + then initcap("FEMININO") + else null + end as genero, + case + when raca in ("NONE", "None", "NAO INFORMADO", "SEM INFORMACAO") + then null + else initcap(raca) + end as raca, + case + when obito_indicador = "0" + then false + when obito_indicador = "1" + then true + else null + end as obito_indicador, + obito_data, + case when mae_nome in ("NONE") then null else mae_nome end as mae_nome, + pai_nome, + row_number() over (partition by cpf order by updated_at) as rank + from smsrio_tb + group by + cpf, + nome, + nome_social, + cpf, + data_nascimento, + genero, + obito_indicador, + obito_data, + 
mae_nome, + pai_nome, + updated_at, + cpf_valido_indicador, + case + when raca in ("NONE", "None", "NAO INFORMADO", "SEM INFORMACAO") + then null + else initcap(raca) + end + ), -paciente_dados AS ( - SELECT - pc.cpf, - ARRAY_AGG(STRUCT( - cpf_valido_indicador, - nome, - nome_social, - data_nascimento, - genero, - raca, - obito_indicador, - obito_data, - mae_nome, - pai_nome, - rank, - pm.metadados - )) AS dados - FROM smsrio_paciente pc - JOIN paciente_metadados as pm - ON pc.cpf = pm.cpf - GROUP BY cpf -), + paciente_metadados as ( + select + cpf, + struct( + -- count the distinct values for each field + count(distinct nome) as qtd_nomes, + count(distinct nome_social) as qtd_nomes_sociais, + count(distinct data_nascimento) as qtd_datas_nascimento, + count(distinct genero) as qtd_generos, + count(distinct raca) as qtd_racas, + count(distinct obito_indicador) as qtd_obitos_indicadores, + count(distinct obito_data) as qtd_datas_obitos, + count(distinct mae_nome) as qtd_maes_nomes, + count(distinct pai_nome) as qtd_pais_nomes, + count(distinct cpf_valido_indicador) as qtd_cpfs_validos, + "smsrio" as sistema + ) as metadados + from smsrio_paciente + group by cpf + ), ----- FINAL JOIN: Joins all the data previously processed, creating the ----- integrated table of the patients. 
-paciente_integrado AS ( - SELECT - pd.cpf, - cns.cns, - pd.dados, - ct.contato, - ed.endereco, - pt.prontuario, - STRUCT(CURRENT_TIMESTAMP() AS created_at) AS metadados - FROM paciente_dados pd - LEFT JOIN cns_dados cns ON pd.cpf = cns.cpf - LEFT JOIN contato_dados ct ON pd.cpf = ct.cpf - LEFT JOIN endereco_dados ed ON pd.cpf = ed.cpf - LEFT JOIN prontuario_dados pt ON pd.cpf = pt.cpf -) + paciente_dados as ( + select + pc.cpf, + array_agg( + struct( + cpf_valido_indicador, + {{ proper_br("nome") }} as nome, + {{ proper_br("nome_social") }} as nome_social, + data_nascimento, + lower(genero) as genero, + lower(raca) as raca, + obito_indicador, + obito_data, + {{ proper_br("mae_nome") }} as mae_nome, + {{ proper_br("pai_nome") }} as pai_nome, + rank, + pm.metadados + ) + ) as dados + from smsrio_paciente pc + join paciente_metadados as pm on pc.cpf = pm.cpf + group by cpf + ), + -- -- FINAL JOIN: Joins all the data previously processed, creating the + -- -- integrated table of the patients. 
+ paciente_integrado as ( + select + pd.cpf, + cns.cns, + pd.dados, + ct.contato, + ed.endereco, + pt.prontuario, + struct(current_timestamp() as created_at) as metadados + from paciente_dados pd + left join cns_dados cns on pd.cpf = cns.cpf + left join contato_dados ct on pd.cpf = ct.cpf + left join endereco_dados ed on pd.cpf = ed.cpf + left join prontuario_dados pt on pd.cpf = pt.cpf + ) -SELECT * FROM paciente_integrado \ No newline at end of file +select * +from paciente_integrado diff --git a/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__vitacare.sql b/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__vitacare.sql index 2b339b40..2ca94529 100644 --- a/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__vitacare.sql +++ b/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__vitacare.sql @@ -11,8 +11,8 @@ ) }} --- This code integrates patient data from VITACARE: --- rj-sms.brutos_prontuario_vitacare.paciente (VITACARE) +-- This code integrates patient data from vitacare: +-- rj-sms.brutos_prontuario_vitacare.paciente (vitacare) -- The goal is to consolidate information such as registration data, -- contact, address and medical record into a single view. 
-- Declaration of the variable to filter by CPF (optional) @@ -104,11 +104,32 @@ with order by merge_order asc, rank asc ), - cns_dados as ( - select cpf, array_agg(struct(cns, rank)) as cns from cns_dedup group by cpf - ), +cns_validated as ( + select + cns, + {{validate_cns('cns')}} as cns_valido_indicador, + from ( + select distinct cns from cns_dedup + ) +), + +cns_dados as ( + select + cpf, + array_agg( + struct( + cd.cns, + cv.cns_valido_indicador, + cd.rank + ) + ) as cns + from cns_dedup cd + join cns_validated cv + on cd.cns = cv.cns + group by cpf +), - -- EQUIPE SAUDE FAMILIA VITACARE: Extracts and ranks family health teams + -- EQUIPE SAUDE FAMILIA vitacare: Extracts and ranks family health teams -- clinica da familia source_clinica_familia as ( select * @@ -230,14 +251,41 @@ with select cpf, tipo, - case when trim(valor) in ("()", "") then null else valor end as valor, + valor_original, + case + when length(valor) in (10, 11) + then substr(valor, 1, 2) -- Keep only the first 2 digits (DDD) + else null + end as ddd, + case + when length(valor) in (8, 9) + then valor -- For numbers with 8 or 9 digits, keep the original value + when length(valor) = 10 + then substr(valor, 3, 8) -- Keep only the last 8 digits (discard the first 2) + when length(valor) = 11 + then substr(valor, 3, 9) -- Keep only the last 9 digits (discard the first 2) + else null + end as valor, + case + when length(valor) = 8 + then 'fixo' + when length(valor) = 9 + then 'celular' + when length(valor) = 10 + then 'ddd_fixo' + when length(valor) = 11 + then 'ddd_celular' + else null + end as valor_tipo, + length(valor) as len, rank from ( select cpf, 'telefone' as tipo, - telefone as valor, + telefone as valor_original, + {{ padronize_telefone("telefone") }} as valor, row_number() over ( partition by cpf order by @@ -290,7 +338,10 @@ with telefone_dedup as ( select cpf, + valor_original, + ddd, valor, + valor_tipo, row_number() over ( partition by cpf order by merge_order asc, rank asc ) 
as rank, @@ -299,7 +350,10 @@ with ( select cpf, + valor_original, + ddd, valor, + valor_tipo, rank, merge_order, row_number() over ( @@ -308,7 +362,15 @@ with sistema from ( - select cpf, valor, rank, "VITACARE" as sistema, 1 as merge_order + select + cpf, + valor_original, + ddd, + valor, + valor_tipo, + rank, + "vitacare" as sistema, + 1 as merge_order from vitacare_contato_telefone ) order by merge_order asc, rank asc @@ -338,7 +400,7 @@ with sistema from ( - select cpf, valor, rank, "VITACARE" as sistema, 1 as merge_order + select cpf, valor, rank, "vitacare" as sistema, 1 as merge_order from vitacare_contato_email ) order by merge_order asc, rank asc @@ -351,8 +413,19 @@ with select coalesce(t.cpf, e.cpf) as cpf, struct( - array_agg(struct(t.valor, t.sistema, t.rank)) as telefone, - array_agg(struct(e.valor, e.sistema, e.rank)) as email + array_agg( + struct( + t.valor_original, + t.ddd, + t.valor, + t.valor_tipo, + lower(t.sistema) as sistema, + t.rank + ) + ) as telefone, + array_agg( + struct(lower(e.valor) as valor, lower(e.sistema) as sistema, e.rank) + ) as email ) as contato from telefone_dedup t full outer join email_dedup e on t.cpf = e.cpf @@ -448,7 +521,7 @@ with estado, datahora_ultima_atualizacao, rank, - "VITACARE" as sistema, + "vitacare" as sistema, 1 as merge_order from vitacare_endereco ) @@ -464,15 +537,17 @@ with array_agg( struct( cep, - tipo_logradouro, - logradouro, + lower(tipo_logradouro) as tipo_logradouro, + {{ proper_br("logradouro") }} as logradouro, numero, - complemento, - bairro, - cidade, - estado, - datahora_ultima_atualizacao, - sistema, + lower(complemento) as complemento, + {{ proper_br("bairro") }} as bairro, + {{ proper_br("cidade") }} as cidade, + lower(estado) as estado, + timestamp( + datahora_ultima_atualizacao + ) as datahora_ultima_atualizacao, + lower(sistema) as sistema, rank ) ) as endereco @@ -484,7 +559,7 @@ with vitacare_prontuario as ( select cpf, - 'VITACARE' as sistema, + 'vitacare' as sistema, id_cnes, 
id_paciente, row_number() over ( @@ -530,7 +605,7 @@ with ( select vi.cpf, - "VITACARE" as sistema, + "vitacare" as sistema, id_cnes, id_paciente, rank, @@ -544,13 +619,16 @@ with ), prontuario_dados as ( - select cpf, array_agg(struct(sistema, id_cnes, id_paciente, rank)) as prontuario + select + cpf, + array_agg( + struct(lower(sistema) as sistema, id_cnes, id_paciente, rank) + ) as prontuario from prontuario_dedup group by cpf ), -- PACIENTE DADOS - paciente_metadados as ( select cpf, @@ -566,7 +644,7 @@ with count(distinct mae_nome) as qtd_maes_nomes, count(distinct pai_nome) as qtd_pais_nomes, count(distinct cpf_valido_indicador) as qtd_cpfs_validos, - "VITACARE" as sistema + "vitacare" as sistema ) as metadados from paciente group by cpf @@ -581,8 +659,8 @@ with {{ proper_br("nome") }} as nome, {{ proper_br("nome_social") }} as nome_social, data_nascimento, - {{ proper_br("genero") }} as genero, - {{ proper_br("raca") }} as raca, + lower(genero) as genero, + lower(raca) as raca, obito_indicador, obito_data, {{ proper_br("mae_nome") }} as mae_nome, diff --git a/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__vitai.sql b/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__vitai.sql index af967d5c..f6a8f975 100644 --- a/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__vitai.sql +++ b/models/intermediate/historico_clinico/paciente/int_historico_clinico__paciente__vitai.sql @@ -2,291 +2,322 @@ config( alias="paciente_vitai", materialized="table", - schema="intermediario_historico_clinico" + schema="intermediario_historico_clinico", ) }} --- This code integrates patient data from VITAI: --- rj-sms.brutos_prontuario_vitai.paciente (VITAI) + +-- This code integrates patient data from vitai: +-- rj-sms.brutos_prontuario_vitai.paciente (vitai) -- The goal is to consolidate information such as registration data, -- contact, address and medical record into a single view. 
- -- Declaration of the variable to filter by CPF (optional) -- DECLARE cpf_filter STRING DEFAULT ""; - - ----=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- --- Get source data and standardize ----=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- - +-- -=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- +-- Get source data and standardize +-- -=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- -- Patient base table -WITH vitai_tb AS ( - SELECT - {{remove_accents_upper('cpf')}} AS cpf, - {{ validate_cpf(remove_accents_upper('cpf')) }} AS cpf_valido_indicador, - {{remove_accents_upper('cns')}} AS cns, - {{remove_accents_upper('nome')}} AS nome, - {{remove_accents_upper('telefone')}} AS telefone, - CAST("" AS STRING) AS email, - CAST(NULL AS STRING) AS cep, - {{remove_accents_upper('tipo_logradouro')}} AS tipo_logradouro, - {{remove_accents_upper('nome_logradouro')}} AS logradouro, - {{remove_accents_upper('numero')}} AS numero, - {{remove_accents_upper('complemento')}} AS complemento, - {{remove_accents_upper('bairro')}} AS bairro, - {{remove_accents_upper('municipio')}} AS cidade, - {{remove_accents_upper('uf')}} AS estado, - {{remove_accents_upper('gid')}} AS id_paciente, - {{remove_accents_upper('nome_alternativo')}} AS nome_social, - {{remove_accents_upper('sexo')}} AS genero, - {{remove_accents_upper('raca_cor')}} AS raca, - {{remove_accents_upper('nome_mae')}} AS mae_nome, - CAST(NULL AS STRING) AS pai_nome, - DATE(data_nascimento) AS data_nascimento, - DATE(data_obito) AS obito_data, - updated_at, - gid_estabelecimento AS id_cnes -- use gid to get id_cnes from rj-sms.brutos_prontuario_vitai.estabelecimento - FROM {{ref('raw_prontuario_vitai__paciente')}} -- `rj-sms-dev`.`brutos_prontuario_vitai`.`paciente` - WHERE {{validate_cpf('cpf')}} -), +with + vitai_tb as ( + select + {{ remove_accents_upper("cpf") }} as cpf, + {{ validate_cpf(remove_accents_upper("cpf")) }} as cpf_valido_indicador, + {{ remove_accents_upper("cns") }} as cns, + {{ 
remove_accents_upper("nome") }} as nome, + {{ remove_accents_upper("telefone") }} as telefone, + cast("" as string) as email, + cast(null as string) as cep, + {{ remove_accents_upper("tipo_logradouro") }} as tipo_logradouro, + {{ remove_accents_upper("nome_logradouro") }} as logradouro, + {{ remove_accents_upper("numero") }} as numero, + {{ remove_accents_upper("complemento") }} as complemento, + {{ remove_accents_upper("bairro") }} as bairro, + {{ remove_accents_upper("municipio") }} as cidade, + {{ remove_accents_upper("uf") }} as estado, + {{ remove_accents_upper("gid") }} as id_paciente, + {{ remove_accents_upper("nome_alternativo") }} as nome_social, + {{ remove_accents_upper("sexo") }} as genero, + {{ remove_accents_upper("raca_cor") }} as raca, + {{ remove_accents_upper("nome_mae") }} as mae_nome, + cast(null as string) as pai_nome, + date(data_nascimento) as data_nascimento, + date(data_obito) as obito_data, + updated_at, + gid_estabelecimento as id_cnes -- use gid to get id_cnes from rj-sms.brutos_prontuario_vitai.estabelecimento + from {{ ref("raw_prontuario_vitai__paciente") }} -- `rj-sms-dev`.`brutos_prontuario_vitai`.`paciente` + where {{ validate_cpf("cpf") }} + ), --- CNS -vitai_cns_ranked AS ( - SELECT - cpf, - cns, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank - FROM ( - SELECT + -- CNS + vitai_cns_ranked as ( + select cpf, - CASE - WHEN TRIM(cns) IN ('NONE') THEN NULL - ELSE TRIM(cns) - END AS cns, - updated_at - FROM vitai_tb - ) - WHERE - cns IS NOT NULL - AND TRIM(cns) NOT IN ("") - GROUP BY cpf, cns, updated_at -), + cns, + row_number() over (partition by cpf order by updated_at desc) as rank + from + ( + select + cpf, + case when trim(cns) in ('NONE') then null else trim(cns) end as cns, + updated_at + from vitai_tb + ) + where cns is not null and trim(cns) not in ("") + group by cpf, cns, updated_at + ), --- CNS Dados -cns_dedup AS ( - SELECT - cpf, - cns, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order 
ASC, rank ASC) AS rank - FROM( - SELECT + -- CNS Dados + cns_dedup as ( + select cpf, cns, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, cns ORDER BY merge_order, rank ASC) AS dedup_rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank + from + ( + select + cpf, + cns, + rank, + merge_order, + row_number() over ( + partition by cpf, cns order by merge_order, rank asc + ) as dedup_rank, + from (select cpf, cns, rank, 3 as merge_order from vitai_cns_ranked) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), + cns_validated AS ( + SELECT + cns, + {{validate_cns('cns')}} AS cns_valido_indicador, FROM ( - SELECT - cpf, - cns, - rank, - 3 AS merge_order - FROM vitai_cns_ranked + SELECT DISTINCT cns FROM cns_dedup ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), - -cns_dados AS ( - SELECT - cpf, - ARRAY_AGG( - STRUCT( - cns, - rank - ) - ) AS cns - FROM cns_dedup - GROUP BY cpf -), - --- CONTATO TELEPHONE -vitai_contato_telefone AS ( - SELECT - cpf, - tipo, - CASE - WHEN TRIM(valor) IN ("()", "") THEN NULL - ELSE valor - END AS valor, - rank - FROM ( - SELECT + ), + cns_dados AS ( + SELECT cpf, - 'telefone' AS tipo, - telefone AS valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank - FROM vitai_tb - GROUP BY cpf, telefone, updated_at - ) - WHERE NOT (TRIM(valor) IN ("()", "") AND (rank >= 2)) -), + ARRAY_AGG( + STRUCT( + cd.cns, + cv.cns_valido_indicador, + cd.rank + ) + ) AS cns + FROM cns_dedup cd + JOIN cns_validated cv + ON cd.cns = cv.cns + GROUP BY cpf + ), + -- CONTATO TELEPHONE + vitai_contato_telefone as ( + select + cpf, + tipo, + valor_original, + case + when length(valor) in (10, 11) + then substr(valor, 1, 2) -- Keep only the first 2 digits (DDD) + else null + end as ddd, + case + when length(valor) in (8, 9) + then valor -- For numbers with 8 or 9 digits, keep the 
original value + when length(valor) = 10 + then substr(valor, 3, 8) -- Keep only the last 8 digits (discard the first 2) + when length(valor) = 11 + then substr(valor, 3, 9) -- Keep only the last 9 digits (discard the first 2) + else null + end as valor, + case + when length(valor) = 8 + then 'fixo' + when length(valor) = 9 + then 'celular' + when length(valor) = 10 + then 'ddd_fixo' + when length(valor) = 11 + then 'ddd_celular' + else null + end as valor_tipo, + length(valor) as len, + rank + from + ( + select + cpf, + 'telefone' as tipo, + telefone as valor_original, + {{ padronize_telefone("telefone") }} as valor, + row_number() over ( + partition by cpf order by updated_at desc + ) as rank + from vitai_tb + group by cpf, telefone, updated_at + ) + where not (trim(valor) in ("()", "") and (rank >= 2)) + ), --- CONTATO EMAIL -vitai_contato_email AS ( - SELECT - cpf, - tipo, - CASE - WHEN TRIM(valor) IN ("()", "") THEN NULL - ELSE valor - END AS valor, - rank - FROM ( - SELECT + -- CONTATO EMAIL + vitai_contato_email as ( + select cpf, - 'email' AS tipo, - email AS valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank - FROM vitai_tb - GROUP BY cpf, email, updated_at - ) - WHERE NOT (TRIM(valor) IN ("()", "") AND (rank >= 2)) -), + tipo, + case when trim(valor) in ("()", "") then null else valor end as valor, + rank + from + ( + select + cpf, + 'email' as tipo, + email as valor, + row_number() over ( + partition by cpf order by updated_at desc + ) as rank + from vitai_tb + group by cpf, email, updated_at + ) + where not (trim(valor) in ("()", "") and (rank >= 2)) + ), -telefone_dedup AS ( - SELECT - cpf, - valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - sistema - FROM ( - SELECT + telefone_dedup as ( + select cpf, + valor_original, + ddd, valor, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, valor ORDER BY merge_order, rank ASC) AS dedup_rank, + valor_tipo, + row_number() over ( + 
partition by cpf order by merge_order asc, rank asc + ) as rank, sistema - FROM ( - SELECT - cpf, - valor, - rank, - "VITAI" AS sistema, - 3 AS merge_order - FROM vitai_contato_telefone - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), + from + ( + select + cpf, + valor_original, + ddd, + valor, + valor_tipo, + rank, + merge_order, + row_number() over ( + partition by cpf, valor order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select + cpf, + valor_original, + ddd, + valor, + valor_tipo, + rank, + "vitai" as sistema, + 3 as merge_order + from vitai_contato_telefone + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), -email_dedup AS ( - SELECT - cpf, - valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - sistema - FROM ( - SELECT + email_dedup as ( + select cpf, valor, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, valor ORDER BY merge_order, rank ASC) AS dedup_rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank, sistema - FROM ( - SELECT - cpf, - valor, - rank, - "VITAI" AS sistema, - 3 AS merge_order - FROM vitai_contato_email - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), + from + ( + select + cpf, + valor, + rank, + merge_order, + row_number() over ( + partition by cpf, valor order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select cpf, valor, rank, "vitai" as sistema, 3 as merge_order + from vitai_contato_email + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), -contato_dados AS ( - SELECT - COALESCE(t.cpf, e.cpf) AS cpf, - STRUCT( - ARRAY_AGG(STRUCT(t.valor, t.sistema,t.rank)) AS telefone, - ARRAY_AGG(STRUCT(e.valor, e.sistema, e.rank)) AS email - ) AS contato - FROM telefone_dedup t - 
FULL OUTER JOIN email_dedup e - ON t.cpf = e.cpf - GROUP BY COALESCE(t.cpf, e.cpf) -), + contato_dados as ( + select + coalesce(t.cpf, e.cpf) as cpf, + struct( + array_agg( + struct( + t.valor_original, + t.ddd, + t.valor, + t.valor_tipo, + lower(t.sistema) as sistema, + t.rank + ) + ) as telefone, + array_agg( + struct(lower(e.valor) as valor, lower(e.sistema) as sistema, e.rank) + ) as email + ) as contato + from telefone_dedup t + full outer join email_dedup e on t.cpf = e.cpf + group by coalesce(t.cpf, e.cpf) + ), --- ENDEREÇO -vitai_endereco AS ( - SELECT - cpf, - CASE - WHEN cep in ("NONE") THEN NULL - ELSE cep - END AS cep, - CASE - WHEN tipo_logradouro in ("NONE") THEN NULL - ELSE tipo_logradouro - END AS tipo_logradouro, - CASE - WHEN logradouro in ("NONE") THEN NULL - ELSE logradouro - END AS logradouro, - CASE - WHEN numero in ("NONE") THEN NULL - ELSE numero - END AS numero, - CASE - WHEN complemento in ("NONE") THEN NULL - ELSE complemento - END AS complemento, - CASE - WHEN bairro in ("NONE") THEN NULL - ELSE bairro - END AS bairro, - CASE - WHEN cidade in ("NONE") THEN NULL - ELSE cidade - END AS cidade, - CASE - WHEN estado in ("NONE") THEN NULL - ELSE estado - END AS estado, - CAST(updated_at AS STRING) AS datahora_ultima_atualizacao, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank - FROM vitai_tb - WHERE logradouro IS NOT NULL - GROUP BY - cpf,cep, tipo_logradouro, logradouro, numero, complemento, bairro, cidade, estado, updated_at -), + -- ENDEREÇO + vitai_endereco as ( + select + cpf, + case when cep in ("NONE") then null else cep end as cep, + case + when tipo_logradouro in ("NONE") then null else tipo_logradouro + end as tipo_logradouro, + case + when logradouro in ("NONE") then null else logradouro + end as logradouro, + case when numero in ("NONE") then null else numero end as numero, + case + when complemento in ("NONE") then null else complemento + end as complemento, + case when bairro in ("NONE") then null else 
bairro end as bairro, + case when cidade in ("NONE") then null else cidade end as cidade, + case when estado in ("NONE") then null else estado end as estado, + cast(updated_at as string) as datahora_ultima_atualizacao, + row_number() over (partition by cpf order by updated_at desc) as rank + from vitai_tb + where logradouro is not null + group by + cpf, + cep, + tipo_logradouro, + logradouro, + numero, + complemento, + bairro, + cidade, + estado, + updated_at + ), -endereco_dedup AS ( - SELECT - cpf, - cep, - tipo_logradouro, - logradouro, - numero, - complemento, - bairro, - cidade, - estado, - datahora_ultima_atualizacao, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - sistema - FROM ( - SELECT + endereco_dedup as ( + select cpf, cep, tipo_logradouro, @@ -297,226 +328,257 @@ endereco_dedup AS ( cidade, estado, datahora_ultima_atualizacao, - merge_order, - rank, - ROW_NUMBER() OVER (PARTITION BY cpf, datahora_ultima_atualizacao ORDER BY merge_order, rank ASC) AS dedup_rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank, sistema - FROM ( - SELECT - cpf, - cep, - tipo_logradouro, - logradouro, - numero, - complemento, - bairro, - cidade, - estado, - datahora_ultima_atualizacao, - rank, - "VITAI" AS sistema, - 3 AS merge_order - FROM vitai_endereco - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), - -endereco_dados AS ( - SELECT - cpf, - ARRAY_AGG(STRUCT( - cep, - tipo_logradouro, - logradouro, - numero, - complemento, - bairro, - cidade, - estado, - datahora_ultima_atualizacao, - sistema, - rank - )) AS endereco - FROM endereco_dedup - GROUP BY cpf -), - --- PRONTUARIO -vitai_prontuario AS ( - SELECT - cpf, - 'VITAI' AS sistema, - id_cnes, - id_paciente, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at DESC) AS rank - FROM( - SELECT - pc.updated_at, - pc.cpf, - pc.id_paciente, - es.cnes AS id_cnes, - FROM vitai_tb pc - 
JOIN {{ ref('raw_prontuario_vitai__m_estabelecimento') }} es - ON pc.id_cnes = es.gid - ) - GROUP BY - cpf, id_cnes, id_paciente, updated_at -), + from + ( + select + cpf, + cep, + tipo_logradouro, + logradouro, + numero, + complemento, + bairro, + cidade, + estado, + datahora_ultima_atualizacao, + merge_order, + rank, + row_number() over ( + partition by cpf, datahora_ultima_atualizacao + order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select + cpf, + cep, + tipo_logradouro, + logradouro, + numero, + complemento, + bairro, + cidade, + estado, + datahora_ultima_atualizacao, + rank, + "vitai" as sistema, + 3 as merge_order + from vitai_endereco + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), -prontuario_dedup AS ( - SELECT - cpf, - sistema, - id_cnes, - id_paciente, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank - FROM ( - SELECT + endereco_dados as ( + select cpf, - sistema, - id_cnes, - id_paciente, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, id_cnes, id_paciente ORDER BY merge_order, rank ASC) AS dedup_rank - FROM ( - SELECT - vi.cpf, - "VITAI" AS sistema, - id_cnes, - id_paciente, - rank, - 3 AS merge_order - FROM vitai_prontuario vi - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), - -prontuario_dados AS ( - SELECT - cpf, - ARRAY_AGG(STRUCT( - sistema, - id_cnes, - id_paciente, - rank - )) AS prontuario - FROM prontuario_dedup - GROUP BY cpf -), + array_agg( + struct( + cep, + lower(tipo_logradouro) as tipo_logradouro, + {{ proper_br("logradouro") }} as logradouro, + numero, + lower(complemento) as complemento, + {{ proper_br("bairro") }} as bairro, + {{ proper_br("cidade") }} as cidade, + lower(estado) as estado, + timestamp( + datahora_ultima_atualizacao + ) as datahora_ultima_atualizacao, + lower(sistema) as sistema, + rank + ) + ) as endereco + from 
endereco_dedup + group by cpf + ), + -- PRONTUARIO + vitai_prontuario as ( + select + cpf, + 'vitai' as sistema, + id_cnes, + id_paciente, + row_number() over (partition by cpf order by updated_at desc) as rank + from + ( + select pc.updated_at, pc.cpf, pc.id_paciente, es.cnes as id_cnes, + from vitai_tb pc + join + {{ ref("raw_prontuario_vitai__m_estabelecimento") }} es + on pc.id_cnes = es.gid + ) + group by cpf, id_cnes, id_paciente, updated_at + ), --- PACIENTE DADOS -vitai_paciente AS ( - SELECT - cpf, - cpf_valido_indicador, - {{proper_br('nome')}} AS nome, - {{proper_br('nome_social')}} AS nome_social, - data_nascimento, - CASE - WHEN genero = "M" THEN INITCAP("MASCULINO") - WHEN genero = "F" THEN INITCAP("FEMININO") - ELSE NULL - END AS genero, - CASE - WHEN raca IN ("NONE", "NAO INFORMADO", "SEM INFORMACAO") THEN NULL - WHEN raca IN ("PRETO","NEGRO") THEN INITCAP("PRETA") - ELSE INITCAP(raca) - END AS raca, - CASE - WHEN obito_data IS NOT NULL THEN TRUE - ELSE NULL - END AS obito_indicador, - obito_data, - CASE - WHEN mae_nome IN ("NONE") THEN NULL - ELSE mae_nome - END AS mae_nome, - pai_nome, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY updated_at) AS rank - FROM vitai_tb - GROUP BY - cpf, pai_nome, nome, nome_social, data_nascimento, genero, obito_data, mae_nome, updated_at, cpf_valido_indicador, - CASE - WHEN obito_data IS NOT NULL THEN TRUE - ELSE NULL - END, - CASE - WHEN raca IN ("NONE", "NAO INFORMADO", "SEM INFORMACAO") THEN NULL - WHEN raca IN ("PRETO","NEGRO") THEN INITCAP("PRETA") - ELSE INITCAP(raca) - END -), + prontuario_dedup as ( + select + cpf, + sistema, + id_cnes, + id_paciente, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank + from + ( + select + cpf, + sistema, + id_cnes, + id_paciente, + rank, + merge_order, + row_number() over ( + partition by cpf, id_cnes, id_paciente + order by merge_order, rank asc + ) as dedup_rank + from + ( + select + vi.cpf, + "vitai" as sistema, + id_cnes, + 
id_paciente, + rank, + 3 as merge_order + from vitai_prontuario vi + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), + prontuario_dados as ( + select + cpf, + array_agg( + struct(lower(sistema) as sistema, id_cnes, id_paciente, rank) + ) as prontuario + from prontuario_dedup + group by cpf + ), -paciente_metadados AS ( - SELECT - cpf, - STRUCT( - -- count the distinct values for each field - COUNT(DISTINCT nome) AS qtd_nomes, - COUNT(DISTINCT nome_social) AS qtd_nomes_sociais, - COUNT(DISTINCT data_nascimento) AS qtd_datas_nascimento, - COUNT(DISTINCT genero) AS qtd_generos, - COUNT(DISTINCT raca) AS qtd_racas, - COUNT(DISTINCT obito_indicador) AS qtd_obitos_indicadores, - COUNT(DISTINCT obito_data) AS qtd_datas_obitos, - COUNT(DISTINCT mae_nome) AS qtd_maes_nomes, - COUNT(DISTINCT pai_nome) AS qtd_pais_nomes, - COUNT(DISTINCT cpf_valido_indicador) AS qtd_cpfs_validos, - "VITAI" AS sistema - ) AS metadados - FROM vitai_paciente - GROUP BY cpf -), + -- PACIENTE DADOS + vitai_paciente as ( + select + cpf, + cpf_valido_indicador, + {{ proper_br("nome") }} as nome, + {{ proper_br("nome_social") }} as nome_social, + data_nascimento, + case + when genero = "M" + then initcap("MASCULINO") + when genero = "F" + then initcap("FEMININO") + else null + end as genero, + case + when raca in ("NONE", "NAO INFORMADO", "SEM INFORMACAO") + then null + when raca in ("PRETO", "NEGRO") + then initcap("PRETA") + else initcap(raca) + end as raca, + case when obito_data is not null then true else null end as obito_indicador, + obito_data, + case when mae_nome in ("NONE") then null else mae_nome end as mae_nome, + pai_nome, + row_number() over (partition by cpf order by updated_at) as rank + from vitai_tb + group by + cpf, + pai_nome, + nome, + nome_social, + data_nascimento, + genero, + obito_data, + mae_nome, + updated_at, + cpf_valido_indicador, + case when obito_data is not null then true else null end, + case + when raca in 
("NONE", "NAO INFORMADO", "SEM INFORMACAO") + then null + when raca in ("PRETO", "NEGRO") + then initcap("PRETA") + else initcap(raca) + end + ), -paciente_dados AS ( - SELECT - pc.cpf, - ARRAY_AGG(STRUCT( - cpf_valido_indicador, - {{ proper_br("nome") }} as nome, - {{ proper_br("nome_social") }} as nome_social, - data_nascimento, - {{ proper_br("genero") }} as genero, - {{ proper_br("raca") }} as raca, - obito_indicador, - obito_data, - {{ proper_br("mae_nome") }} as mae_nome, - {{ proper_br("pai_nome") }} as pai_nome, - rank, - pm.metadados - )) AS dados - FROM vitai_paciente pc - JOIN paciente_metadados as pm - ON pc.cpf = pm.cpf - GROUP BY cpf -), + paciente_metadados as ( + select + cpf, + struct( + -- count the distinct values for each field + count(distinct nome) as qtd_nomes, + count(distinct nome_social) as qtd_nomes_sociais, + count(distinct data_nascimento) as qtd_datas_nascimento, + count(distinct genero) as qtd_generos, + count(distinct raca) as qtd_racas, + count(distinct obito_indicador) as qtd_obitos_indicadores, + count(distinct obito_data) as qtd_datas_obitos, + count(distinct mae_nome) as qtd_maes_nomes, + count(distinct pai_nome) as qtd_pais_nomes, + count(distinct cpf_valido_indicador) as qtd_cpfs_validos, + "vitai" as sistema + ) as metadados + from vitai_paciente + group by cpf + ), ----- FINAL JOIN: Joins all the data previously processed, creating the ----- integrated table of the patients. 
-paciente_integrado AS ( - SELECT - pd.cpf, - cns.cns, - pd.dados, - ct.contato, - ed.endereco, - pt.prontuario, - STRUCT(CURRENT_TIMESTAMP() AS created_at) AS metadados - FROM paciente_dados pd - LEFT JOIN cns_dados cns ON pd.cpf = cns.cpf - LEFT JOIN contato_dados ct ON pd.cpf = ct.cpf - LEFT JOIN endereco_dados ed ON pd.cpf = ed.cpf - LEFT JOIN prontuario_dados pt ON pd.cpf = pt.cpf -) + paciente_dados as ( + select + pc.cpf, + array_agg( + struct( + cpf_valido_indicador, + {{ proper_br("nome") }} as nome, + {{ proper_br("nome_social") }} as nome_social, + data_nascimento, + lower(genero) as genero, + lower(raca) as raca, + obito_indicador, + obito_data, + {{ proper_br("mae_nome") }} as mae_nome, + {{ proper_br("pai_nome") }} as pai_nome, + rank, + pm.metadados + ) + ) as dados + from vitai_paciente pc + join paciente_metadados as pm on pc.cpf = pm.cpf + group by cpf + ), + -- -- FINAL JOIN: Joins all the data previously processed, creating the + -- -- integrated table of the patients. 
+ paciente_integrado as ( + select + pd.cpf, + cns.cns, + pd.dados, + ct.contato, + ed.endereco, + pt.prontuario, + struct(current_timestamp() as created_at) as metadados + from paciente_dados pd + left join cns_dados cns on pd.cpf = cns.cpf + left join contato_dados ct on pd.cpf = ct.cpf + left join endereco_dados ed on pd.cpf = ed.cpf + left join prontuario_dados pt on pd.cpf = pt.cpf + ) -SELECT * FROM paciente_integrado \ No newline at end of file +select * +from paciente_integrado diff --git a/models/marts/gerenciamento/farmacia_digital/gerenciamento__farmacia__processamento_estatisticas.sql b/models/marts/gerenciamento/farmacia_digital/gerenciamento__farmacia__processamento_estatisticas.sql new file mode 100644 index 00000000..a2317bae --- /dev/null +++ b/models/marts/gerenciamento/farmacia_digital/gerenciamento__farmacia__processamento_estatisticas.sql @@ -0,0 +1,136 @@ +{{ + config( + schema="gerenciamento__monitoramento", + alias="estatisticas_farmacia", + materialized="incremental", + unique_key="id", + ) +}} + +{% set seven_days_ago = ( + modules.datetime.date.today() - modules.datetime.timedelta(days=7) +).isoformat() %} + +with + -- ################################################## + -- VITAI + -- ################################################## + vitai_estoque as ( + select distinct + cnes as unidade_cnes, + 'vitai' as fonte, + 'posicao' as tipo, + data_particao as data_atualizacao + from {{ source("brutos_prontuario_vitai_staging", "estoque_posicao") }} + {% if is_incremental() %} + where data_particao > '{{seven_days_ago}}' + {% endif %} + union all + select distinct + cnes as unidade_cnes, + 'vitai' as fonte, + 'movimento' as tipo, + data_particao as data_atualizacao + from {{ source("brutos_prontuario_vitai_staging", "estoque_movimento") }} + {% if is_incremental() %} + where data_particao > '{{seven_days_ago}}' + {% endif %} + ), + unidades_vitai as ( + select + area_programatica as unidade_ap, + id_cnes as unidade_cnes, + nome_limpo as 
unidade_nome + from {{ref('dim_estabelecimento')}} + where prontuario_versao = 'vitai' and prontuario_estoque_tem_dado = 'sim' + ), + vitai_agrupado as ( + select + fonte, + tipo, + data_atualizacao, + count(unidade_cnes) as quant_unidades_com_dado, + array_agg(unidade_cnes) as unidades_com_dado, + from vitai_estoque + group by 1, 2, 3 + ), + vitai_agrupado_com_unidades as ( + select + * except(unidades_com_dado), + array( + select as struct * + from unidades_vitai where unidade_cnes not in unnest(unidades_com_dado) + ) as unidades_sem_dado + from vitai_agrupado + ), + + -- ################################################## + -- VITACARE + -- ################################################## + vitacare_estoque as ( + select distinct + cnesUnidade as unidade_cnes, + 'vitacare' as fonte, + 'posicao' as tipo, + data_particao as data_atualizacao + from {{ source("brutos_prontuario_vitacare_staging", "estoque_posicao") }} + {% if is_incremental() %} + where data_particao > '{{seven_days_ago}}' + {% endif %} + union all + select distinct + cnesUnidade as unidade_cnes, + 'vitacare' as fonte, + 'movimento' as tipo, + data_particao as data_atualizacao + from {{ source("brutos_prontuario_vitacare_staging", "estoque_movimento") }} + {% if is_incremental() %} + where data_particao > '{{seven_days_ago}}' + {% endif %} + ), + unidades_vitacare as ( + select + area_programatica as unidade_ap, + id_cnes as unidade_cnes, + nome_limpo as unidade_nome + from {{ref('dim_estabelecimento')}} + where prontuario_versao = 'vitacare' and prontuario_estoque_tem_dado = 'sim' + ), + vitacare_agrupado as ( + select + fonte, + tipo, + data_atualizacao, + count(unidade_cnes) as quant_unidades_com_dado, + array_agg(unidade_cnes) as unidades_com_dado, + from vitacare_estoque + group by 1, 2, 3 + ), + vitacare_agrupado_com_unidades as ( + select + * except(unidades_com_dado), + array( + select as struct * + from unidades_vitacare where unidade_cnes not in unnest(unidades_com_dado) + ) as 
unidades_sem_dado + from vitacare_agrupado + ), + + -- ################################################## + -- JUNTANDO + -- ################################################## + unioned as ( + select * from vitai_agrupado_com_unidades + union all + select * from vitacare_agrupado_com_unidades + ), + with_key as ( + select + concat(data_atualizacao, '.', fonte, '.', tipo) as id, + * + from unioned + ) +select + * +from with_key +order by id \ No newline at end of file diff --git a/models/marts/historico_clinico/mart_historico_clinico__paciente.sql b/models/marts/historico_clinico/mart_historico_clinico__paciente.sql index 47095919..2cda81a0 100644 --- a/models/marts/historico_clinico/mart_historico_clinico__paciente.sql +++ b/models/marts/historico_clinico/mart_historico_clinico__paciente.sql @@ -13,322 +13,304 @@ }} -- This code integrates patient data from three sources: --- rj-sms.brutos_prontuario_vitacare.paciente (VITACARE) --- rj-sms.brutos_plataforma_vitai.paciente (VITAI) --- rj-sms.brutos_plataforma_smsrio.paciente (SMSRIO) +-- rj-sms.brutos_prontuario_vitacare.paciente (vitacare) +-- rj-sms.brutos_plataforma_vitai.paciente (vitai) +-- rj-sms.brutos_plataforma_smsrio.paciente (smsrio) -- The goal is to consolidate information such as registration data, -- contact, address and medical record into a single view. 
--- dbt run --select int_historico_clinico__paciente__vitacare int_historico_clinico__paciente__smsrio int_historico_clinico__paciente__vitai mart_historico_clinico__paciente mart_historico_clinico__paciente_suspeitos - +-- dbt run --select int_historico_clinico__paciente__vitacare +-- int_historico_clinico__paciente__smsrio int_historico_clinico__paciente__vitai +-- mart_historico_clinico__paciente +-- mart_historico_clinico__paciente_suspeitos -- Declaration of the variable to filter by CPF (optional) -- DECLARE cpf_filter STRING DEFAULT ""; - --- VITACARE: Patient base table -WITH vitacare_tb AS ( - SELECT - cpf, - cns, - dados.nome, - dados.cpf_valido_indicador, - dados.nome_social, - dados.data_nascimento, - dados.genero, - dados.raca, - dados.obito_indicador, - dados.obito_data, - dados.mae_nome, - dados.pai_nome, - dados.metadados, - equipe_saude_familia, - contato, - endereco, - prontuario - FROM {{ ref('int_historico_clinico__paciente__vitacare') }}, - UNNEST(dados) AS dados - WHERE dados.rank=1 +-- vitacare: Patient base table +with + vitacare_tb as ( + select + cpf, + cns, + dados.nome, + dados.cpf_valido_indicador, + dados.nome_social, + dados.data_nascimento, + dados.genero, + dados.raca, + dados.obito_indicador, + dados.obito_data, + dados.mae_nome, + dados.pai_nome, + dados.metadados, + equipe_saude_familia, + contato, + endereco, + prontuario + from + {{ ref("int_historico_clinico__paciente__vitacare") }}, + unnest(dados) as dados + where dados.rank = 1 -- AND cpf = cpf_filter -), --- VITAI: Deceased base table -base_obitos_vitai as ( - select - * - from {{ ref('int_historico_clinico__obito__vitai') }} + ), + -- vitai: Deceased base table + base_obitos_vitai as ( + select * from {{ ref("int_historico_clinico__obito__vitai") }} ), --- VITAI: Patient base table -vitai_tb AS ( - SELECT - cpf, - cns, - dados.nome, - dados.cpf_valido_indicador, - dados.nome_social, - dados.data_nascimento, - dados.genero, - dados.raca, - dados.obito_indicador, - 
dados.obito_data, - dados.mae_nome, - dados.pai_nome, - dados.metadados, - contato, - endereco, - prontuario - FROM {{ ref('int_historico_clinico__paciente__vitai') }}, - UNNEST(dados) AS dados - WHERE dados.rank=1 + -- vitai: Patient base table + vitai_tb as ( + select + cpf, + cns, + dados.nome, + dados.cpf_valido_indicador, + dados.nome_social, + dados.data_nascimento, + dados.genero, + dados.raca, + dados.obito_indicador, + dados.obito_data, + dados.mae_nome, + dados.pai_nome, + dados.metadados, + contato, + endereco, + prontuario + from {{ ref("int_historico_clinico__paciente__vitai") }}, unnest(dados) as dados + where dados.rank = 1 -- AND cpf = cpf_filter -), + ), --- SMSRIO: Patient base table -smsrio_tb AS ( - SELECT - cpf, - cns, - dados.nome, - dados.cpf_valido_indicador, - dados.nome_social, - dados.data_nascimento, - dados.genero, - dados.raca, - dados.obito_indicador, - dados.obito_data, - dados.mae_nome, - dados.pai_nome, - dados.metadados, - contato, - endereco, - prontuario - FROM {{ ref("int_historico_clinico__paciente__smsrio") }}, - UNNEST(dados) AS dados - WHERE dados.rank=1 + -- smsrio: Patient base table + smsrio_tb as ( + select + cpf, + cns, + dados.nome, + dados.cpf_valido_indicador, + dados.nome_social, + dados.data_nascimento, + dados.genero, + dados.raca, + dados.obito_indicador, + dados.obito_data, + dados.mae_nome, + dados.pai_nome, + dados.metadados, + contato, + endereco, + prontuario + from + {{ ref("int_historico_clinico__paciente__smsrio") }}, unnest(dados) as dados + where dados.rank = 1 -- AND cpf = cpf_filter -), - ----=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- --- Merge data from different sources ----=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- + ), --- CNS Dados: Merges CNS data, grouping by patient --- UNION 1. Vitacare | 2. Vitai | 3. 
SMSRIO -cns_dedup AS ( - SELECT - cpf, - cns, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - merge_order, - sistema - FROM( - SELECT + -- -=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- + -- Merge data from different sources + -- -=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- + -- CNS Dados: Merges CNS data, grouping by patient + -- UNION 1. Vitacare | 2. Vitai | 3. smsrio + cns_dedup as ( + select cpf, cns, - rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank, merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, cns ORDER BY merge_order, rank ASC) AS dedup_rank, sistema - FROM ( - SELECT - cpf, - cns.cns AS cns, - cns.rank AS rank, - "VITACARE" AS sistema, - 1 AS merge_order - FROM vitacare_tb, - UNNEST(cns) AS cns - UNION ALL - SELECT - cpf, - cns.cns AS cns, - cns.rank AS rank, - "VITAI" AS sistema, - 2 AS merge_order - FROM vitai_tb, - UNNEST(cns) AS cns - UNION ALL - SELECT - cpf, - cns.cns AS cns, - cns.rank AS rank, - "SMSRIO" AS sistema, - 3 AS merge_order - FROM smsrio_tb, - UNNEST(cns) AS cns - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), - -cns_contagem AS ( - SELECT - cpf, - CASE - WHEN cc.cpf_count > 1 THEN NULL - ELSE cd.cns - END AS cns - FROM cns_dedup cd - LEFT JOIN ( - SELECT - cns, - COUNT(DISTINCT cpf) AS cpf_count - FROM cns_dedup - GROUP BY cns - ) AS cc - ON cd.cns = cc.cns - ORDER BY merge_order ASC, rank ASC -), - -cns_dados AS ( - SELECT - cpf, - ARRAY_AGG( - cns - ) AS cns - FROM cns_contagem - WHERE cns IS NOT NULL - GROUP BY cpf -), + from + ( + select + cpf, + cns, + rank, + merge_order, + row_number() over ( + partition by cpf, cns order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select + cpf, + cns.cns as cns, + cns.rank as rank, + "vitacare" as sistema, + 1 as merge_order + from vitacare_tb, unnest(cns) as cns + where cns.cns_valido_indicador is true + 
union all + select + cpf, + cns.cns as cns, + cns.rank as rank, + "vitai" as sistema, + 2 as merge_order + from vitai_tb, unnest(cns) as cns + where cns.cns_valido_indicador is true + union all + select + cpf, + cns.cns as cns, + cns.rank as rank, + "smsrio" as sistema, + 3 as merge_order + from smsrio_tb, unnest(cns) as cns + where cns.cns_valido_indicador is true + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), + cns_contagem as ( + select cpf, case when cc.cpf_count > 1 then null else cd.cns end as cns + from cns_dedup cd + left join + ( + select cns, count(distinct cpf) as cpf_count from cns_dedup group by cns + ) as cc + on cd.cns = cc.cns + order by merge_order asc, rank asc + ), + cns_dados as ( + select cpf, array_agg(cns) as cns + from cns_contagem + where cns is not null + group by cpf + ), --- Equipe Saude Familia Dados: Groups family health team data by patient. --- ONLY VITACARE -equipe_saude_familia_dados AS ( - SELECT - cpf, - equipe_saude_familia - FROM vitacare_tb -), + -- Equipe Saude Familia Dados: Groups family health team data by patient. + -- ONLY vitacare + equipe_saude_familia_dados as (select cpf, equipe_saude_familia from vitacare_tb), --- Contato Dados: Merges contact data --- UNION: 1. Vitacare | 2. SMSRIO | 3. Vitai -telefone_dedup AS ( - SELECT - cpf, - valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - sistema - FROM ( - SELECT + -- Contato Dados: Merges contact data + -- UNION: 1. Vitacare | 2. smsrio | 3. 
Vitai + telefone_dedup as ( + select cpf, + ddd, valor, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, valor ORDER BY merge_order, rank ASC) AS dedup_rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank, sistema - FROM ( - SELECT - cpf, - telefone.valor, - telefone.rank, - "VITACARE" AS sistema, - 1 AS merge_order - FROM vitacare_tb, - UNNEST(contato.telefone) AS telefone -- Expandindo os elementos da array struct de telefone - UNION ALL - SELECT - cpf, - telefone.valor, - telefone.rank, - "SMSRIO" AS sistema, - 2 AS merge_order - FROM smsrio_tb, - UNNEST(contato.telefone) AS telefone - UNION ALL - SELECT - cpf, - telefone.valor, - telefone.rank, - "VITAI" AS sistema, - 3 AS merge_order - FROM vitai_tb, - UNNEST(contato.telefone) AS telefone - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), + from + ( + select + cpf, + ddd, + valor, + rank, + merge_order, + row_number() over ( + partition by cpf, valor order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select + cpf, + telefone.ddd, + telefone.valor, + telefone.rank, + "vitacare" as sistema, + 1 as merge_order + from vitacare_tb, unnest(contato.telefone) as telefone -- Expandindo os elementos da array struct de telefone + union all + select + cpf, + telefone.ddd, + telefone.valor, + telefone.rank, + "smsrio" as sistema, + 2 as merge_order + from smsrio_tb, unnest(contato.telefone) as telefone + union all + select + cpf, + telefone.ddd, + telefone.valor, + telefone.rank, + "vitai" as sistema, + 3 as merge_order + from vitai_tb, unnest(contato.telefone) as telefone + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), -email_dedup AS ( - SELECT - cpf, - valor, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - sistema - FROM ( - SELECT + email_dedup as ( + select cpf, valor, - rank, - 
merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, valor ORDER BY merge_order, rank ASC) AS dedup_rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank, sistema - FROM ( - SELECT - cpf, - email.valor, - email.rank, - "VITACARE" AS sistema, - 1 AS merge_order - FROM vitacare_tb, - UNNEST(contato.email) AS email -- Expandindo os elementos da array struct de email - UNION ALL - SELECT - cpf, - email.valor, - email.rank, - "SMSRIO" AS sistema, - 2 AS merge_order - FROM smsrio_tb, - UNNEST(contato.email) AS email - UNION ALL - SELECT - cpf, - email.valor, - email.rank, - "VITAI" AS sistema, - 3 AS merge_order - FROM vitai_tb, - UNNEST(contato.email) AS email - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), + from + ( + select + cpf, + valor, + rank, + merge_order, + row_number() over ( + partition by cpf, valor order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select + cpf, + email.valor, + email.rank, + "vitacare" as sistema, + 1 as merge_order + from vitacare_tb, unnest(contato.email) as email -- Expandindo os elementos da array struct de email + union all + select + cpf, + email.valor, + email.rank, + "smsrio" as sistema, + 2 as merge_order + from smsrio_tb, unnest(contato.email) as email + union all + select + cpf, + email.valor, + email.rank, + "vitai" as sistema, + 3 as merge_order + from vitai_tb, unnest(contato.email) as email + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), -contato_dados AS ( - SELECT - COALESCE(t.cpf, e.cpf) AS cpf, - STRUCT( - ARRAY_AGG(STRUCT(t.valor, t.sistema,t.rank)) AS telefone, - ARRAY_AGG(STRUCT(e.valor, e.sistema, e.rank)) AS email - ) AS contato - FROM telefone_dedup t - FULL OUTER JOIN email_dedup e - ON t.cpf = e.cpf - GROUP BY COALESCE(t.cpf, e.cpf) -), + contato_dados as ( + select + coalesce(t.cpf, e.cpf) as cpf, + struct( + 
array_agg(struct(t.ddd, t.valor, t.sistema, t.rank)) as telefone, + array_agg(struct(e.valor, e.sistema, e.rank)) as email + ) as contato + from telefone_dedup t + full outer join email_dedup e on t.cpf = e.cpf + group by coalesce(t.cpf, e.cpf) + ), --- Endereco Dados: Merges address information --- UNION: 1. Vitacare | 2. SMSRIO | 3. Vitai -endereco_dedup AS ( - SELECT - cpf, - cep, - tipo_logradouro, - logradouro, - numero, - complemento, - bairro, - cidade, - estado, - datahora_ultima_atualizacao, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank, - sistema - FROM ( - SELECT + -- Endereco Dados: Merges address information + -- UNION: 1. Vitacare | 2. smsrio | 3. Vitai + endereco_dedup as ( + select cpf, cep, tipo_logradouro, @@ -339,281 +321,326 @@ endereco_dedup AS ( cidade, estado, datahora_ultima_atualizacao, - merge_order, - rank, - ROW_NUMBER() OVER (PARTITION BY cpf, datahora_ultima_atualizacao ORDER BY merge_order, rank ASC) AS dedup_rank, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank, sistema - FROM ( - SELECT - cpf, - endereco.cep, - endereco.tipo_logradouro, - endereco.logradouro, - endereco.numero, - endereco.complemento, - endereco.bairro, - endereco.cidade, - endereco.estado, - endereco.datahora_ultima_atualizacao, - endereco.rank, - "VITACARE" AS sistema, - 1 AS merge_order - FROM vitacare_tb, - UNNEST(endereco) AS endereco -- Expandindo os elementos da array struct de endereço - UNION ALL - SELECT - cpf, - endereco.cep, - endereco.tipo_logradouro, - endereco.logradouro, - endereco.numero, - endereco.complemento, - endereco.bairro, - endereco.cidade, - endereco.estado, - endereco.datahora_ultima_atualizacao, - endereco.rank, - "SMSRIO" AS sistema, - 2 AS merge_order - FROM smsrio_tb, - UNNEST(endereco) AS endereco - UNION ALL - SELECT - cpf, - endereco.cep, - endereco.tipo_logradouro, - endereco.logradouro, - endereco.numero, - endereco.complemento, - endereco.bairro, - 
endereco.cidade, - endereco.estado, - endereco.datahora_ultima_atualizacao, - endereco.rank, - "VITAI" AS sistema, - 3 AS merge_order - FROM vitai_tb, - UNNEST(endereco) AS endereco - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 + from + ( + select + cpf, + cep, + tipo_logradouro, + logradouro, + numero, + complemento, + bairro, + cidade, + estado, + datahora_ultima_atualizacao, + merge_order, + rank, + row_number() over ( + partition by cpf, datahora_ultima_atualizacao + order by merge_order, rank asc + ) as dedup_rank, + sistema + from + ( + select + cpf, + endereco.cep, + endereco.tipo_logradouro, + endereco.logradouro, + endereco.numero, + endereco.complemento, + endereco.bairro, + endereco.cidade, + endereco.estado, + endereco.datahora_ultima_atualizacao, + endereco.rank, + "vitacare" as sistema, + 1 as merge_order + from vitacare_tb, unnest(endereco) as endereco -- Expandindo os elementos da array struct de endereço + union all + select + cpf, + endereco.cep, + endereco.tipo_logradouro, + endereco.logradouro, + endereco.numero, + endereco.complemento, + endereco.bairro, + endereco.cidade, + endereco.estado, + endereco.datahora_ultima_atualizacao, + endereco.rank, + "smsrio" as sistema, + 2 as merge_order + from smsrio_tb, unnest(endereco) as endereco + union all + select + cpf, + endereco.cep, + endereco.tipo_logradouro, + endereco.logradouro, + endereco.numero, + endereco.complemento, + endereco.bairro, + endereco.cidade, + endereco.estado, + endereco.datahora_ultima_atualizacao, + endereco.rank, + "vitai" as sistema, + 3 as merge_order + from vitai_tb, unnest(endereco) as endereco + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 -- ORDER BY merge_order ASC, rank ASC -), - -endereco_dados AS ( - SELECT - cpf, - ARRAY_AGG(STRUCT( - cep, - tipo_logradouro, - logradouro, - numero, - complemento, - bairro, - cidade, - estado, - datahora_ultima_atualizacao, - sistema, - rank - )) AS endereco - FROM endereco_dedup - GROUP BY 
cpf -), - + ), --- Prontuario Dados: Merges system medical record data --- UNION: 1. Vitacare | 2. SMSRIO | 3. Vitai -prontuario_dedup AS ( - SELECT - cpf, - sistema, - id_cnes, - id_paciente, - ROW_NUMBER() OVER (PARTITION BY cpf ORDER BY merge_order ASC, rank ASC) AS rank - FROM ( - SELECT + endereco_dados as ( + select cpf, - sistema, - id_cnes, - id_paciente, - rank, - merge_order, - ROW_NUMBER() OVER (PARTITION BY cpf, id_cnes, id_paciente ORDER BY merge_order, rank ASC) AS dedup_rank - FROM ( - SELECT - vc.cpf, - "VITACARE" AS sistema, - prontuario.id_cnes, - prontuario.id_paciente, - prontuario.rank, - 1 AS merge_order - FROM vitacare_tb vc, - UNNEST(prontuario) AS prontuario - UNION ALL - SELECT - sm.cpf, - "SMSRIO" AS sistema, - prontuario.id_cnes, - prontuario.id_paciente, - prontuario.rank, - 2 AS merge_order - FROM smsrio_tb sm, - UNNEST(prontuario) AS prontuario - UNION ALL - SELECT - vi.cpf, - "VITAI" AS sistema, - prontuario.id_cnes, - prontuario.id_paciente, - prontuario.rank, - 3 AS merge_order - FROM vitai_tb vi, - UNNEST(prontuario) AS prontuario - ) - ORDER BY merge_order ASC, rank ASC - ) - WHERE dedup_rank = 1 - ORDER BY merge_order ASC, rank ASC -), - -prontuario_dados AS ( - SELECT - cpf, - ARRAY_AGG(STRUCT( - sistema, - id_cnes, - id_paciente, - rank - )) AS prontuario - FROM prontuario_dedup - GROUP BY cpf -), - + array_agg( + struct( + cep, + tipo_logradouro, + logradouro, + numero, + complemento, + bairro, + cidade, + estado, + datahora_ultima_atualizacao, + sistema, + rank + ) + ) as endereco + from endereco_dedup + group by cpf + ), --- Paciente Dados: Merges patient data -all_cpfs AS ( - SELECT - DISTINCT cpf - FROM ( - SELECT - cpf - FROM vitacare_tb - UNION ALL - SELECT - cpf - FROM vitai_tb - UNION ALL - SELECT - cpf - FROM smsrio_tb - ) -), + -- Prontuario Dados: Merges system medical record data + -- UNION: 1. Vitacare | 2. smsrio | 3. 
Vitai + prontuario_dedup as ( + select + cpf, + sistema, + id_cnes, + id_paciente, + row_number() over ( + partition by cpf order by merge_order asc, rank asc + ) as rank + from + ( + select + cpf, + sistema, + id_cnes, + id_paciente, + rank, + merge_order, + row_number() over ( + partition by cpf, id_cnes, id_paciente + order by merge_order, rank asc + ) as dedup_rank + from + ( + select + vc.cpf, + "vitacare" as sistema, + prontuario.id_cnes, + prontuario.id_paciente, + prontuario.rank, + 1 as merge_order + from vitacare_tb vc, unnest(prontuario) as prontuario + union all + select + sm.cpf, + "smsrio" as sistema, + prontuario.id_cnes, + prontuario.id_paciente, + prontuario.rank, + 2 as merge_order + from smsrio_tb sm, unnest(prontuario) as prontuario + union all + select + vi.cpf, + "vitai" as sistema, + prontuario.id_cnes, + prontuario.id_paciente, + prontuario.rank, + 3 as merge_order + from vitai_tb vi, unnest(prontuario) as prontuario + ) + order by merge_order asc, rank asc + ) + where dedup_rank = 1 + order by merge_order asc, rank asc + ), --- merge priority: --- nome: 1. SMSRIO | 2. Vitacare | 3. Vitai --- nome_social: 1. Vitai --- data_nascimento: 1. SMSRIO | 2. Vitacare | 3. Vitai --- genero: 1. Vitacare | 2. SMSRIO | 3. Vitai --- raca: 1. Vitacare | 2. SMSRIO | 3. Vitai --- obito_indicador: 1. Vitacare | 2. SMSRIO | 3. Vitai --- obito_data: 1. Vitacare | 2. SMSRIO | 3. Vitai --- mae_nome: 1. SMSRIO | 2. Vitacare | 3. Vitai --- pai_nome: 1. SMSRIO | 2. Vitacare | 3. 
Vitai + prontuario_dados as ( + select cpf, array_agg(struct(sistema, id_cnes, id_paciente, rank)) as prontuario + from prontuario_dedup + group by cpf + ), -paciente_dados AS ( - SELECT - cpfs.cpf, - STRUCT( - CASE - WHEN sm.cpf IS NOT NULL THEN sm.nome - WHEN vc.cpf IS NOT NULL THEN vc.nome - WHEN vi.cpf IS NOT NULL THEN vi.nome - ELSE NULL - END AS nome, - CASE - WHEN vc.cpf IS NOT NULL THEN vc.nome_social - -- WHEN sm.cpf THEN sm.nome_social -- SMSRIO não possui nome social - -- WHEN vi.cpf IS NOT NULL THEN vi.nome_social -- VITAI não possui nome social - ELSE NULL - END AS nome_social, - CASE - WHEN sm.cpf IS NOT NULL THEN sm.data_nascimento - WHEN vc.cpf IS NOT NULL THEN vc.data_nascimento - WHEN vi.cpf IS NOT NULL THEN vi.data_nascimento - ELSE NULL - END AS data_nascimento, - COALESCE(vc.genero, sm.genero, vi.genero) AS genero, - COALESCE(vc.raca, sm.raca, vi.raca) AS raca, - CASE - WHEN ((COALESCE(vc.obito_indicador, sm.obito_indicador, vi.obito_indicador) is False - or COALESCE(vc.obito_indicador, sm.obito_indicador, vi.obito_indicador) is null)) - and (base_obitos_vitai.cpf is not null) - THEN True - ELSE COALESCE(vc.obito_indicador, sm.obito_indicador, vi.obito_indicador) - END AS obito_indicador, - CASE - WHEN COALESCE(vc.obito_data, sm.obito_data, vi.obito_data) is null - and (base_obitos_vitai.obito_data is not null) - THEN base_obitos_vitai.obito_data - ELSE COALESCE(vc.obito_data, sm.obito_data, vi.obito_data) - END AS obito_data, - CASE - WHEN sm.cpf IS NOT NULL THEN sm.mae_nome - WHEN vc.cpf IS NOT NULL THEN vc.mae_nome - WHEN vi.cpf IS NOT NULL THEN vi.mae_nome - ELSE NULL - END AS mae_nome, - CASE - WHEN sm.cpf IS NOT NULL THEN sm.pai_nome - WHEN vc.cpf IS NOT NULL THEN vc.pai_nome - WHEN vi.cpf IS NOT NULL THEN vi.pai_nome - ELSE NULL - END AS pai_nome, - CASE - WHEN sm.cpf IS NOT NULL THEN TRUE - ELSE FALSE - END AS identidade_validada_indicador, - CASE - WHEN sm.cpf IS NOT NULL THEN sm.cpf_valido_indicador - WHEN vc.cpf IS NOT NULL THEN 
vc.cpf_valido_indicador - WHEN vi.cpf IS NOT NULL THEN vi.cpf_valido_indicador - ELSE NULL - END AS cpf_valido_indicador - ) AS dados - FROM all_cpfs cpfs - LEFT JOIN vitacare_tb vc ON cpfs.cpf = vc.cpf - LEFT JOIN vitai_tb vi ON cpfs.cpf = vi.cpf - LEFT JOIN smsrio_tb sm ON cpfs.cpf = sm.cpf - LEFT JOIN base_obitos_vitai on cpfs.cpf = base_obitos_vitai.cpf -), + -- Paciente Dados: Merges patient data + all_cpfs as ( + select distinct cpf + from + ( + select cpf + from vitacare_tb + union all + select cpf + from vitai_tb + union all + select cpf + from smsrio_tb + ) + ), ----- FINAL JOIN: Joins all the data previously processed, creating the ----- integrated table of the patients. -paciente_integrado AS ( - SELECT - pd.cpf, - cns.cns, - pd.dados, - esf.equipe_saude_familia, - ct.contato, - ed.endereco, - pt.prontuario, - STRUCT(CURRENT_TIMESTAMP() AS processed_at) AS metadados, - safe_cast(pd.cpf as int64) as cpf_particao - FROM paciente_dados pd - LEFT JOIN cns_dados cns ON pd.cpf = cns.cpf - LEFT JOIN equipe_saude_familia_dados esf ON pd.cpf = esf.cpf - LEFT JOIN contato_dados ct ON pd.cpf = ct.cpf - LEFT JOIN endereco_dados ed ON pd.cpf = ed.cpf - LEFT JOIN prontuario_dados pt ON pd.cpf = pt.cpf - WHERE pd.dados.nome IS NOT NULL - -- AND pd.dados.data_nascimento IS NOT NULL - AND pd.dados.cpf_valido_indicador IS TRUE + -- merge priority: + -- nome: 1. smsrio | 2. Vitacare | 3. Vitai + -- nome_social: 1. Vitai + -- data_nascimento: 1. smsrio | 2. Vitacare | 3. Vitai + -- genero: 1. Vitacare | 2. smsrio | 3. Vitai + -- raca: 1. Vitacare | 2. smsrio | 3. Vitai + -- obito_indicador: 1. Vitacare | 2. smsrio | 3. Vitai + -- obito_data: 1. Vitacare | 2. smsrio | 3. Vitai + -- mae_nome: 1. smsrio | 2. Vitacare | 3. Vitai + -- pai_nome: 1. smsrio | 2. Vitacare | 3. 
Vitai + paciente_dados as ( + select + cpfs.cpf, + struct( + case + when sm.cpf is not null + then sm.nome + when vc.cpf is not null + then vc.nome + when vi.cpf is not null + then vi.nome + else null + end as nome, + case + when vc.cpf is not null + then vc.nome_social + -- WHEN sm.cpf THEN sm.nome_social -- smsrio não possui nome social + -- WHEN vi.cpf IS NOT NULL THEN vi.nome_social -- vitai não + -- possui nome social + else null + end as nome_social, + case + when sm.cpf is not null + then sm.data_nascimento + when vc.cpf is not null + then vc.data_nascimento + when vi.cpf is not null + then vi.data_nascimento + else null + end as data_nascimento, + coalesce(vc.genero, sm.genero, vi.genero) as genero, + coalesce(vc.raca, sm.raca, vi.raca) as raca, + case + when + ( + ( + coalesce( + vc.obito_indicador, + sm.obito_indicador, + vi.obito_indicador + ) + is false + or coalesce( + vc.obito_indicador, + sm.obito_indicador, + vi.obito_indicador + ) + is null + ) + ) + and (base_obitos_vitai.cpf is not null) + then true + else + coalesce( + vc.obito_indicador, sm.obito_indicador, vi.obito_indicador + ) + end as obito_indicador, + case + when + coalesce(vc.obito_data, sm.obito_data, vi.obito_data) is null + and (base_obitos_vitai.obito_data is not null) + then base_obitos_vitai.obito_data + else coalesce(vc.obito_data, sm.obito_data, vi.obito_data) + end as obito_data, + case + when sm.cpf is not null + then sm.mae_nome + when vc.cpf is not null + then vc.mae_nome + when vi.cpf is not null + then vi.mae_nome + else null + end as mae_nome, + case + when sm.cpf is not null + then sm.pai_nome + when vc.cpf is not null + then vc.pai_nome + when vi.cpf is not null + then vi.pai_nome + else null + end as pai_nome, + case + when sm.cpf is not null then true else false + end as identidade_validada_indicador, + case + when sm.cpf is not null + then sm.cpf_valido_indicador + when vc.cpf is not null + then vc.cpf_valido_indicador + when vi.cpf is not null + then 
vi.cpf_valido_indicador + else null + end as cpf_valido_indicador + ) as dados + from all_cpfs cpfs + left join vitacare_tb vc on cpfs.cpf = vc.cpf + left join vitai_tb vi on cpfs.cpf = vi.cpf + left join smsrio_tb sm on cpfs.cpf = sm.cpf + left join base_obitos_vitai on cpfs.cpf = base_obitos_vitai.cpf + ), -) + -- -- FINAL JOIN: Joins all the data previously processed, creating the + -- -- integrated table of the patients. + paciente_integrado as ( + select + pd.cpf, + cns.cns, + pd.dados, + esf.equipe_saude_familia, + ct.contato, + ed.endereco, + pt.prontuario, + struct(current_timestamp() as processed_at) as metadados, + safe_cast(pd.cpf as int64) as cpf_particao + from paciente_dados pd + left join cns_dados cns on pd.cpf = cns.cpf + left join equipe_saude_familia_dados esf on pd.cpf = esf.cpf + left join contato_dados ct on pd.cpf = ct.cpf + left join endereco_dados ed on pd.cpf = ed.cpf + left join prontuario_dados pt on pd.cpf = pt.cpf + where + pd.dados.nome is not null + -- AND pd.dados.data_nascimento IS NOT NULL + and pd.dados.cpf_valido_indicador is true + ) -SELECT - * -FROM paciente_integrado \ No newline at end of file +select * +from paciente_integrado diff --git a/models/marts/historico_clinico/mart_historico_clinico__paciente_suspeitos.sql b/models/marts/historico_clinico/mart_historico_clinico__paciente_suspeitos.sql index 396da897..a3b726b2 100644 --- a/models/marts/historico_clinico/mart_historico_clinico__paciente_suspeitos.sql +++ b/models/marts/historico_clinico/mart_historico_clinico__paciente_suspeitos.sql @@ -3,7 +3,7 @@ enabled= false, alias="paciente_suspeitos", materialized="table", - schema="saude_historico_clinico" + schema="saude_dados_mestres" ) }} diff --git a/models/marts/historico_clinico_app/mart_historico_clinico_app__paciente.sql b/models/marts/historico_clinico_app/mart_historico_clinico_app__paciente.sql index fc572e33..3ccc32d1 100644 --- a/models/marts/historico_clinico_app/mart_historico_clinico_app__paciente.sql +++ 
b/models/marts/historico_clinico_app/mart_historico_clinico_app__paciente.sql @@ -49,44 +49,6 @@ with group by cpf ), ---=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- - -- REGRAS DE EXIBIÇÃO - ---=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- - -- Regra 1: Menor de Idade - regra_menor_de_idade as ( - select - todos_pacientes.cpf, - safe_cast( - case - when todos_pacientes.dados.data_nascimento is null then false - when DATE_DIFF(current_date(), todos_pacientes.dados.data_nascimento, YEAR) >= 18 then false - when DATE_DIFF(current_date(), todos_pacientes.dados.data_nascimento, YEAR) < 18 then true - end - as boolean) as tem_exibicao_limitada, - safe_cast( - case - when todos_pacientes.dados.data_nascimento is null then null - when DATE_DIFF(current_date(), todos_pacientes.dados.data_nascimento, YEAR) >= 18 then null - when DATE_DIFF(current_date(), todos_pacientes.dados.data_nascimento, YEAR) < 18 then "Menor de Idade" - end - as string) as motivo - from todos_pacientes - ), - -- Juntando Regras - todas_regras as ( - select * from regra_menor_de_idade - -- union all - -- (...) 
- ), - -- Agrupando Regras - regras_exibicao as ( - select - cpf, - not(logical_or(tem_exibicao_limitada)) as indicador, - array_agg(motivo ignore nulls) as motivos - from todas_regras - group by cpf - ), - ---=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- -- FORMATAÇÃO ---=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- formatado as ( @@ -134,19 +96,13 @@ with -- JUNTANDO INFORMAÇÕES DE EXIBICAO ---=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=--=-- select - regras_exibicao.cpf, - formatado.* except(cpf, cpf_particao), + formatado.*, struct( - regras_exibicao.indicador, - regras_exibicao.motivos, + true as indicador, + array[] as motivos, ap_cadastro_por_paciente.ap_cadastro, unidades_cadastro_por_paciente.unidades_cadastro - ) as exibicao, - cpf_particao -from regras_exibicao - left join formatado on ( - regras_exibicao.cpf = formatado.cpf and - regras_exibicao.indicador = true - ) - left join ap_cadastro_por_paciente on ap_cadastro_por_paciente.cpf = regras_exibicao.cpf - left join unidades_cadastro_por_paciente on unidades_cadastro_por_paciente.cpf = regras_exibicao.cpf \ No newline at end of file + ) as exibicao +from formatado + left join ap_cadastro_por_paciente on ap_cadastro_por_paciente.cpf = formatado.cpf + left join unidades_cadastro_por_paciente on unidades_cadastro_por_paciente.cpf = formatado.cpf \ No newline at end of file diff --git a/models/raw/prontuario_vitacare/raw_prontuario_vitacare__estoque_posicao.sql b/models/raw/prontuario_vitacare/raw_prontuario_vitacare__estoque_posicao.sql index 03f03ccf..1f1eebcd 100644 --- a/models/raw/prontuario_vitacare/raw_prontuario_vitacare__estoque_posicao.sql +++ b/models/raw/prontuario_vitacare/raw_prontuario_vitacare__estoque_posicao.sql @@ -44,7 +44,7 @@ with final as ( select - -- Primary key + -- Primary Key concat(id_cnes, '.', id, '.', data_particao) as id, {{ dbt_utils.generate_surrogate_key( @@ -58,6 +58,7 @@ with ] ) }} as id_surrogate, + -- Foreign Keys safe_cast(area_programatica 
as string) as area_programatica, safe_cast(id_cnes as string) as id_cnes, diff --git a/models/raw/prontuario_vitacare/raw_prontuario_vitacare__paciente.sql b/models/raw/prontuario_vitacare/raw_prontuario_vitacare__paciente.sql index 9c8aa0f1..11810764 100644 --- a/models/raw/prontuario_vitacare/raw_prontuario_vitacare__paciente.sql +++ b/models/raw/prontuario_vitacare/raw_prontuario_vitacare__paciente.sql @@ -21,8 +21,7 @@ with qualify row_number() over ( partition by cnes_unidade, cpf, cns order by updated_at desc - ) - = 1 + ) = 1 ), corrige_cadastro as ( @@ -30,9 +29,7 @@ with * except (cadastro_permanente, nome_social, sexo, raca_cor, nome_mae), - case - when nome_social in ('') then null else nome_social - end as nome_social, + case when nome_social in ('') then null else nome_social end as nome_social, case when sexo in ("M", "MALE") @@ -56,7 +53,6 @@ with else null end as obito_indicador, - case when nome_mae in ("NONE") then null else nome_mae end as nome_mae, case @@ -96,7 +92,7 @@ with {{ remove_accents_upper("telefone") }} as telefone, {{ remove_accents_upper("email") }} as email, - {{ remove_accents_upper("endereco_cep") }} as cep, + {{ padronize_cep(remove_accents_upper("endereco_cep")) }} as cep, {{ remove_accents_upper("endereco_tipo_logradouro") }} as tipo_logradouro, {{ remove_accents_upper( diff --git a/poetry.lock b/poetry.lock index 426f3b51..2bc2c993 100644 --- a/poetry.lock +++ b/poetry.lock @@ -167,6 +167,17 @@ doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphin test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] trio = ["trio (>=0.23)"] +[[package]] +name = "appdirs" +version = "1.4.4" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+optional = false +python-versions = "*" +files = [ + {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, + {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, +] + [[package]] name = "async-timeout" version = "4.0.3" @@ -489,6 +500,17 @@ files = [ {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] +[[package]] +name = "chardet" +version = "5.2.0" +description = "Universal encoding detector for Python 3" +optional = false +python-versions = ">=3.7" +files = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -1064,6 +1086,26 @@ ordered-set = ">=4.1.0,<4.2.0" cli = ["click (==8.1.7)", "pyyaml (==6.0.1)"] optimize = ["orjson"] +[[package]] +name = "diff-cover" +version = "9.2.0" +description = "Run coverage and linting reports on diffs" +optional = false +python-versions = "<4.0.0,>=3.8.10" +files = [ + {file = "diff_cover-9.2.0-py3-none-any.whl", hash = "sha256:1e24edc51c39e810c47dd9986e76c333ed95859655c091f572e590c39cabbdbe"}, + {file = "diff_cover-9.2.0.tar.gz", hash = "sha256:85a0b353ebbb678f9e87ea303f75b545bd0baca38f563219bb72f2ae862bba36"}, +] + +[package.dependencies] +chardet = ">=3.0.0" +Jinja2 = ">=2.7.1" +pluggy = ">=0.13.1,<2" +Pygments = ">=2.9.0,<3.0.0" + +[package.extras] +toml = ["tomli (>=1.2.1)"] + [[package]] name = "dill" version = "0.3.8" @@ -2359,6 +2401,17 @@ requests = "2.31.0" dev = ["black (==23.3.0)", "devtools[pygments] (==0.11.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==1.1.1)", "ruff (==0.0.261)", "types-requests (==2.28.11.17)"] test = ["coverage[toml] (>=6.5.0,<8.0)", "pytest 
(>=7.1.3,<8.0.0)", "responses (==0.23.1)"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "isodate" version = "0.6.1" @@ -2404,6 +2457,20 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jinja2-simple-tags" +version = "0.6.1" +description = "Base classes for quick-and-easy template tag development" +optional = false +python-versions = ">=3.6" +files = [ + {file = "jinja2-simple-tags-0.6.1.tar.gz", hash = "sha256:54abf83883dcd13f8fd2ea2c42feeea8418df3640907bd5251dec5e25a6af0e3"}, + {file = "jinja2_simple_tags-0.6.1-py2.py3-none-any.whl", hash = "sha256:7b7cfa92f6813a1e0f0b61b9efcab60e6793674753e1f784ff270542e80ae20f"}, +] + +[package.dependencies] +Jinja2 = ">=2.10" + [[package]] name = "jsonschema" version = "4.23.0" @@ -3641,6 +3708,21 @@ docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx- test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] type = ["mypy (>=1.8)"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "pre-commit" version = "3.8.0" @@ -4197,6 +4279,20 @@ files = [ {file = 
"pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, ] +[[package]] +name = "pygments" +version = "2.18.0" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, + {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "pymongo" version = "4.8.0" @@ -4450,6 +4546,28 @@ files = [ {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, ] +[[package]] +name = "pytest" +version = "8.3.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, + {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + [[package]] name = "python-box" version = "7.2.0" @@ -4685,6 +4803,109 @@ files = [ attrs = ">=22.2.0" rpds-py = ">=0.7.0" +[[package]] +name = "regex" +version = "2024.9.11" +description = "Alternative regular expression module, to replace re." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408"}, + {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0e12c481ad92d129c78f13a2a3662317e46ee7ef96c94fd332e1c29131875b7d"}, + {file = "regex-2024.9.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:16e13a7929791ac1216afde26f712802e3df7bf0360b32e4914dca3ab8baeea5"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46989629904bad940bbec2106528140a218b4a36bb3042d8406980be1941429c"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a906ed5e47a0ce5f04b2c981af1c9acf9e8696066900bf03b9d7879a6f679fc8"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a091b0550b3b0207784a7d6d0f1a00d1d1c8a11699c1a4d93db3fbefc3ad35"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ddcd9a179c0a6fa8add279a4444015acddcd7f232a49071ae57fa6e278f1f71"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b41e1adc61fa347662b09398e31ad446afadff932a24807d3ceb955ed865cc8"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ced479f601cd2f8ca1fd7b23925a7e0ad512a56d6e9476f79b8f381d9d37090a"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:635a1d96665f84b292e401c3d62775851aedc31d4f8784117b3c68c4fcd4118d"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c0256beda696edcf7d97ef16b2a33a8e5a875affd6fa6567b54f7c577b30a137"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:3ce4f1185db3fbde8ed8aa223fc9620f276c58de8b0d4f8cc86fd1360829edb6"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:09d77559e80dcc9d24570da3745ab859a9cf91953062e4ab126ba9d5993688ca"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a22ccefd4db3f12b526eccb129390942fe874a3a9fdbdd24cf55773a1faab1a"}, + {file = "regex-2024.9.11-cp310-cp310-win32.whl", hash = "sha256:f745ec09bc1b0bd15cfc73df6fa4f726dcc26bb16c23a03f9e3367d357eeedd0"}, + {file = "regex-2024.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:01c2acb51f8a7d6494c8c5eafe3d8e06d76563d8a8a4643b37e9b2dd8a2ff623"}, + {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2cce2449e5927a0bf084d346da6cd5eb016b2beca10d0013ab50e3c226ffc0df"}, + {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b37fa423beefa44919e009745ccbf353d8c981516e807995b2bd11c2c77d268"}, + {file = "regex-2024.9.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:64ce2799bd75039b480cc0360907c4fb2f50022f030bf9e7a8705b636e408fad"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4cc92bb6db56ab0c1cbd17294e14f5e9224f0cc6521167ef388332604e92679"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d05ac6fa06959c4172eccd99a222e1fbf17b5670c4d596cb1e5cde99600674c4"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040562757795eeea356394a7fb13076ad4f99d3c62ab0f8bdfb21f99a1f85664"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6113c008a7780792efc80f9dfe10ba0cd043cbf8dc9a76ef757850f51b4edc50"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e5fb5f77c8745a60105403a774fe2c1759b71d3e7b4ca237a5e67ad066c7199"}, + {file = 
"regex-2024.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54d9ff35d4515debf14bc27f1e3b38bfc453eff3220f5bce159642fa762fe5d4"}, + {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df5cbb1fbc74a8305b6065d4ade43b993be03dbe0f8b30032cced0d7740994bd"}, + {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7fb89ee5d106e4a7a51bce305ac4efb981536301895f7bdcf93ec92ae0d91c7f"}, + {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a738b937d512b30bf75995c0159c0ddf9eec0775c9d72ac0202076c72f24aa96"}, + {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e28f9faeb14b6f23ac55bfbbfd3643f5c7c18ede093977f1df249f73fd22c7b1"}, + {file = "regex-2024.9.11-cp311-cp311-win32.whl", hash = "sha256:18e707ce6c92d7282dfce370cd205098384b8ee21544e7cb29b8aab955b66fa9"}, + {file = "regex-2024.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:313ea15e5ff2a8cbbad96ccef6be638393041b0a7863183c2d31e0c6116688cf"}, + {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b0d0a6c64fcc4ef9c69bd5b3b3626cc3776520a1637d8abaa62b9edc147a58f7"}, + {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:49b0e06786ea663f933f3710a51e9385ce0cba0ea56b67107fd841a55d56a231"}, + {file = "regex-2024.9.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5b513b6997a0b2f10e4fd3a1313568e373926e8c252bd76c960f96fd039cd28d"}, + {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee439691d8c23e76f9802c42a95cfeebf9d47cf4ffd06f18489122dbb0a7ad64"}, + {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f877c89719d759e52783f7fe6e1c67121076b87b40542966c02de5503ace42"}, + {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23b30c62d0f16827f2ae9f2bb87619bc4fba2044911e2e6c2eb1af0161cdb766"}, + {file = 
"regex-2024.9.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85ab7824093d8f10d44330fe1e6493f756f252d145323dd17ab6b48733ff6c0a"}, + {file = "regex-2024.9.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dee5b4810a89447151999428fe096977346cf2f29f4d5e29609d2e19e0199c9"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98eeee2f2e63edae2181c886d7911ce502e1292794f4c5ee71e60e23e8d26b5d"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:57fdd2e0b2694ce6fc2e5ccf189789c3e2962916fb38779d3e3521ff8fe7a822"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d552c78411f60b1fdaafd117a1fca2f02e562e309223b9d44b7de8be451ec5e0"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a0b2b80321c2ed3fcf0385ec9e51a12253c50f146fddb2abbb10f033fe3d049a"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:18406efb2f5a0e57e3a5881cd9354c1512d3bb4f5c45d96d110a66114d84d23a"}, + {file = "regex-2024.9.11-cp312-cp312-win32.whl", hash = "sha256:e464b467f1588e2c42d26814231edecbcfe77f5ac414d92cbf4e7b55b2c2a776"}, + {file = "regex-2024.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:9e8719792ca63c6b8340380352c24dcb8cd7ec49dae36e963742a275dfae6009"}, + {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c157bb447303070f256e084668b702073db99bbb61d44f85d811025fcf38f784"}, + {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4db21ece84dfeefc5d8a3863f101995de646c6cb0536952c321a2650aa202c36"}, + {file = "regex-2024.9.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:220e92a30b426daf23bb67a7962900ed4613589bab80382be09b48896d211e92"}, + {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1ae19e64c14c7ec1995f40bd932448713d3c73509e82d8cd7744dc00e29e86"}, + 
{file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f47cd43a5bfa48f86925fe26fbdd0a488ff15b62468abb5d2a1e092a4fb10e85"}, + {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9d4a76b96f398697fe01117093613166e6aa8195d63f1b4ec3f21ab637632963"}, + {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ea51dcc0835eea2ea31d66456210a4e01a076d820e9039b04ae8d17ac11dee6"}, + {file = "regex-2024.9.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7aaa315101c6567a9a45d2839322c51c8d6e81f67683d529512f5bcfb99c802"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c57d08ad67aba97af57a7263c2d9006d5c404d721c5f7542f077f109ec2a4a29"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8404bf61298bb6f8224bb9176c1424548ee1181130818fcd2cbffddc768bed8"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dd4490a33eb909ef5078ab20f5f000087afa2a4daa27b4c072ccb3cb3050ad84"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:eee9130eaad130649fd73e5cd92f60e55708952260ede70da64de420cdcad554"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6a2644a93da36c784e546de579ec1806bfd2763ef47babc1b03d765fe560c9f8"}, + {file = "regex-2024.9.11-cp313-cp313-win32.whl", hash = "sha256:e997fd30430c57138adc06bba4c7c2968fb13d101e57dd5bb9355bf8ce3fa7e8"}, + {file = "regex-2024.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:042c55879cfeb21a8adacc84ea347721d3d83a159da6acdf1116859e2427c43f"}, + {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:35f4a6f96aa6cb3f2f7247027b07b15a374f0d5b912c0001418d1d55024d5cb4"}, + {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:55b96e7ce3a69a8449a66984c268062fbaa0d8ae437b285428e12797baefce7e"}, + {file = "regex-2024.9.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cb130fccd1a37ed894824b8c046321540263013da72745d755f2d35114b81a60"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:323c1f04be6b2968944d730e5c2091c8c89767903ecaa135203eec4565ed2b2b"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be1c8ed48c4c4065ecb19d882a0ce1afe0745dfad8ce48c49586b90a55f02366"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b5b029322e6e7b94fff16cd120ab35a253236a5f99a79fb04fda7ae71ca20ae8"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6fff13ef6b5f29221d6904aa816c34701462956aa72a77f1f151a8ec4f56aeb"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:587d4af3979376652010e400accc30404e6c16b7df574048ab1f581af82065e4"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:079400a8269544b955ffa9e31f186f01d96829110a3bf79dc338e9910f794fca"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f9268774428ec173654985ce55fc6caf4c6d11ade0f6f914d48ef4719eb05ebb"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:23f9985c8784e544d53fc2930fc1ac1a7319f5d5332d228437acc9f418f2f168"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2941333154baff9838e88aa71c1d84f4438189ecc6021a12c7573728b5838e"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e93f1c331ca8e86fe877a48ad64e77882c0c4da0097f2212873a69bbfea95d0c"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = 
"sha256:846bc79ee753acf93aef4184c040d709940c9d001029ceb7b7a52747b80ed2dd"}, + {file = "regex-2024.9.11-cp38-cp38-win32.whl", hash = "sha256:c94bb0a9f1db10a1d16c00880bdebd5f9faf267273b8f5bd1878126e0fbde771"}, + {file = "regex-2024.9.11-cp38-cp38-win_amd64.whl", hash = "sha256:2b08fce89fbd45664d3df6ad93e554b6c16933ffa9d55cb7e01182baaf971508"}, + {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:07f45f287469039ffc2c53caf6803cd506eb5f5f637f1d4acb37a738f71dd066"}, + {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4838e24ee015101d9f901988001038f7f0d90dc0c3b115541a1365fb439add62"}, + {file = "regex-2024.9.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6edd623bae6a737f10ce853ea076f56f507fd7726bee96a41ee3d68d347e4d16"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c69ada171c2d0e97a4b5aa78fbb835e0ffbb6b13fc5da968c09811346564f0d3"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02087ea0a03b4af1ed6ebab2c54d7118127fee8d71b26398e8e4b05b78963199"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69dee6a020693d12a3cf892aba4808fe168d2a4cef368eb9bf74f5398bfd4ee8"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297f54910247508e6e5cae669f2bc308985c60540a4edd1c77203ef19bfa63ca"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecea58b43a67b1b79805f1a0255730edaf5191ecef84dbc4cc85eb30bc8b63b9"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eab4bb380f15e189d1313195b062a6aa908f5bd687a0ceccd47c8211e9cf0d4a"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:0cbff728659ce4bbf4c30b2a1be040faafaa9eca6ecde40aaff86f7889f4ab39"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:54c4a097b8bc5bb0dfc83ae498061d53ad7b5762e00f4adaa23bee22b012e6ba"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:73d6d2f64f4d894c96626a75578b0bf7d9e56dcda8c3d037a2118fdfe9b1c664"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:e53b5fbab5d675aec9f0c501274c467c0f9a5d23696cfc94247e1fb56501ed89"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0ffbcf9221e04502fc35e54d1ce9567541979c3fdfb93d2c554f0ca583a19b35"}, + {file = "regex-2024.9.11-cp39-cp39-win32.whl", hash = "sha256:e4c22e1ac1f1ec1e09f72e6c44d8f2244173db7eb9629cc3a346a8d7ccc31142"}, + {file = "regex-2024.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:faa3c142464efec496967359ca99696c896c591c56c53506bac1ad465f66e919"}, + {file = "regex-2024.9.11.tar.gz", hash = "sha256:6c188c307e8433bcb63dc1915022deb553b4203a70722fc542c363bf120a01fd"}, +] + [[package]] name = "requests" version = "2.31.0" @@ -5276,6 +5497,48 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] sqlcipher = ["sqlcipher3_binary"] +[[package]] +name = "sqlfluff" +version = "3.2.3" +description = "The SQL Linter for Humans" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sqlfluff-3.2.3-py3-none-any.whl", hash = "sha256:159f29c6f2f6dc17a2cb7f0124b46446a7b7f9ad8ba526269a274013f1f42b1c"}, + {file = "sqlfluff-3.2.3.tar.gz", hash = "sha256:13c56ca0175808eaee183d2224ac863ee17113808890603c52d8fffcd1e069fe"}, +] + +[package.dependencies] +appdirs = "*" +chardet = "*" +click = "*" +colorama = ">=0.3" +diff-cover = ">=2.5.0" +Jinja2 = "*" +pathspec = "*" +pytest = "*" +pyyaml = ">=5.1" +regex = "*" +tblib = "*" +toml = {version = "*", markers = "python_version < \"3.11\""} +tqdm = "*" + +[[package]] +name = "sqlfluff-templater-dbt" +version = "3.2.3" +description = 
"Lint your dbt project SQL" +optional = false +python-versions = "*" +files = [ + {file = "sqlfluff_templater_dbt-3.2.3-py3-none-any.whl", hash = "sha256:85ab0e3b809bec48c4a89cc0ff2e75d009622fcbdfc445af585f35d598967a1b"}, + {file = "sqlfluff_templater_dbt-3.2.3.tar.gz", hash = "sha256:f44d5a32a4d980165679e52cd1ff883796ec9c916168cd502ad21b71719c1580"}, +] + +[package.dependencies] +dbt-core = ">=1.4.1" +jinja2-simple-tags = ">=0.3.1" +sqlfluff = "3.2.3" + [[package]] name = "sqlparse" version = "0.5.1" @@ -5774,4 +6037,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.11" -content-hash = "78bf098b2309615f46b703c33e0411d4dd56826cf5a0c2dd19ad1a7f709e5d80" +content-hash = "92d996d173653c4528a8f7e6298837cbac64cb702a40309062e4e97057d30180" diff --git a/pyproject.toml b/pyproject.toml index 3ac348e3..7fc183ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,8 @@ idna = "^3.7" pillow = "^10.3.0" certifi = "^2024.07.04" tqdm = "^4.66.3" +sqlfluff = "^3.2.3" +sqlfluff-templater-dbt = "^3.2.3" [tool.poetry.group.dev] optional = true