Skip to content

Commit

Permalink
create dbt models for resultats commune, add dbt test when dbt folder… (
Browse files Browse the repository at this point in the history
#75)

* create dbt models for resultats commune, add dbt test when dbt folder is modified before pr

* change name of pipelines and dbt tests

* add sql fluff to dbt model in pre commit
  • Loading branch information
NicolasDuchenne authored Feb 27, 2025
1 parent 823f016 commit 1213b54
Show file tree
Hide file tree
Showing 14 changed files with 335 additions and 82 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/test_dbt.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: 🧪 Run DBT Tests

on:
pull_request:
branches: [main]
paths:
- 'dbt_/**'

jobs:
test:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Install a specific version of uv
uses: astral-sh/setup-uv@v5
with:
version: ">=0.4.0"

- name: Install dependencies
run: uv sync

- name: Download production database from Storage
run: |
uv run pipelines/run.py run download_database
- name: Install dbt dependencies
run: |
cd dbt_
uv run dbt deps || { echo "dbt deps failed"; exit 1; }
- name: Run dbt build
run: |
cd dbt_
uv run dbt build || { echo "dbt build failed"; exit 1; }
2 changes: 1 addition & 1 deletion .github/workflows/test_pipelines.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: 🧪 Run Tests
name: 🧪 Run Pipelines Tests

on:
pull_request:
Expand Down
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
repos:
- repo: https://github.com/sqlfluff/sqlfluff
rev: 3.3.1 # Vérifie la dernière version sur GitHub
hooks:
- id: sqlfluff-lint
args: [ "--config", ".sqlfluff" ] # Adapte au dialecte que tu utilises (bigquery, snowflake, etc.)
files: dbt_/

- id: sqlfluff-fix # Optionnel, pour corriger automatiquement les erreurs
args: [ "--config", ".sqlfluff" ]
files: dbt_/
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.9.3
Expand Down
4 changes: 4 additions & 0 deletions .sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[sqlfluff]
dialect = duckdb


21 changes: 21 additions & 0 deletions dbt_/models/analytics/_ana__models.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
version: 2

models:
- name: ana__resultats_communes
description: "Resultat des prélevement par commune et par année"
columns:
- name: commune_code_insee
description: "Code INSEE de la commune du point de surveillance"
type: VARCHAR
- name: commune_nom
description: "Nom de la commune du point de surveillance"
type: VARCHAR
- name: annee
description: "Annee du prevelvement"
type: SMALLINT
- name: resultat_cvm
description: "resultat du prélevement"
type: VARCHAR
tests:
- accepted_values:
values: ["conforme", "non conforme", "non analysé"]
71 changes: 71 additions & 0 deletions dbt_/models/analytics/ana__resultats_communes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
with edc_resultats as (select * from {{ ref("stg_edc__resultats") }}),

edc_prelevements as (select * from {{ ref("stg_edc__prevelevements") }}),

edc_communes as (select * from {{ ref("stg_edc__communes") }}),

cog_communes as (select * from {{ ref("stg_communes__cog") }}),


resultats_cvm as (
select
*,
(case when valtraduite > 0.5 then 1 else 0 end) as is_non_conforme
from edc_resultats
where cdparametresiseeaux = 'CLVYL'
),

prelevements_cvm as (
select
edc_prelevements.cdreseau,
resultats_cvm.de_partition,
SUM(resultats_cvm.is_non_conforme) as nbr_resultats_non_conformes,
COUNT(*) as nbr_resultats_total
from resultats_cvm
left join edc_prelevements
on
resultats_cvm.referenceprel = edc_prelevements.referenceprel
and
resultats_cvm.de_partition = edc_prelevements.de_partition
group by edc_prelevements.cdreseau, resultats_cvm.de_partition
),

communes_cvm as (
select
edc_communes.inseecommune,
edc_communes.de_partition,
COALESCE(SUM(prelevements_cvm.nbr_resultats_non_conformes), 0)
as nbr_resultats_non_conformes,
COALESCE(SUM(prelevements_cvm.nbr_resultats_total), 0)
as nbr_resultats_total,
case
when
SUM(prelevements_cvm.nbr_resultats_non_conformes) > 0
then 'non conforme'
when SUM(prelevements_cvm.nbr_resultats_total) > 0 then 'conforme'
else 'non analysé'
end as resultat
from edc_communes
left join prelevements_cvm
on
edc_communes.cdreseau = prelevements_cvm.cdreseau
and
edc_communes.de_partition = prelevements_cvm.de_partition
group by edc_communes.inseecommune, edc_communes.de_partition
),

annees as (select UNNEST(GENERATE_SERIES(2020, 2024)) as annee)


select
cog.com as commune_code_insee,
cog.libelle as commune_nom,
a.annee,
COALESCE(communes_cvm.resultat, 'non analysé') as resultat_cvm
from cog_communes as cog
cross join
annees as a
left join communes_cvm on
cog.com = communes_cvm.inseecommune
and
a.annee::string = communes_cvm.de_partition
29 changes: 14 additions & 15 deletions dbt_/models/staging/communes/stg_communes__cog.sql
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
SELECT
TYPECOM::VARCHAR as typecom,
COM::VARCHAR AS com,
REG::SMALLINT as reg,
DEP::VARCHAR as dep,
CTCD::VARCHAR as ctcd,
ARR::VARCHAR as arr,
ARR::VARCHAR as arr,
TNCC::SMALLINT as tncc,
NCC::VARCHAR as ncc,
NCCENR::VARCHAR as nccenr,
LIBELLE::VARCHAR as libelle,
CAN::VARCHAR as can,
COMPARENT::VARCHAR as comparent,
de_partition::SMALLINT as de_partition
FROM {{ source('communes', 'cog_communes') }}
TYPECOM::VARCHAR AS TYPECOM,
COM::VARCHAR AS COM,
REG::SMALLINT AS REG,
DEP::VARCHAR AS DEP,
CTCD::VARCHAR AS CTCD,
ARR::VARCHAR AS ARR,
TNCC::SMALLINT AS TNCC,
NCC::VARCHAR AS NCC,
NCCENR::VARCHAR AS NCCENR,
LIBELLE::VARCHAR AS LIBELLE,
CAN::VARCHAR AS CAN,
COMPARENT::VARCHAR AS COMPARENT,
DE_PARTITION::SMALLINT AS DE_PARTITION
FROM {{ source('communes', 'cog_communes') }}
16 changes: 8 additions & 8 deletions dbt_/models/staging/communes/stg_communes__laposte.sql
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
SELECT
code_commune_insee::VARCHAR as code_commune_insee,
code_commune_insee::VARCHAR AS code_commune_insee,
nom_de_la_commune::VARCHAR AS nom_de_la_commune,
code_postal::VARCHAR as code_postal,
libelle_d_acheminement::VARCHAR as libelle_d_acheminement,
ligne_5::VARCHAR as ligne_5,
_geopoint::VARCHAR as _geopoint,
de_partition::SMALLINT as de_partition,
de_ingestion_date::VARCHAR as de_ingestion_date
FROM {{ source('communes', 'laposte_communes') }}
code_postal::VARCHAR AS code_postal,
libelle_d_acheminement::VARCHAR AS libelle_d_acheminement,
ligne_5::VARCHAR AS ligne_5,
_geopoint::VARCHAR AS _geopoint,
de_partition::SMALLINT AS de_partition,
de_ingestion_date::VARCHAR AS de_ingestion_date
FROM {{ source('communes', 'laposte_communes') }}
15 changes: 14 additions & 1 deletion dbt_/models/staging/edc/_edc__models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ models:
type: SMALLINT
tests:
- dbt_utils.expression_is_true:
expression: ">= 2016"
expression: ">= 2020"

- name: stg_edc__resultats
description: "Liste des communes et leurs prélévements"
Expand Down Expand Up @@ -135,6 +135,13 @@ models:
laboratoires différents.
type: VARCHAR

- name: de_partition
description: Année de récupération des données. Ceci est un champ ajouté automatiquement lors de l'ingestion des données.
type: SMALLINT
tests:
- dbt_utils.expression_is_true:
expression: ">= 2020"

- name: stg_edc__prevelevements
description: "Liste des prélèvements"
columns:
Expand Down Expand Up @@ -283,6 +290,12 @@ models:
tests:
- accepted_values:
values: ["blanc", "C", "N", "S", "D"]
- name: de_partition
description: Année de récupération des données. Ceci est un champ ajouté automatiquement lors de l'ingestion des données.
type: SMALLINT
tests:
- dbt_utils.expression_is_true:
expression: ">= 2020"



16 changes: 8 additions & 8 deletions dbt_/models/staging/edc/stg_edc__communes.sql
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
SELECT
inseecommune::VARCHAR(5) as inseecommune,
nomcommune::VARCHAR as nomcommune,
quartier::VARCHAR as quartier,
cdreseau::VARCHAR(9) as cdreseau,
nomreseau::VARCHAR as nomreseau,
debutalim::VARCHAR as debutalim,
de_partition::SMALLINT as de_partition
FROM {{ source('edc', 'edc_communes') }}
inseecommune::VARCHAR(5) AS inseecommune,
nomcommune::VARCHAR AS nomcommune,
quartier::VARCHAR AS quartier,
cdreseau::VARCHAR(9) AS cdreseau,
nomreseau::VARCHAR AS nomreseau,
debutalim::VARCHAR AS debutalim,
de_partition::SMALLINT AS de_partition
FROM {{ source('edc', 'edc_communes') }}
37 changes: 19 additions & 18 deletions dbt_/models/staging/edc/stg_edc__prevelevements.sql
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
SELECT
cddept::VARCHAR(3) as cddept,
cdreseau::VARCHAR(9) as cdreseau,
inseecommuneprinc::VARCHAR(5) as inseecommuneprinc,
nomcommuneprinc::VARCHAR as nomcommuneprinc,
cdreseauamont::VARCHAR(9) as cdreseauamont,
nomreseauamont::VARCHAR as nomreseauamont,
cddept::VARCHAR(3) AS cddept,
cdreseau::VARCHAR(9) AS cdreseau,
inseecommuneprinc::VARCHAR(5) AS inseecommuneprinc,
nomcommuneprinc::VARCHAR AS nomcommuneprinc,
cdreseauamont::VARCHAR(9) AS cdreseauamont,
nomreseauamont::VARCHAR AS nomreseauamont,
REPLACE(pourcentdebit, ' %', '')::TINYINT AS pourcentdebit,
referenceprel::VARCHAR(11) as referenceprel,
dateprel::DATE as dateprel,
heureprel::VARCHAR as heureprel,
conclusionprel::VARCHAR as conclusionprel,
ugelib::VARCHAR as ugelib,
distrlib::VARCHAR as distrlib,
moalib::VARCHAR as moalib,
plvconformitebacterio::VARCHAR(1) as plvconformitebacterio,
plvconformitechimique::VARCHAR(1) as plvconformitechimique,
plvconformitereferencebact::VARCHAR(1) as plvconformitereferencebact,
plvconformitereferencechim::VARCHAR(1) as plvconformitereferencechim
FROM {{ source('edc', 'edc_prelevements') }}
referenceprel::VARCHAR(11) AS referenceprel,
dateprel::DATE AS dateprel,
heureprel::VARCHAR AS heureprel,
conclusionprel::VARCHAR AS conclusionprel,
ugelib::VARCHAR AS ugelib,
distrlib::VARCHAR AS distrlib,
moalib::VARCHAR AS moalib,
plvconformitebacterio::VARCHAR(1) AS plvconformitebacterio,
plvconformitechimique::VARCHAR(1) AS plvconformitechimique,
plvconformitereferencebact::VARCHAR(1) AS plvconformitereferencebact,
plvconformitereferencechim::VARCHAR(1) AS plvconformitereferencechim,
de_partition::SMALLINT AS de_partition
FROM {{ source('edc', 'edc_prelevements') }}
37 changes: 19 additions & 18 deletions dbt_/models/staging/edc/stg_edc__resultats.sql
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
SELECT
cddept::VARCHAR(3) as cddept,
referenceprel::VARCHAR(11) as referenceprel,
cdparametresiseeaux::VARCHAR(10) as cdparametresiseeaux,
cdparametre::INT as cdparametre,
libmajparametre::VARCHAR as libmajparametre,
libminparametre::VARCHAR as libminparametre,
libwebparametre::VARCHAR as libwebparametre,
qualitparam::VARCHAR(1) as qualitparam,
insituana::VARCHAR(1) as insituana,
rqana::VARCHAR(8) as rqana,
cdunitereferencesiseeaux::VARCHAR(7) as cdunitereferencesiseeaux,
cdunitereference::VARCHAR as cdunitereference,
limitequal::VARCHAR as limitequal,
refqual::VARCHAR as refqual,
valtraduite::NUMERIC as valtraduite,
casparam::VARCHAR as casparam,
referenceanl::VARCHAR as referenceanl
FROM {{ source('edc', 'edc_resultats') }}
cddept::VARCHAR(3) AS cddept,
referenceprel::VARCHAR(11) AS referenceprel,
cdparametresiseeaux::VARCHAR(10) AS cdparametresiseeaux,
cdparametre::INT AS cdparametre,
libmajparametre::VARCHAR AS libmajparametre,
libminparametre::VARCHAR AS libminparametre,
libwebparametre::VARCHAR AS libwebparametre,
qualitparam::VARCHAR(1) AS qualitparam,
insituana::VARCHAR(1) AS insituana,
rqana::VARCHAR(8) AS rqana,
cdunitereferencesiseeaux::VARCHAR(7) AS cdunitereferencesiseeaux,
cdunitereference::VARCHAR AS cdunitereference,
limitequal::VARCHAR AS limitequal,
refqual::VARCHAR AS refqual,
valtraduite::NUMERIC AS valtraduite,
casparam::VARCHAR AS casparam,
referenceanl::VARCHAR AS referenceanl,
de_partition::SMALLINT AS de_partition
FROM {{ source('edc', 'edc_resultats') }}
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ dependencies = [
dev = [
"jupyter>=1.1.0,<2",
"pre-commit>=4.1.0,<5",
"pytest>=8.3.4",
"pytest-cov>=6.0.0",
"pytest>=8.3.4,<9",
"pytest-cov>=6.0.0,<7"
]

[build-system]
Expand Down
Loading

0 comments on commit 1213b54

Please sign in to comment.