diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 48ab489..8f9eb87 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -31,7 +31,7 @@ ] } }, - "features": { + "features": { "ghcr.io/devcontainers/features/common-utils:2": { "installZsh": true, "configureZshAsDefaultShell": true, diff --git a/Dockerfile b/Dockerfile index 7e8bfcd..88fbf23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,10 +3,9 @@ FROM fishtownanalytics/dbt:${DBT_VERSION} ENV DBT_PROFILES_DIR=. - RUN set -ex \ && python -m pip install setuptools \ - && python -m pip install dbt-postgres==1.4.0 dbt-core==1.4.0 numpy + && python -m pip install dbt-postgres==1.4.0 dbt-core==1.4.0 -ENTRYPOINT [ "tail", "-f", "/dev/null" ] +ENTRYPOINT [ "tail", "-f", "/dev/null" ] \ No newline at end of file diff --git a/analysis/.gitkeep b/analyses/.gitkeep similarity index 100% rename from analysis/.gitkeep rename to analyses/.gitkeep diff --git a/docker-compose.yml b/docker-compose.yml index 0aee0d3..4844ae5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -version: '2' +#version: '2' services: devcontainer: diff --git a/models/business_vault/customer_order_count.sql b/models/business_vault/customer_order_count.sql new file mode 100644 index 0000000..1645227 --- /dev/null +++ b/models/business_vault/customer_order_count.sql @@ -0,0 +1,99 @@ +{{ + config( + materialized='table', + schema='analytics', + tags=['analytics', 'orders'] +) +}} + +with orders as ( + SELECT + ho.order_pk + FROM + {{ ref('hub_order') }} ho + JOIN ( + SELECT + so.order_pk, + MAX(so.effective_from) AS latest_date + FROM + {{ ref('sat_order') }} so + GROUP BY + so.order_pk + ) latest + ON ho.order_pk = latest.order_pk + JOIN + {{ ref('sat_order') }} so + ON so.order_pk = latest.order_pk + AND so.effective_from = latest.latest_date + where + so.status ='completed' +), + +customers as ( + SELECT + hc.customer_pk, + concat(sc.first_name, ' ',sc.last_name) as 
customer_name + FROM + {{ ref('hub_customer') }} hc + JOIN ( + SELECT + sc.customer_pk, + MAX(sc.effective_from) AS latest_date + FROM + {{ ref('sat_customer') }} sc + GROUP BY + sc.customer_pk + ) latest + ON hc.customer_pk = latest.customer_pk + JOIN + {{ ref('sat_customer') }} sc + ON sc.customer_pk = latest.customer_pk + AND sc.effective_from = latest.latest_date +), + +customers_crm as( + SELECT + hc.customer_pk, + concat(sc.country, ' ',sc.age) as customer_name + FROM + {{ ref('hub_customer') }} hc + JOIN ( + SELECT + sc.customer_pk, + MAX(sc.effective_from) AS latest_date + FROM + {{ ref('sat_customer_crm') }} sc + GROUP BY + sc.customer_pk + ) latest + ON hc.customer_pk = latest.customer_pk + JOIN + {{ ref('sat_customer_crm') }} sc + ON sc.customer_pk = latest.customer_pk + AND sc.effective_from = latest.latest_date +) + +select + cust.customer_name, + count(o.order_pk) as count_orders +from + orders o +join + {{ ref('link_customer_order') }} lco + on o.order_pk = lco.order_pk +join ( + select + customer_name, + customer_pk + from + customers c + union all + select + customer_name, + customer_pk + from + customers_crm cc + ) cust + on lco.customer_pk = cust.customer_pk +group by cust.customer_name +order by count_orders desc \ No newline at end of file diff --git a/models/business_vault/customer_pit.sql b/models/business_vault/customer_pit.sql index a3392a6..d2c9b0b 100644 --- a/models/business_vault/customer_pit.sql +++ b/models/business_vault/customer_pit.sql @@ -3,14 +3,14 @@ {%- set yaml_metadata -%} source_model: hub_customer src_pk: CUSTOMER_PK -as_of_dates_table: AS_OF_DATE +as_of_dates_table: as_of_date satellites: - SAT_CUSTOMER: + sat_customer: pk: PK: CUSTOMER_PK ldts: LDTS: LOAD_DATE - SAT_CUSTOMER_CRM: + sat_customer_crm: pk: PK: CUSTOMER_PK ldts: @@ -31,7 +31,7 @@ src_ldts: LOAD_DATE {% set src_ldts = metadata_dict['src_ldts'] %} {{ automate_dv.pit(source_model=source_model, src_pk=src_pk, - as_of_dates_table=as_of_dates_table, - 
satellites=satellites, - stage_tables_ldts=stage_tables_ldts, - src_ldts=src_ldts) }} \ No newline at end of file + as_of_dates_table=as_of_dates_table, + satellites=satellites, + stage_tables_ldts=stage_tables_ldts, + src_ldts=src_ldts) }} \ No newline at end of file diff --git a/models/business_vault/our_customer_pit.sql b/models/business_vault/our_customer_pit.sql index d5954f6..fa8d4f1 100644 --- a/models/business_vault/our_customer_pit.sql +++ b/models/business_vault/our_customer_pit.sql @@ -1,12 +1,13 @@ -with all_history ( - select hc.customer_pk, +with all_history as ( + SELECT + hc.customer_pk, sc.last_name, sc.first_name, scc.age, sc.effective_from as sc_effective_from, - coalesce(lead(sc.effective_from) OVER (PARTITION BY hc.customer_pk ORDER BY sc.effective_from), '9999-12-31') as sc_effective_to, + coalesce(lead(sc.effective_from) OVER (PARTITION BY hc.customer_pk ORDER BY sc.effective_from), '9999-12-31') as sc_effective_to, scc.effective_from as scc_effective_from, - coalesce(lead(scc.effective_from) OVER (PARTITION BY hc.customer_pk ORDER BY scc.effective_from), '9999-12-31') as scc_effective_to + coalesce(lead(scc.effective_from) OVER (PARTITION BY hc.customer_pk ORDER BY scc.effective_from), '9999-12-31') as scc_effective_to from {{ref('hub_customer')}} hc LEFT JOIN {{ref('sat_customer')}} sc ON sc.customer_pk = hc.customer_pk LEFT JOIN {{ref('sat_customer_crm')}} scc ON scc.customer_pk = hc.customer_pk diff --git a/models/business_vault/pit_customer.sql b/models/business_vault/pit_customer.sql new file mode 100644 index 0000000..2b396e2 --- /dev/null +++ b/models/business_vault/pit_customer.sql @@ -0,0 +1,37 @@ +{{ config(materialized='pit_incremental') }} + +{%- set yaml_metadata -%} +source_model: hub_customer +src_pk: CUSTOMER_PK +as_of_dates_table: as_of_date +satellites: + sat_customer: + pk: + PK: CUSTOMER_PK + ldts: + LDTS: LOAD_DATE + sat_customer_crm: + pk: + PK: CUSTOMER_PK + ldts: + LDTS: LOAD_DATE +stage_tables_ldts: + stg_customers: 
LOAD_DATE + stg_customers_crm: LOAD_DATE +src_ldts: LOAD_DATE +{%- endset -%} + +{% set metadata_dict = fromyaml(yaml_metadata) %} + +{% set source_model = metadata_dict['source_model'] %} +{% set src_pk = metadata_dict['src_pk'] %} +{% set as_of_dates_table = metadata_dict['as_of_dates_table'] %} +{% set satellites = metadata_dict['satellites'] %} +{% set stage_tables_ldts = metadata_dict['stage_tables_ldts'] %} +{% set src_ldts = metadata_dict['src_ldts'] %} + +{{ automate_dv.pit(source_model=source_model, src_pk=src_pk, + as_of_dates_table=as_of_dates_table, + satellites=satellites, + stage_tables_ldts=stage_tables_ldts, + src_ldts=src_ldts) }} \ No newline at end of file diff --git a/models/business_vault/schema.yml b/models/business_vault/schema.yml index 001c7e6..2ed1186 100644 --- a/models/business_vault/schema.yml +++ b/models/business_vault/schema.yml @@ -1,9 +1,11 @@ version: 2 sources: - - name: business_vault + - name: analyses database: postgres schema: dbt tables: - name: as_of_date - - name: customer_pit + - name: weekly_orders + - name: customer_order_count + - name: pit_customer \ No newline at end of file diff --git a/models/business_vault/weekly_orders.sql b/models/business_vault/weekly_orders.sql new file mode 100644 index 0000000..3aa14fb --- /dev/null +++ b/models/business_vault/weekly_orders.sql @@ -0,0 +1,37 @@ +{{ + config( + materialized='view', + schema='analytics', + tags=['weekly_reports'] + ) +}} + +with cte as ( + SELECT + ho.order_pk, + so.order_hashdiff, + date_trunc('week', so.order_date) as week_start + FROM + {{ ref('hub_order') }} ho + JOIN ( + SELECT + so.order_pk, + MAX(so.effective_from) AS latest_date + FROM + {{ ref('sat_order') }} so + GROUP BY + so.order_pk + ) latest + ON ho.order_pk = latest.order_pk + JOIN + {{ ref('sat_order') }} so + ON so.order_pk = latest.order_pk + AND so.effective_from = latest.latest_date +) +select + week_start, + count(*) as count_orders +from + cte +group by week_start +order by week_start \ 
No newline at end of file diff --git a/models/stage/stg_customers.sql b/models/stage/stg_customers.sql index 67f5736..2e57f5b 100644 --- a/models/stage/stg_customers.sql +++ b/models/stage/stg_customers.sql @@ -34,5 +34,9 @@ with staging as ( }} ) -select *, {{var('load_date')}} as LOAD_DATE, - {{ var('load_date') }} AS EFFECTIVE_FROM from staging \ No newline at end of file +select + *, + {{var('load_date')}} as LOAD_DATE, + {{ var('load_date') }} AS EFFECTIVE_FROM +from + staging \ No newline at end of file diff --git a/models/stage/stg_customers_crm.sql b/models/stage/stg_customers_crm.sql index bcf52c5..dc4e8a9 100644 --- a/models/stage/stg_customers_crm.sql +++ b/models/stage/stg_customers_crm.sql @@ -33,5 +33,9 @@ with staging as ( }} ) -select *, {{var('load_date')}} as LOAD_DATE, - {{ var('load_date') }} AS EFFECTIVE_FROM from staging \ No newline at end of file +select + *, + {{var('load_date')}} as LOAD_DATE, + {{ var('load_date') }} AS EFFECTIVE_FROM +from + staging \ No newline at end of file diff --git a/models/stage/stg_link_customer_order.sql b/models/stage/stg_link_customer_order.sql index 195a7d0..9d430a8 100644 --- a/models/stage/stg_link_customer_order.sql +++ b/models/stage/stg_link_customer_order.sql @@ -28,5 +28,9 @@ with staging as ( }} ) -select *, {{var('load_date')}} as LOAD_DATE, - {{ var('load_date') }} AS EFFECTIVE_FROM from staging \ No newline at end of file +select + *, + {{var('load_date')}} as LOAD_DATE, + {{ var('load_date') }} AS EFFECTIVE_FROM +from + staging \ No newline at end of file diff --git a/models/stage/stg_orders.sql b/models/stage/stg_orders.sql index 737b525..47a121b 100644 --- a/models/stage/stg_orders.sql +++ b/models/stage/stg_orders.sql @@ -29,13 +29,14 @@ hashed_columns: WITH staging AS ( {{ automate_dv.stage(include_source_columns=true, - source_model=source_model, - derived_columns=derived_columns, - hashed_columns=hashed_columns, - ranked_columns=none) }} + source_model=source_model, + 
derived_columns=derived_columns, + hashed_columns=hashed_columns, + ranked_columns=none) }} ) -SELECT *, +SELECT + *, {{ var('load_date') }} AS LOAD_DATE, {{ var('load_date') }} AS EFFECTIVE_FROM FROM staging \ No newline at end of file diff --git a/seeds/source_customers.csv b/seeds/source_customers.csv index 1f20dc4..879a8e5 100644 --- a/seeds/source_customers.csv +++ b/seeds/source_customers.csv @@ -87,7 +87,6 @@ id,first_name,last_name,email 86,Jason,Cole,jcole2d@ycombinator.com 87,Phillip,Bryant,pbryant2e@rediff.com 88,Adam,Torres,atorres2f@sun.com -89,Margaret,Johnston,mjohnston2g@ucsd.edu 90,Paul,Payne,ppayne2h@hhs.gov 91,Todd,Willis,twillis2i@businessweek.com 92,Willie,Oliver,woliver2j@noaa.gov diff --git a/seeds/source_customers_crm.csv b/seeds/source_customers_crm.csv index 8fdfccb..19678e0 100644 --- a/seeds/source_customers_crm.csv +++ b/seeds/source_customers_crm.csv @@ -4,5 +4,4 @@ jmcdonald2r@baidu.com,UK,35 d.trump@usa.gov,US,65 erickson1993@fake.com,FR,28 e.klum@fake.com,IT,30 -erickson1994@fake.com,FR,28 -e.klum1994@fake.com,IT,30 +erickson1994@fake.com,FR,28 \ No newline at end of file diff --git a/seeds/source_orders.csv b/seeds/source_orders.csv index c487062..6fb61c9 100644 --- a/seeds/source_orders.csv +++ b/seeds/source_orders.csv @@ -98,3 +98,7 @@ id,user_id,order_date,status 97,89,2018-04-07,placed 98,41,2018-04-07,placed 99,85,2018-04-09,placed +100,10,2018-04-16,completed +101,18,2018-04-17,shipped +102,89,2018-04-19,placed +103,88,2018-04-19,shipped \ No newline at end of file