Skip to content

Appendix M: Feature Engineering

-- Use a CTE to first aggregate repair orders
WITH repair_orders AS (
    SELECT
        g.VHCL_ID,
        g.VIN_ID,
        s.retail_dt,
        ARRAY_AGG(
            OBJECT_CONSTRUCT(
                'repair_order_id', ro.REPAIR_ORDER_NB,
                'repair_order_open_date', ro.REPAIR_ORDER_OPEN_DT,
                'repair_order_close_date', ro.REPAIR_ORDER_CLOSE_NB
            )
        ) AS repair_orders
       FROM c360_data_derived.DM_GOLD_CSTMR c
JOIN c360_data_derived.DM_GOLD_CSTMR_VHCL g ON g.cstmr_id = c.cstmr_id
JOIN c360_data_derived.FC_SALE s ON s.vin_id = g.vin_id
JOIN c360_data_derived.DM_GOLD_CSTMR_LNAGE cl ON cl.transctn_mstr_id = s.cstmr_transctn_mstr_id   
LEFT JOIN c360_data_derived.FC_REPAIR_ORDER ro ON ro.vin_id = g.vin_id
    -- AND ( g.disposal_dt IS NULL OR ( ro.REPAIR_ORDER_OPEN_DT <= g.disposal_dt
    --                                     AND TRY_TO_DATE(ro.repair_order_close_nb) >= s.retail_dt ))
    -- OR (g.disposal_dt IS NULL AND TRY_TO_DATE(ro.repair_order_close_nb) >= s.retail_dt)
   AND ( (g.disposal_dt IS NULL AND TRY_TO_DATE(ro.repair_order_close_nb) >= s.retail_dt)
            OR (g.disposal_dt IS NOT NULL AND ( ro.REPAIR_ORDER_OPEN_DT <= g.disposal_dt
                                        AND TRY_TO_DATE(ro.repair_order_close_nb) >= s.retail_dt ))
        )
WHERE c.cstmr_frst_nm LIKE 'W%' AND c.cstmr_last_nm = 'FERNANDEZ ARIAS'
AND s.status_cd IS NULL AND s.rewind_dt IS NULL AND s.new_used_in = 'N'
    GROUP BY g.VHCL_ID, g.VIN_ID, s.retail_dt
),

-- Aggregate purchases with repair orders
purchases AS (
    SELECT
        c.cstmr_id,
        c.CSTMR_FRST_NM AS first_name,
        c.CSTMR_LAST_NM AS last_name,
        c.cstmr_full_nm AS full_name,
        c.clnsd_cntct AS phone_number, -- Assuming phone number is not provided in the source data
        ARRAY_AGG(
            OBJECT_CONSTRUCT(
                'purchase_id', s.retail_dt,
                'purchase_date', s.retail_dt,
                'items', ARRAY_CONSTRUCT(
                    OBJECT_CONSTRUCT(
                        'item_id', g.VHCL_ID,
                        'item_name', g.VIN_ID,
                        'quantity', 1,
                        'price', 0.00, -- Assuming price is not provided in the source data
                        'repair_orders', r.repair_orders
                    )
                )
            )
        ) AS purchase_history
    FROM c360_data_derived.DM_GOLD_CSTMR c
    JOIN c360_data_derived.DM_GOLD_CSTMR_VHCL g ON g.cstmr_id = c.cstmr_id
    JOIN c360_data_derived.FC_SALE s ON s.vin_id = g.vin_id
    JOIN c360_data_derived.DM_GOLD_CSTMR_LNAGE cl ON cl.transctn_mstr_id = s.cstmr_transctn_mstr_id
    JOIN repair_orders r ON r.VHCL_ID = g.VHCL_ID AND r.retail_dt = s.retail_dt
    WHERE c.cstmr_frst_nm LIKE 'W%' AND c.cstmr_last_nm = 'FERNANDEZ ARIAS'
      AND s.status_cd IS NULL AND s.rewind_dt IS NULL AND s.new_used_in = 'N'
    GROUP BY c.CSTMR_FRST_NM, c.CSTMR_LAST_NM, c.cstmr_full_nm, c.clnsd_cntct, c.cstmr_id
)

SELECT cstmr_id, first_name, last_name, phone_number, purchase_history
FROM purchases;