基于SQL的分析挖掘案例

发布于:2024-01-25 ⋅ 阅读:(53) ⋅ 点赞:(0)

基于SQL的分析挖掘案例

背景: 一个餐厅,经营一段时间的销售数据, 根据销售数据分析客户喜好, 以提升餐厅的营业水平(素材来自网络) ;

1- 数据准备
-- t_sales: 销售订单表(customer_id: 客户id, order_date: 订单日期, product_id: 商品id)
WITH t_sales AS (
    SELECT * FROM (
        VALUES('A', '2023-11-01', '1'), ('C', '2023-11-07', '1') 
            , ('A', '2023-11-01', '2'), ('C', '2023-11-11', '2') 
            , ('A', '2023-11-07', '2'), ('C', '2023-11-15', '2') 
            , ('A', '2023-11-10', '3'), ('C', '2023-11-19', '3') 
            , ('A', '2023-11-11', '3'), ('D', '2023-11-02', '3') 
            , ('A', '2023-11-11', '3'), ('C', '2023-12-02', '3') 
            , ('B', '2023-11-01', '2'), ('D', '2023-11-05', '2') 
            , ('B', '2023-11-02', '2'), ('D', '2023-11-08', '2') 
            , ('B', '2023-11-04', '1'), ('D', '2023-11-09', '1') 
            , ('B', '2023-11-11', '1'), ('D', '2023-11-11', '1') 
            , ('B', '2023-11-16', '3'), ('E', '2023-11-01', '3') 
            , ('B', '2023-12-01', '3'), ('D', '2023-12-18', '3') 
            , ('C', '2023-11-01', '3'), ('E', '2023-11-03', '3') 
            , ('C', '2023-11-01', '3'), ('E', '2023-11-08', '3') 
            , ('C', '2023-11-07', '3'), ('E', '2023-11-12', '3') 
    ) AS tbl_name(customer_id, order_date, product_id)
)

-- t_product: 商品表(product_id: 商品id, product_name: 商品名字, price: 单价)
, t_product AS (
    SELECT * FROM (
        VALUES('1', 'pasta', '20')
            , ('2', 'salad', '25')
            , ('3', 'sauce', '22')
    ) AS tbl_name(product_id, product_name, price)
)

-- t_member: 会员表(member_id: 会员id, join_date: 加入日期)
, t_member AS (
    SELECT * FROM (
        VALUES('A', '2023-11-07')
            , ('C', '2023-11-09')
        	, ('D', '2023-11-10')
    ) AS tbl_name(member_id, join_date)
)
2- 题目
  1. 每位客户消费总额是多少
SELECT a.customer_id, SUM(b.price) AS sales_amount 
FROM t_sales a 
JOIN t_product b 
ON a.product_id = b.product_id
GROUP BY a.customer_id
;
customer_id sales_amount
A 136.0
B 134.0
C 180.0
D 134.0
E 88.0
  1. 每位客户总共消费天数
SELECT a.customer_id, COUNT(DISTINCT order_date) AS date_ct
FROM t_sales a 
GROUP BY a.customer_id
;
customer_id date_ct
A 4
B 6
C 6
D 6
E 4
  1. 每位客户购买的第一份食物是什么
SELECT customer_id, product_name
FROM ( -- 使用开窗函数,按照日期排序,获得购买食物排序
    SELECT 
          customer_id, product_id
        , RANK() OVER(PARTITION BY customer_id ORDER BY order_date) AS rk
    FROM t_sales 
) a 
JOIN t_product b 
ON a.product_id = b.product_id
WHERE a.rk = 1
GROUP BY customer_id, product_name
;
customer_id product_name
A pasta
A salad
B salad
C sauce
D sauce
E sauce
  1. 最受欢迎的食物是哪一个, 一共被购买了多少次, 每位顾客购买了多少次
SELECT 
      customer_id, product_name, customer_buy_ct, buy_ct
FROM (
    SELECT 
          customer_id, product_id, customer_buy_ct, buy_ct
        , RANK() OVER(ORDER BY buy_ct DESC) AS rk 
    FROM (
        SELECT customer_id, product_id
            , COUNT(1) OVER(PARTITION BY customer_id, product_id) AS customer_buy_ct -- 每位顾客购买次数
            , COUNT(1) OVER(PARTITION BY product_id) AS buy_ct -- 总共购买次数
        FROM t_sales
    )
    GROUP BY customer_id, product_id, customer_buy_ct, buy_ct
) a 
JOIN t_product b 
ON a.product_id = b.product_id
WHERE a.rk = 1
ORDER BY customer_id
;
customer_id product_name customer_buy_ct buy_ct
A sauce 3 16
B sauce 2 16
C sauce 5 16
D sauce 2 16
E sauce 4 16
  1. 每位顾客最喜欢的食物分别是什么
SELECT customer_id, product_name, customer_buy_ct
FROM (
    SELECT 
          customer_id, product_name, customer_buy_ct
        , RANK() OVER(PARTITION BY customer_id ORDER BY customer_buy_ct DESC) AS rk
    FROM (
        SELECT 
              customer_id, product_name
            , COUNT(1) AS customer_buy_ct -- 购买次数
        FROM t_sales a 
        JOIN t_product b
        ON a.product_id = b.product_id
        GROUP BY customer_id, product_name
    ) a 
) b 
WHERE rk =1
;
customer_id product_name customer_buy_ct
A sauce 3
B pasta 2
B salad 2
B sauce 2
C sauce 5
D pasta 2
D salad 2
D sauce 2
E sauce 4
  1. 顾客成为会员后, 最先购买的是什么
SELECT customer_id, product_name, order_date
FROM (
    SELECT 
          customer_id, product_name, order_date
        , RANK() OVER(PARTITION BY customer_id ORDER BY order_date) AS rk 
    FROM (
        SELECT 
              customer_id, order_date, product_id, join_date
        FROM t_member a 
        LEFT JOIN t_sales b ON a.member_id = b.customer_id
    ) a 
    JOIN t_product b ON a.product_id = b.product_id
    WHERE order_date >= join_date
) t 
WHERE rk = 1
GROUP BY customer_id, product_name, order_date
;
customer_id product_name order_date
A salad 2023-11-07
C salad 2023-11-11
D pasta 2023-11-11
  1. 顾客在成为会员之前, 最后购买的产品是什么
SELECT customer_id, product_name, order_date
FROM (
    SELECT 
          customer_id, product_name, order_date
        , RANK() OVER(PARTITION BY customer_id ORDER BY order_date DESC ) AS rk 
    FROM (
        SELECT 
              customer_id, order_date, product_id, join_date
        FROM t_member a 
        LEFT JOIN t_sales b ON a.member_id = b.customer_id
    ) a 
    JOIN t_product b ON a.product_id = b.product_id
    WHERE order_date < join_date
) t 
WHERE rk = 1
GROUP BY customer_id, product_name, order_date
;
customer_id product_name order_date
A pasta 2023-11-01
A salad 2023-11-01
C pasta 2023-11-07
C sauce 2023-11-07
D pasta 2023-11-09
  1. 顾客在成为会员之前, 总的购买产品数,和总的消费金额
SELECT 
      customer_id
    , COUNT(a.product_id) AS sales_quantity
    , SUM(price) AS sales_amount
FROM (
    SELECT 
          customer_id, order_date, product_id, join_date
    FROM t_member a 
    LEFT JOIN t_sales b ON a.member_id = b.customer_id
) a 
JOIN t_product b ON a.product_id = b.product_id
WHERE order_date < join_date
GROUP BY customer_id
;
customer_id sales_quantity sales_amount
A 2 45.0
C 4 86.0
D 4 92.0
  1. 每消费1元有10积分, sauce 有双倍积分, 每位顾客有多少积分
SELECT 
      customer_id
    , SUM(CASE WHEN product_name = 'sauce' THEN price * 2 * 10 ELSE price * 10 END) AS total_scores
FROM (
    SELECT customer_id, product_name, price
    FROM t_sales a 
    JOIN t_product b ON a.product_id = b.product_id
) a 
GROUP BY customer_id
;
customer_id total_scores
A 2020.0
B 1780.0
C 2900.0
D 1780.0
E 1760.0
  1. 每消费1元有10积分, sauce 有双倍积分, 成为会员后所有产品都是双倍积分, 统计在11月份之前, 每位顾客积分是多少
SELECT 
      customer_id
    -- 判断是否是 'sauce' 或者 是成为会员后的订单, 统计积分
    , SUM(CASE WHEN (product_name = 'sauce' OR order_date >= join_date) THEN price * 2 * 10 ELSE price * 10 END) AS total_scores
FROM (
    SELECT customer_id, product_name, price, order_date
        , NVL(join_date, '9999-12-31') AS join_date  -- 不是会员的日期置为最大日期
    FROM t_sales a 
    JOIN t_product b ON a.product_id = b.product_id
    LEFT JOIN t_member c ON a.customer_id = c.member_id
    WHERE order_date <= '2023-11-30'  -- 11月份订单
) a
GROUP BY customer_id
;
customer_id total_scores
A 2270.0
B 1340.0
C 2960.0
D 1540.0
E 1760.0
  1. 创建新的宽表, 包含字段 customer_id, product_name, price, order_date, member
SELECT 
      customer_id, product_name, price, order_date
    -- 判断是否是 会员后的订单
    , CASE WHEN order_date >= join_date THEN 'Y' ELSE 'N' END AS member
FROM (
    SELECT customer_id, product_name, price, order_date
        , NVL(join_date, '9999-12-31') AS join_date  -- 不是会员的日期置为最大日期
    FROM t_sales a 
    JOIN t_product b ON a.product_id = b.product_id
    LEFT JOIN t_member c ON a.customer_id = c.member_id
) a
;
customer_id product_name price order_date member
A pasta 20 2023-11-01 N
A salad 25 2023-11-01 N
A salad 25 2023-11-07 Y
A sauce 22 2023-11-10 Y
A sauce 22 2023-11-11 Y
A sauce 22 2023-11-11 Y
B salad 25 2023-11-01 N
B salad 25 2023-11-02 N
B pasta 20 2023-11-04 N
B pasta 20 2023-11-11 N
B sauce 22 2023-11-16 N
B sauce 22 2023-12-01 N
C pasta 20 2023-11-07 N
C salad 25 2023-11-11 Y
C salad 25 2023-11-15 Y
C sauce 22 2023-11-19 Y
C sauce 22 2023-12-02 Y
C sauce 22 2023-11-01 N
C sauce 22 2023-11-01 N
C sauce 22 2023-11-07 N
D sauce 22 2023-11-02 N
D salad 25 2023-11-05 N
D salad 25 2023-11-08 N
D pasta 20 2023-11-09 N
D pasta 20 2023-11-11 Y
D sauce 22 2023-12-18 Y
E sauce 22 2023-11-01 N
E sauce 22 2023-11-03 N
E sauce 22 2023-11-08 N
E sauce 22 2023-11-12 N
  1. 对顾客购买产品按时间升序排序, 区分会员与非会员, 非会员不参与排序, 记为NULL
SELECT
      customer_id, product_name, price, order_date, member
      -- 直接在上一个结果上进行子查询, 是会员的参与排序
    , CASE WHEN member = 'Y' THEN (RANK() OVER(PARTITION BY customer_id, member ORDER BY order_date)) ELSE NULL END AS rk
FROM (
    SELECT 
          customer_id, product_name, price, order_date
        , CASE WHEN order_date >= join_date THEN 'Y' ELSE 'N' END AS member
    FROM (
        SELECT customer_id, product_name, price, order_date
            , NVL(join_date, '9999-12-31') AS join_date  -- 不是会员的日期置为最大日期
        FROM t_sales a 
        JOIN t_product b ON a.product_id = b.product_id
        LEFT JOIN t_member c ON a.customer_id = c.member_id
    ) a
) b
;
customer_id product_name price order_date member rk
A salad 25 2023-11-01 N \N
A pasta 20 2023-11-01 N \N
A salad 25 2023-11-07 Y 1
A sauce 22 2023-11-10 Y 2
A sauce 22 2023-11-11 Y 3
A sauce 22 2023-11-11 Y 3
B salad 25 2023-11-01 N \N
B salad 25 2023-11-02 N \N
B pasta 20 2023-11-04 N \N
B pasta 20 2023-11-11 N \N
B sauce 22 2023-11-16 N \N
B sauce 22 2023-12-01 N \N
C sauce 22 2023-11-01 N \N
C sauce 22 2023-11-01 N \N
C sauce 22 2023-11-07 N \N
C pasta 20 2023-11-07 N \N
C salad 25 2023-11-11 Y 1
C salad 25 2023-11-15 Y 2
C sauce 22 2023-11-19 Y 3
C sauce 22 2023-12-02 Y 4
D sauce 22 2023-11-02 N \N
D salad 25 2023-11-05 N \N
D salad 25 2023-11-08 N \N
D pasta 20 2023-11-09 N \N
D pasta 20 2023-11-11 Y 1
D sauce 22 2023-12-18 Y 2
E sauce 22 2023-11-01 N \N
E sauce 22 2023-11-03 N \N
E sauce 22 2023-11-08 N \N
E sauce 22 2023-11-12 N \N
end
本文含有隐藏内容,请 开通VIP 后查看