SQL 使用场景（二）

发表于 2023-06-01

场景描述

根据上一行填充本行的空白栏位^[1]。这是在参考资料中提出的一个问题，它给出了在 SQL Server 数据库中的解决方案，现在要在 PostgreSQL 中来模拟并解决它。

假设在 PostgreSQL 12.10 的库中有一个用户的考试成绩表

-- Exam score records: which user scored what in which exam month.
create table t_score (
    user_id   int,               -- user ID
    user_name character varying, -- user name
    exam_time int,               -- exam month encoded as a yyyyMM integer, e.g. 202309
    score     int                -- score
);

在成绩表中有如下的一些测试数据

-- Exam scores for user 11 (note the gaps between exam months).
insert into t_score (user_id, user_name, exam_time, score)
values
    (11, 'u1', 202209, 32),
    (11, 'u1', 202303, 30),
    (11, 'u1', 202304, 31),
    (11, 'u1', 202308, 35),
    (11, 'u1', 202309, 37);

-- Exam scores for user 12.
insert into t_score (user_id, user_name, exam_time, score)
values
    (12, 'u2', 202304, 51),
    (12, 'u2', 202308, 65),
    (12, 'u2', 202309, 77);

我们期望用一条 SQL 查询出用户 11 从第一次考试的月份到最后一次考试的月份之间每个月的成绩；对于没有考试的月份，其成绩取此前最近一次有考试的月份的成绩。下面是我们期望的结果：

在结果中，有考试成绩的月份都用红色进行了标记，其他没有考试的月份的成绩取此前最近一次有考试成绩的月份的成绩。比如 202210 没有考试，则该月的成绩为 202209 的成绩，此后直到 202302 的各个月份也都沿用 202209 的成绩。又比如 202305 没有考试，则该月的成绩为 202304 的成绩。

场景分析

-- Forward-fill user 11's score into every month that has no exam.
-- `recursive` must follow `with` even though only the last CTE
-- (filled_score) actually refers to itself.
with recursive exam_range as (
    -- First and last exam month of user 11, as dates (first day of the month).
    select
        min(to_date(exam_time::text, 'YYYYMM')) as first_month,
        max(to_date(exam_time::text, 'YYYYMM')) as last_month
    from t_score
    where user_id = 11
),
month_list as (
    -- One row per calendar month in the range, converted back to the
    -- yyyyMM integer encoding used by t_score.exam_time.
    select to_char(g.month_start::date, 'YYYYMM')::integer as exam_time
    from exam_range as r
    cross join lateral generate_series(
        r.first_month,
        r.last_month,
        interval '1 month'
    ) as g(month_start)
),
monthly_score as (
    -- Attach the real score where an exam exists (null otherwise) and give
    -- every month a consecutive row number so neighbours can be joined.
    select
        m.exam_time,
        s.score,
        row_number() over (order by m.exam_time) as row_no
    from month_list as m
    left join t_score as s
        -- The user filter stays in ON so months without an exam are kept.
        on s.exam_time = m.exam_time
        and s.user_id = 11
),
filled_score as (
    -- Non-recursive term: months that already have a score.
    select
        exam_time,
        score,
        row_no
    from monthly_score
    where score is not null

    union all

    -- Recursive term: the month and row number come from the next row,
    -- the score is carried over from the previously resolved row.
    select
        nxt.exam_time,
        prev.score,
        nxt.row_no
    from filled_score as prev          -- recursive self-reference
    inner join monthly_score as nxt
        on nxt.row_no = prev.row_no + 1
    -- Only step onto months that have no score of their own.
    where nxt.score is null
)
select
    exam_time,
    score
from filled_score
order by exam_time;