— Day 4: Printing Department —
Part One
找出某个位置上下左右以及四个对角8个方位上,少于4个@的个数..@..@ .@@.@
关联后聚合
可以直接关联查询,关联查询的条件就是行、列在1个距离内,且非当前位置。
with rows as (
SELECT (row_number() over ()) :: INTEGER as _row, line
FROM lance_input
), matrix AS (
SELECT _row :: INTEGER, x.idx :: INTEGER as _col, x.pos
FROM rows, regexp_split_to_table(line, '') with ordinality as x(pos, idx)
)
SELECT COUNT(*)
FROM (
SELECT a._row, a._col
FROM matrix a
JOIN matrix b
ON abs(a._row - b._row) <= 1
AND abs(a._col - b._col) <= 1
AND a.pos = '@'
AND b.pos = '@'
AND NOT (a._row = b._row AND a._col = b._col)
GROUP BY a._row, a._col
HAVING count(*) < 4
) t;
正确的关联
但是上述的结果缺失错误的,缺失了几个位置。因为如果其8个方位都没有的话,则不会出现在最终的聚合结果中。因此需要修改下关联条件,允许有当前位置
with rows as (
SELECT (row_number() over ()) :: INTEGER as _row, line
FROM lance_input
), matrix AS (
SELECT _row :: INTEGER, x.idx :: INTEGER as _col, x.pos
FROM rows, regexp_split_to_table(line, '') with ordinality as x(pos, idx)
)
SELECT COUNT(*)
FROM (
SELECT a._row, a._col
FROM matrix a
JOIN matrix b
ON abs(a._row - b._row) <= 1
AND abs(a._col - b._col) <= 1
AND a.pos = '@'
AND b.pos = '@'
GROUP BY a._row, a._col
HAVING count(*) <= 4
) t;
性能优化
上述的方法特别慢,需要耗时超过了20秒
count
-------
1320
(1 row)
Time: 23506.176 ms (00:23.506)
换种思路,直接将某个位置的8个邻居查询出来,然后直接判断即可。partition的分组,除了行与列,还有两个对角线。
with rows as (
SELECT (row_number() over ()) :: INTEGER as _row, line
FROM lance_input
), matrix AS (
SELECT _row :: INTEGER, x.idx :: INTEGER as _col, x.pos
FROM rows, regexp_split_to_table(line, '') with ordinality as x(pos, idx)
), matrix_neighbours AS (
SELECT _row, _col, pos,
lead(pos) over (partition by _col order by _row) as down_pos,
lag(pos) over (partition by _col order by _row) as up_pos,
lead(pos) over (partition by _row order by _col) as right_pos,
lag(pos) over (partition by _row order by _col) as left_pos,
lead(pos) over (partition by _row - _col order by _row) as se_pos,
lag(pos) over (partition by _row - _col order by _row) as nw_pos,
lead(pos) over (partition by _row + _col order by _col) as ne_pos,
lag(pos) over (partition by _row + _col order by _col) as sw_pos
FROM matrix
)
SELECT COUNT(*)
FROM matrix_neighbours
WHERE concat(down_pos, up_pos, right_pos, left_pos, se_pos, nw_pos, ne_pos, sw_pos) not like '%@%@%@%@%'
AND pos = '@';
最终的性能提升非常明显
count
-------
1320
(1 row)
Time: 76.320 ms
Part Two
将第一部分符合条件的位置移除,重复运行直到没有更多符合条件的位置,最终移除了多少个?
通过递归查询,重复运行第一部分的判断即可,并将符合记录的位置标记为x,最终计算x的个数即可
with recursive rows as (
SELECT (row_number() over ()) :: INTEGER as _row, line
FROM lance_input
), matrix AS (
SELECT _row :: INTEGER, x.idx :: INTEGER as _col, x.pos
FROM rows, regexp_split_to_table(line, '') with ordinality as x(pos, idx)
), matrix_neighbours AS (
SELECT 1 as step, _row, _col, pos, true as forklift
FROM matrix
UNION ALL
SELECT *
FROM (
WITH inner_tbl AS (select * from matrix_neighbours)
SELECT step + 1 as step, _row, _col, case when pos = '@' and neighbours not like '%@%@%@%@%' then 'x' else pos end as pos,
pos = '@' and neighbours not like '%@%@%@%@%' as forklift
FROM (
SELECT step, _row, _col, pos, concat(
lead(pos) over (partition by _col order by _row),
lag(pos) over (partition by _col order by _row),
lead(pos) over (partition by _row order by _col),
lag(pos) over (partition by _row order by _col),
lead(pos) over (partition by _row - _col order by _row),
lag(pos) over (partition by _row - _col order by _row),
lead(pos) over (partition by _row + _col order by _col),
lag(pos) over (partition by _row + _col order by _col)
) as neighbours
FROM inner_tbl
) t
WHERE exists (select * from inner_tbl where forklift)
) t
)
SELECT count(*) from matrix_neighbours where step = 69 and pos = 'x';
迭代了68次后,最终耗时4秒左右运行完成
SELECT count(*) from matrix_neighbours where step = 69 and pos = 'x';
count
-------
8354
(1 row)
Time: 4611.715 ms (00:04.612)