Part One
走迷宫,从起点走到终点,找到最短的路径。只不过路径并不是常见的步数,而是尽量少的转弯
定位岔路口
根据之前的经验,迷宫中很多步是可以一次走多步的,如下图所示。因此只要找出关键的岔路口,这样就可以大大减少计算量。

先根据一个函数,计算出每一行,每一列中,连续的可行进路段
CREATE OR REPLACE FUNCTION find_dot_ranges(input_str text)
RETURNS TABLE(start_pos int, end_pos int) AS $$
DECLARE
i int := 1;
n int := length(input_str);
in_dot_sequence boolean := false;
current_start int;
BEGIN
WHILE i <= n LOOP
-- 当遇到点且不在点序列中时,开始新的序列
IF substring(input_str FROM i FOR 1) IN ('.', 'S', 'E') AND NOT in_dot_sequence THEN
current_start := i;
in_dot_sequence := true;
-- 当遇到非点且正在点序列中时,结束当前序列
ELSIF substring(input_str FROM i FOR 1) NOT IN ('.', 'S', 'E') AND in_dot_sequence THEN
start_pos := current_start;
end_pos := i - 1;
RETURN NEXT;
in_dot_sequence := false;
END IF;
i := i + 1;
END LOOP;
-- 处理字符串以点结尾的情况
IF in_dot_sequence THEN
start_pos := current_start;
end_pos := n;
RETURN NEXT;
END IF;
RETURN;
END;
$$ LANGUAGE plpgsql;
行列的可行进路段,会存在交叉点,这些也需要记录为岔路口,如下图所示

因此,通过一个函数,来将上一步计算出来的完整路程,根据行列交叉,分拆为多个路程。
CREATE OR REPLACE FUNCTION split_range(low int, high int, splits int[])
RETURNS TABLE(start_pos int, end_pos int) AS $$
DECLARE
current_value int := low;
BEGIN
IF splits is null THEN
start_pos := low;
end_pos := high;
RETURN NEXT;
ELSE
FOR i IN 1..#splits LOOP
IF splits[i] > current_value THEN
start_pos := current_value;
end_pos := splits[i];
current_value = splits[i];
RETURN NEXT;
END IF;
END LOOP;
IF high > current_value THEN
start_pos := current_value;
end_pos := high;
RETURN NEXT;
END IF;
END IF;
END;
$$ LANGUAGE plpgsql;
计算候选路程
有了上述两个函数之后,就可以计算出每个岔路口之间的路程,包括起点坐标、终点坐标、方向。
with recursive rows as (
SELECT (row_number() over ()) :: INTEGER as _row, line
FROM lance_input
), matrix AS (
SELECT _row :: INTEGER, x.idx :: INTEGER as _col, x.pos
FROM rows, regexp_split_to_table(line, '') with ordinality as x(pos, idx)
), cols AS (
SELECT _col, string_agg(pos, '' order by _row) as line
FROM matrix
GROUP BY _col
), rows_range AS (
SELECT _row, start_pos, end_pos
FROM rows, find_dot_ranges(line) t
WHERE t.start_pos < t.end_pos
), cols_range AS (
SELECT _col, start_pos, end_pos
FROM cols, find_dot_ranges(line) t
WHERE t.start_pos < t.end_pos
), rows_new_range AS (
SELECT t._row, s.start_pos as _col, s.end_pos as next_col
FROM (
SELECT a._row, a.start_pos, a.end_pos, b.pos_array
FROM rows_range a
LEFT JOIN (
SELECT a._row, a.start_pos, a.end_pos, array_agg(b._col order by b._col) as pos_array
FROM rows_range a
JOIN cols_range b
ON a._row between b.start_pos and b.end_pos
AND b._col between a.start_pos and a.end_pos
GROUP BY a._row, a.start_pos, a.end_pos
) b
ON a._row = b._row
AND a.start_pos = b.start_pos
AND a.end_pos = b.end_pos
) t, split_range(start_pos, end_pos, pos_array) s
), cols_new_range AS (
SELECT t._col, s.start_pos as _row, s.end_pos as next_row
FROM (
SELECT a._col, a.start_pos, a.end_pos, b.pos_array
FROM cols_range a
LEFT JOIN (
SELECT b._col, b.start_pos, b.end_pos, array_agg(a._row order by a._row) as pos_array
FROM rows_range a
JOIN cols_range b
ON a._row between b.start_pos and b.end_pos
AND b._col between a.start_pos and a.end_pos
GROUP BY b._col, b.start_pos, b.end_pos
) b
ON a._col = b._col
AND a.start_pos = b.start_pos
AND a.end_pos = b.end_pos
) t, split_range(start_pos, end_pos, pos_array) s
), all_range AS (
SELECT _row as start_row, _col as start_col, _row as end_row, next_col as end_col, 'E' as direction
FROM rows_new_range
UNION ALL
SELECT _row as start_row, next_col as start_col, _row as end_row, _col as end_col, 'W' as direction
FROM rows_new_range
UNION ALL
SELECT _row as start_row, _col as start_col, next_row as end_row, _col as end_col, 'S' as direction
FROM cols_new_range
UNION ALL
SELECT next_row as start_row, _col as start_col, _row as end_row, _col as end_col, 'N' as direction
FROM cols_new_range
)
寻找最优路径
接下来就比较简单了,从S出发,在候选路程中挑选未走过的路继续前进,直到找到终点。
walkthrough AS (
SELECT _row, _col, 'E' as direction, 0 as steps, 0 as turns, array[_row * 10000 + _col] as path
FROM matrix
WHERE pos = 'S'
UNION ALL
SELECT *
FROM (
WITH walkthrough_inner as (select * From walkthrough)
SELECT b.end_row as _row,
b.end_col as _col,
b.direction,
a.steps + abs(b.end_row - b.start_row) + abs(b.end_col - b.start_col) as steps,
case when a.direction != b.direction then turns + 1 else turns end as turns,
path || (b.end_row * 10000 + b.end_col) as path
FROM walkthrough_inner a
JOIN all_range b
ON a._row = b.start_row
AND a._col = b.start_col
AND NOT (b.end_row * 10000 + b.end_col) = ANY(a.path)
WHERE NOT EXISTS (select * From walkthrough_inner where _row = 2 and _col = 16)
) t
)
SELECT * FROM walkthrough;
但是计算量比较大,性能上不太好,需要优化下。其实可以在每一步迭代的时候,在某一处坐标时,仅保留score最低的一个路径即可,其他可以忽略掉。
walkthrough AS (
SELECT _row, _col, 'E' as direction, 0 as steps, 0 as turns, array[_row * 10000 + _col] as path
FROM matrix
WHERE pos = 'S'
UNION ALL
SELECT _row, _col, direction, steps, turns, path
FROM (
WITH walkthrough_inner as (
SELECT _row, _col, direction, steps, turns, path
FROM (
SELECT *, row_number() over (partition by _row, _col, direction order by turns * 1000 + steps) as rn
From walkthrough
) t
WHERE rn = 1
)
SELECT b.end_row as _row,
b.end_col as _col,
b.direction,
a.steps + abs(b.end_row - b.start_row) + abs(b.end_col - b.start_col) as steps,
case when a.direction != b.direction then turns + 1 else turns end as turns,
path || (b.end_row * 10000 + b.end_col) as path
FROM walkthrough_inner a
JOIN all_range b
ON a._row = b.start_row
AND a._col = b.start_col
AND NOT (b.end_row * 10000 + b.end_col) = ANY(a.path)
WHERE NOT EXISTS (select * From walkthrough_inner where _row = 2 and _col = 16)
) t
)
SELECT * FROM walkthrough;
Part Two
需要找出所有潜在最优路径路过的坐标之和
上一步使用row_number(),因此过滤掉了很多潜在的最优路径。这一步需要找出所有的最优路径,因此这里需要替换为rank(),保证最优路径不会被过滤掉。
但是注意到,最后的过滤条件有WHERE NOT EXISTS (select * From walkthrough_inner where _row = 2 and _col = 16),也就是说,只要有路径达到了终点,那就立即终止搜索。这样做真的是正确的吗?
先达到就是最优吗?
因为我们的方法是每一次行走多步,先达到终点,并不意味着最终的score是最小的,score还要考虑转向的次数。通过计算,先达到终点的score是159524,而实际的最小score是143580。
postgres=# select turns * 1000 + steps from lance_test where _row = 2 and _col = 140;
?column?
----------
159524
159524
(2 rows)
postgres=# select turns * 1000 + steps from lance_test where _row = 2 and _col = 140 order by turns * 1000 + steps limit 2;
?column?
----------
143580
143580
(2 rows)
找到最优路径的集合
因此,先采用笨的方法,迭代一定次数后,基本上最优路径也就齐全了。
walkthrough AS (
SELECT _row, _col, 'E' as direction, 0 as steps, 0 as turns, array[_row * 10000 + _col] as path
FROM matrix
WHERE pos = 'S'
UNION ALL
SELECT _row, _col, direction, steps, turns, path
FROM (
WITH walkthrough_inner as (
SELECT _row, _col, direction, steps, turns, path
FROM (
SELECT *, row_number() over (partition by _row, _col, direction order by turns * 1000 + steps) as rn
From walkthrough
) t
WHERE rn = 1
)
SELECT b.end_row as _row,
b.end_col as _col,
b.direction,
a.steps + abs(b.end_row - b.start_row) + abs(b.end_col - b.start_col) as steps,
case when a.direction != b.direction then turns + 1 else turns end as turns,
path || (b.end_row * 10000 + b.end_col) as path
FROM walkthrough_inner a
JOIN all_range b
ON a._row = b.start_row
AND a._col = b.start_col
AND NOT (b.end_row * 10000 + b.end_col) = ANY(a.path)
) t
)
SELECT * FROM walkthrough limit 2000000;
通过下面的方法,将path分拆为一段段路径
postgres=# WITH example AS ( -- 示例数据(替换为实际表和列)
postgres(# SELECT ARRAY[1, 2, 3, 4] AS arr
postgres(# )
postgres-# SELECT
postgres-# arr[i] AS first, -- 当前元素
postgres-# arr[i + 1] AS next -- 下一个元素
postgres-# FROM
postgres-# example,
postgres-# generate_series( -- 生成索引序列
postgres(# array_lower(arr, 1), -- 数组起始索引(默认为 1)
postgres(# array_upper(arr, 1) - 1 -- 最后一个有效索引(确保 i+1 不越界)
postgres(# ) AS i;
first | next
-------+------
1 | 2
2 | 3
3 | 4
(3 rows)
最终,将path转换为许多段路径,去重之后,顶点数量+中间线段的数量,即是最终结果
WITH top_path AS (
select path from (select rank() over (order by turns * 1000 + steps) as rn, path from lance_test where _row = 2 and _col = 140) t where rn = 1
), all_route AS (
SELECT DISTINCT
path[i] AS first,
path[i + 1] AS next
FROM
top_path,
generate_series(
array_lower(path, 1),
array_upper(path, 1) - 1
) AS i
), all_coordinates AS (
SELECT first / 10000 as first_row,
first % 10000 as first_col,
next / 10000 as next_row,
next % 10000 as next_col
FROM all_route
)
SELECT sum(case when first_row = next_row then abs(first_col - next_col) - 1 else abs(first_row - next_row) - 1 end)
FROM all_coordinates
UNION ALL
SELECT COUNT(DISTINCT coordinate)
FROM (
SELECT first as coordinate
FROM all_route
UNION ALL
SELECT next as coordinate
FROM all_route
) t