Day 12: Hill Climbing Algorithm
Part One
走迷宫,从S走到E的最少步数,唯一限制是每一步的编码最多增加1,可以相等或减少
Sabqponm
abcryxxl
accszExk
acctuvwj
abdefghi
穷举路径
直观的想法就是穷举所有的路径,并且从最终有效路径中挑选出步数最少的
WITH RECURSIVE maze AS (
SELECT _row, row_number() over (partition by _row) as _col, pos
FROM (
SELECT row_number() over () as _row, unnest(regexp_split_to_array(line, '')) as pos
FROM lance_input
)t
),
path AS (
SELECT _row :: integer, _col :: integer, 'a' as pos, '{}' :: integer[] as trail, true as valid
FROM maze
WHERE pos = 'S'
UNION ALL
SELECT t._row, t._col, s.pos,
t.trail + (t._row * 1000 + t._col) as trail,
ascii(case when s.pos = 'E' then 'z' else s.pos end) - ascii(t.pos) between 0 and 1 as valid
FROM (
SELECT case when y.direction = 'N' then x._row - 1
when y.direction = 'S' then x._row + 1
else x._row
end as _row,
case when y.direction = 'E' then x._col + 1
when y.direction = 'W' then x._col - 1
else x._col
end as _col,
x.pos,
x.trail,
y.direction
FROM path x, (select unnest(regexp_split_to_array('ESWN', '')) as direction) y
WHERE x.valid
AND x.pos != 'E'
) t
JOIN maze s
ON t._row = s._row
AND t._col = s._col
AND t.trail # (t._row * 1000 + t._col) = 0
)
SELECT min(#trail) from path where pos = 'E' and valid;
min
-----
31
(1 row)
Time: 6.030 ms
样例数据运行很快,但是实际运用到真实例子的时候,运行很慢,8分钟都没有出来结果,看来需要优化下。
Dijkstra最短路径
计算最短路径,使用经典的Dijkstra算法即可。
首先用一张表来记录每个点距离起点的距离,以及是否被看见的标记。距离初始化为10000,后续的计算会不断更新最短距离。
DROP TABLE origin;
CREATE TABLE origin
AS
WITH RECURSIVE maze AS (
SELECT _row, row_number() over (partition by _row) as _col, pos
FROM (
SELECT row_number() over () as _row, unnest(regexp_split_to_array(line, '')) as pos
FROM lance_input
)t
)
SELECT _row :: integer, _col :: integer, pos, 10000 as dist, false as seen
FROM maze;
CREATE INDEX idx_origin on origin(_row, _col);
然后,通过一个函数,实现了Dijkstra算法。具体算法的话就是:
- 初始化起点的距离为0
- 找出距离起点最短的顶点,记录为seen已看见
- 更新seen节点的邻居的距离,就是seen节点距离+1,最终取最短的
- 重复2-3两步,直到所有的节点都已看见
CREATE OR REPLACE FUNCTION dijkstra()
RETURNS VOID AS $$
DECLARE
unseen INTEGER;
BEGIN
-- INIT Start
UPDATE origin SET dist = 0 WHERE pos = 'S';
-- WHILE loop
WHILE true LOOP
-- any left unseen
unseen := (SELECT count(*) FROM origin WHERE seen = false);
IF unseen = 0 THEN
RETURN;
END IF;
RAISE NOTICE '(%) unseen left', unseen;
-- FIND SHORTEST UNSEEN
UPDATE origin SET seen = true
FROM (
SELECT _row, _col
FROM (
SELECT _row, _col, rank() over (order by dist) as rn
FROM origin
WHERE NOT seen
) s
WHERE s.rn = 1
) t
WHERE origin._row = t._row AND origin._col = t._col
AND NOT seen;
-- UPDATE neighbors
UPDATE origin SET dist = t.dist
FROM (
SELECT _row, _col, dist,
row_number() over (partition by _row, _col order by dist) as rn
FROM (
SELECT b._row, b._col, a.dist + 1 as dist
FROM (select * from origin where seen) a
JOIN (select * from origin where not seen) b
ON ((a._row = b._row AND a._col + 1 = b._col)
OR (a._row = b._row AND a._col - 1 = b._col)
OR (a._row + 1 = b._row AND a._col = b._col)
OR (a._row - 1 = b._row AND a._col = b._col))
AND ascii(case when b.pos = 'E' then 'z' else b.pos end) - ascii(case when a.pos = 'S' then 'a' else a.pos end) <= 1
) s
) t
WHERE origin._row = t._row AND origin._col = t._col
AND NOT seen
AND rn = 1;
END LOOP;
END;
$$ LANGUAGE plpgsql;
最终20多秒就运算出来了
postgres=# select dijkstr();
NOTICE: (6642) unseen left
NOTICE: (6641) unseen left
NOTICE: (6638) unseen left
NOTICE: (6633) unseen left
NOTICE: (6626) unseen left
NOTICE: (6617) unseen left
…………
NOTICE: (54) unseen left
NOTICE: (46) unseen left
NOTICE: (43) unseen left
NOTICE: (40) unseen left
NOTICE: (39) unseen left
dijkstr
---------
(1 row)
Time: 24942.950 ms (00:24.943)
postgres=# select * From origin where pos = 'E';
_row | _col | pos | dist | seen
------+------+-----+------+------
21 | 138 | E | 437 | t
(1 row)
Part Two
从任一的a出发,找到到达E的最短路径
多起点逐个计算
将每个a作为起点,逐个计算最短路径即可
CREATE OR REPLACE FUNCTION dijkstra2(start_row INTEGER, start_col INTEGER)
RETURNS INTEGER AS $$
DECLARE
unseen INTEGER;
result INTEGER;
BEGIN
-- INIT DATA
DROP TABLE origin;
CREATE TABLE origin
AS
WITH RECURSIVE maze AS (
SELECT _row, row_number() over (partition by _row) as _col, pos
FROM (
SELECT row_number() over () as _row, unnest(regexp_split_to_array(line, '')) as pos
FROM lance_input
)t
)
SELECT _row :: integer, _col :: integer, pos, 10000 as dist, false as seen
FROM maze;
CREATE INDEX idx_origin on origin(_row, _col);
-- INIT Start
UPDATE origin SET dist = 0 WHERE _row = start_row AND _col = start_col;
-- WHILE loop
WHILE true LOOP
-- any left unseen
unseen := (SELECT count(*) FROM origin WHERE seen = false);
IF unseen = 0 THEN
EXIT;
END IF;
-- FIND SHORTEST UNSEEN
UPDATE origin SET seen = true
FROM (
SELECT _row, _col
FROM (
SELECT _row, _col, rank() over (order by dist) as rn
FROM origin
WHERE NOT seen
) s
WHERE s.rn = 1
) t
WHERE origin._row = t._row AND origin._col = t._col
AND NOT seen;
-- UPDATE neighbors
UPDATE origin SET dist = t.dist
FROM (
SELECT _row, _col, dist,
row_number() over (partition by _row, _col order by dist) as rn
FROM (
SELECT b._row, b._col, a.dist + 1 as dist
FROM (select * from origin where seen) a
JOIN (select * from origin where not seen) b
ON ((a._row = b._row AND a._col + 1 = b._col)
OR (a._row = b._row AND a._col - 1 = b._col)
OR (a._row + 1 = b._row AND a._col = b._col)
OR (a._row - 1 = b._row AND a._col = b._col))
AND ascii(case when b.pos = 'E' then 'z' else b.pos end) - ascii(case when a.pos = 'S' then 'a' else a.pos end) <= 1
AND NOT (b.pos = 'a' AND a.pos = 'c')
) s
) t
WHERE origin._row = t._row AND origin._col = t._col
AND NOT seen
AND rn = 1;
END LOOP;
RAISE NOTICE '(%, %) finish', start_row, start_col;
result := (SELECT dist FROM origin WHERE pos = 'E');
return result;
END;
$$ LANGUAGE plpgsql;
起点筛选
但是真实例子中,起点非常多,需要进行精简。原始的起点有2166个。
postgres=# select count(*)
postgres-# from origin
postgres-# where pos IN ('a', 'S') ;
count
-------
2166
(1 row)
但是仔细观察可以发现,有很多a是无法达到终点的,因为它们都被c包围着。因此需要过滤掉这些无效的a。
可以在Dijkstra算法过程中,故意不允许c->a的路径,从而找出这些孤立的a。
SELECT b._row, b._col, a.dist + 1 as dist
FROM (select * from origin where seen) a
JOIN (select * from origin where not seen) b
ON ((a._row = b._row AND a._col + 1 = b._col)
OR (a._row = b._row AND a._col - 1 = b._col)
OR (a._row + 1 = b._row AND a._col = b._col)
OR (a._row - 1 = b._row AND a._col = b._col))
AND ascii(case when b.pos = 'E' then 'z' else b.pos end) - ascii(case when a.pos = 'S' then 'a' else a.pos end) <= 1
AND NOT (b.pos = 'a' AND a.pos = 'c')
这样过滤后,可以减少到343个
postgres=# select count(*)
postgres-# from origin
postgres-# where pos IN ('a', 'S')
postgres-# and dist != 10000;
count
-------
343
(1 row)
再仔细观察,从a到c一定要经过b,那么靠近b的a只会出现在1,3两行,这样可以精简到68个起点
postgres=# select count(*)
postgres-# from origin
postgres-# where pos IN ('a', 'S')
postgres-# and dist != 10000
postgres-# and _col in (1,3);
count
-------
68
(1 row)
计算68个起点的所有距离后,找出最短距离,总共耗时接近半小时,效率还是很低的。
postgres=# select _row, _col, dijkstra2(_row, _col) as dist from lance_test order by 3;
_row | _col | dist
------+------+------
28 | 1 | 430
27 | 1 | 431
27 | 3 | 431
29 | 1 | 431
Time: 1796312.193 ms (29:56.312)
反向寻找
其实可以换个思路,求出从E出发,达到a或者S的最短距离即可。从E出发,其规则需要和原始规则恰好相反,也就是最多减少1,增加没有约束。
CREATE OR REPLACE FUNCTION dijkstra_reverse()
RETURNS VOID AS $$
DECLARE
unseen INTEGER;
BEGIN
-- INIT Start
UPDATE origin SET dist = 0 WHERE pos = 'E';
-- WHILE loop
WHILE true LOOP
-- any left unseen
unseen := (SELECT count(*) FROM origin WHERE seen = false);
IF unseen = 0 THEN
RETURN;
END IF;
RAISE NOTICE '(%) unseen left', unseen;
-- FIND SHORTEST UNSEEN
UPDATE origin SET seen = true
FROM (
SELECT _row, _col
FROM (
SELECT _row, _col, rank() over (order by dist) as rn
FROM origin
WHERE NOT seen
) s
WHERE s.rn = 1
) t
WHERE origin._row = t._row AND origin._col = t._col
AND NOT seen;
-- UPDATE neighbors
UPDATE origin SET dist = t.dist
FROM (
SELECT _row, _col, dist,
row_number() over (partition by _row, _col order by dist) as rn
FROM (
SELECT b._row, b._col, a.dist + 1 as dist
FROM (select * from origin where seen) a
JOIN (select * from origin where not seen) b
ON ((a._row = b._row AND a._col + 1 = b._col)
OR (a._row = b._row AND a._col - 1 = b._col)
OR (a._row + 1 = b._row AND a._col = b._col)
OR (a._row - 1 = b._row AND a._col = b._col))
AND ascii(case when b.pos = 'S' then 'a' else b.pos end) - ascii(case when a.pos = 'E' then 'z' else a.pos end) >= -1
) s
) t
WHERE origin._row = t._row AND origin._col = t._col
AND NOT seen
AND rn = 1;
END LOOP;
END;
$$ LANGUAGE plpgsql;
效率提升很明显,只需要11秒就运行出了结果
Time: 11979.761 ms (00:11.980)
postgres=# select * From origin where pos in ('a', 'S') order by dist limit 3;
_row | _col | pos | dist | seen
------+------+-----+------+------
28 | 1 | a | 430 | t
27 | 3 | a | 431 | t
27 | 1 | a | 431 | t
(3 rows)