Part One
一串数字,将末尾的数字逐个填充到前面的空格中
00...111...2...333.44.5555.6666.777.888899 0099811188827773336446555566..............
这部分比较简单,一个正向group_a 一个反向group_b,按照位置关联即可
with origin AS (
SELECT idx, case when mod(idx, 2) = 1 then ((idx - 1) / 2) :: VARCHAR else '.' end as pos
FROM (
SELECT x.idx :: INTEGER, x.pos
FROM lance_input, regexp_split_to_table(line, '') with ordinality as x(pos, idx)
) t, generate_series(1, 9) as id
WHERE pos :: INTEGER >= id
ORDER BY idx
), group_a AS (
SELECT idx, pos,
row_number() over (partition by case when pos = '.' then 1 else 2 end order by idx) as rn
FROM origin
), group_b AS (
SELECT idx, pos,
row_number() over (partition by case when pos = '.' then 1 else 2 end order by idx desc) as rn
FROM origin
), move AS (
SELECT pos, row_number() over (order by idx, rn) as rn
FROM (
SELECT a.idx, a.rn, coalesce(b.pos, a.pos) as pos
FROM group_a a
LEFT JOIN group_b b
ON a.rn = b.rn
AND a.pos = '.'
AND b.pos != '.'
) t
)
SELECT sum((rn - 1) * pos :: INTEGER)
FROM move
WHERE rn <= (select count(*) from group_a where pos != '.');
Part Two
这部分增加了难度,不再是一个个数字往前移动,而是按照一组数字去移动。
00...111...2...333.44.5555.6666.777.888899 0099.111...2...333.44.5555.6666.777.8888.. 0099.1117772...333.44.5555.6666.....8888.. 0099.111777244.333....5555.6666.....8888.. 00992111777.44.333....5555.6666.....8888..
问题分析
每次移动,首先要找出第一个足够大的空闲区。而且移动后,原先的位置又变成了一个新的空闲区,而且这个空闲区可以和周围的组装成新的更大的空闲区。因此,可以考虑只记录数字的位置,空闲区每次通过数字区计算出来。
输出数字位置的方式
首先,根据数字区,计算出空闲区的相关信息,比如起始索引以及长度
free AS (
SELECT a.start_pos + a.length as start_pos,
b.start_pos - a.start_pos - a.length as length
FROM t a
JOIN (
SELECT start_pos,
lag(start_pos) over (order by start_pos) as pre_pos
FROM t
) b
ON a.start_pos = b.pre_pos
AND a.start_pos + a.length < b.start_pos
)
那么从free中找出第一个满足移动条件的即可,即长度足够,且位置在数字之前
mv_idx AS (
SELECT a.start_pos
FROM free a
JOIN t b
ON a.length >= b.length
AND b.file_id = b.current_file
AND a.start_pos < b.start_pos
ORDER BY a.start_pos
LIMIT 1
)
每次递归查询,均输出移动后的最新的数字位置,完整代码如下:
with recursive origin AS (
SELECT idx, pos :: INTEGER as length, case when mod(idx, 2) = 1 then ((idx - 1) / 2) :: VARCHAR else '.' end as file_id
FROM (
SELECT x.idx :: INTEGER, x.pos
FROM lance_input, regexp_split_to_table(line, '') with ordinality as x(pos, idx)
) t
ORDER BY idx
), num_pos AS (
SELECT start_pos, length, file_id :: INTEGER
FROM (
SELECT sum(length) over (order by idx) - length as start_pos, length, file_id
FROM origin
) t
WHERE file_id != '.'
), mv_pos AS (
SELECT start_pos, length, file_id, (SELECT max(file_id) FROM num_pos) as current_file
FROM num_pos
UNION ALL
SELECT start_pos, length, file_id, current_file
FROM (
WITH t AS (
SELECT * FROM mv_pos
), free AS (
SELECT a.start_pos + a.length as start_pos,
b.start_pos - a.start_pos - a.length as length
FROM t a
JOIN (
SELECT start_pos,
lag(start_pos) over (order by start_pos) as pre_pos
FROM t
) b
ON a.start_pos = b.pre_pos
AND a.start_pos + a.length < b.start_pos
), mv_idx AS (
SELECT a.start_pos
FROM free a
JOIN t b
ON a.length >= b.length
AND b.file_id = b.current_file
AND a.start_pos < b.start_pos
ORDER BY a.start_pos
LIMIT 1
)
SELECT CASE WHEN t.file_id = current_file AND a.start_pos IS NOT null THEN a.start_pos
ELSE t.start_pos
END as start_pos,
t.length,
t.file_id,
t.current_file - 1 as current_file
FROM t LEFT JOIN mv_idx a
ON 1 = 1
WHERE t.current_file >= 0
) s
)
SELECT sum(t.file_id * (t.start_pos + id - 1)) as result
FROM (
SELECT *
FROM mv_pos
WHERE current_file = -1
) t
JOIN generate_series(1, 9) as id
ON t.length >= id;
postgres-# SELECT start_pos,length,file_id
postgres-# FROM mv_pos
postgres-# WHERE current_file = -1 order by 1
postgres-# ;
start_pos | length | file_id
-----------+--------+---------
0 | 2 | 0
2 | 2 | 9
4 | 1 | 2
5 | 3 | 1
8 | 3 | 7
12 | 2 | 4
15 | 3 | 3
22 | 4 | 5
27 | 4 | 6
36 | 4 | 8
(10 rows)
上述最后的结果,就是移动完成之后的最终状态。虽然逻辑是没有问题的,但是效率太差,每次迭代需要1s左右,性能上不可接受。
输出位置数组的方式
总共2w组数字,其实可以用int[]来存储相关信息,并通过function来计算位置移动,输出新的int[]。每次只输出一条记录,因此性能上应该会好不少。
通过以下几个array来计算信息
- idx_array INTEGER[],数字的索引信息,每次迭代都要更新。因为会往前插入,因此需要整体更新
- position_array INTEGER[],每个数字的起始位置,每次迭代都仅更新当前数字的位置
- length_array INTEGER[],每次数字的长度信息,不需要更新
CREATE OR REPLACE FUNCTION move_file(idx_array INTEGER[], position_array INTEGER[], length_array INTEGER[], current_file INTEGER)
RETURNS RECORD AS $$
DECLARE
file_position INTEGER;
file_length INTEGER;
insert_index INTEGER;
BEGIN
file_position := position_array[current_file];
file_length := length_array[current_file];
insert_index := 0;
FOR i IN 1..#idx_array LOOP
-- last position
CONTINUE WHEN i = #idx_array;
-- no free space left
CONTINUE WHEN position_array[idx_array[i]] + length_array[idx_array[i]] = position_array[idx_array[i + 1]];
-- beyond current file position
EXIT WHEN position_array[idx_array[i]] >= file_position;
-- not enough free space
CONTINUE WHEN position_array[idx_array[i + 1]] - (position_array[idx_array[i]] + length_array[idx_array[i]]) < file_length;
-- calculate start position of the free space
insert_index := i;
EXIT;
END LOOP;
IF insert_index > 0 THEN
position_array[current_file] := position_array[idx_array[insert_index]] + length_array[idx_array[insert_index]];
RETURN (idx_array[1:insert_index] || current_file || (idx_array[insert_index + 1:#idx_array] - current_file), position_array);
ELSE
RETURN (idx_array, position_array);
END IF;
END;
$$ LANGUAGE plpgsql;
因为每次迭代,idx_array和pos_array都需要更新,因此这个function返回RECORD类型,从而能够输出多个结果。
with recursive origin AS (
SELECT idx, pos :: INTEGER as length, case when mod(idx, 2) = 1 then ((idx - 1) / 2) :: VARCHAR else '.' end as file_id
FROM (
SELECT x.idx :: INTEGER, x.pos
FROM lance_input, regexp_split_to_table(line, '') with ordinality as x(pos, idx)
) t
ORDER BY idx
), num_pos AS (
SELECT start_pos, length, file_id :: INTEGER + 1 as file_id
FROM (
SELECT sum(length) over (order by idx) - length as start_pos, length, file_id
FROM origin
) t
WHERE file_id != '.'
), mv_pos AS (
SELECT array_agg(file_id order by file_id) :: int[] as idx_array,
array_agg(start_pos order by file_id) :: int[] as pos_array,
array_agg(length order by file_id) :: int[] as length_array,
max(file_id) as current_file
FROM num_pos
UNION ALL
SELECT t.idx_array, t.pos_array, s.length_array, s.current_file - 1 as current_file
FROM mv_pos s, move_file(idx_array, pos_array, length_array, current_file) as t(idx_array int[], pos_array int[])
WHERE s.current_file >0
)
postgres-# select * From mv_pos;
idx_array | pos_array | length_array | current_file
------------------------+-------------------------------+-----------------------+--------------
{1,2,3,4,5,6,7,8,9,10} | {0,5,11,15,19,22,27,32,36,40} | {2,3,1,3,2,4,4,3,4,2} | 10
{1,10,2,3,4,5,6,7,8,9} | {0,5,11,15,19,22,27,32,36,2} | {2,3,1,3,2,4,4,3,4,2} | 9
{1,10,2,3,4,5,6,7,8,9} | {0,5,11,15,19,22,27,32,36,2} | {2,3,1,3,2,4,4,3,4,2} | 8
{1,10,2,8,3,4,5,6,7,9} | {0,5,11,15,19,22,27,8,36,2} | {2,3,1,3,2,4,4,3,4,2} | 7
{1,10,2,8,3,4,5,6,7,9} | {0,5,11,15,19,22,27,8,36,2} | {2,3,1,3,2,4,4,3,4,2} | 6
{1,10,2,8,3,4,5,6,7,9} | {0,5,11,15,19,22,27,8,36,2} | {2,3,1,3,2,4,4,3,4,2} | 5
{1,10,2,8,3,5,4,6,7,9} | {0,5,11,15,12,22,27,8,36,2} | {2,3,1,3,2,4,4,3,4,2} | 4
{1,10,2,8,3,5,4,6,7,9} | {0,5,11,15,12,22,27,8,36,2} | {2,3,1,3,2,4,4,3,4,2} | 3
{1,10,3,2,8,5,4,6,7,9} | {0,5,4,15,12,22,27,8,36,2} | {2,3,1,3,2,4,4,3,4,2} | 2
{1,10,3,2,8,5,4,6,7,9} | {0,5,4,15,12,22,27,8,36,2} | {2,3,1,3,2,4,4,3,4,2} | 1
{1,10,3,2,8,5,4,6,7,9} | {0,5,4,15,12,22,27,8,36,2} | {2,3,1,3,2,4,4,3,4,2} | 0
(11 rows)
最后,将int array展开后,即能得到第一种方法的同样结果
postgres-# SELECT x.id - 1 as file_id, y.pos, z.length
postgres-# FROM (
postgres(# SELECT id.*
postgres(# FROM mv_pos, unnest(idx_array) WITH ORDINALITY id
postgres(# WHERE current_file = 0
postgres(# ) x
postgres-# JOIN (
postgres(# SELECT pos.*
postgres(# FROM mv_pos, unnest(pos_array) WITH ORDINALITY pos
postgres(# WHERE current_file = 0
postgres(# ) y
postgres-# ON x.id = y.ORDINALITY
postgres-# JOIN (
postgres(# SELECT length.*
postgres(# FROM mv_pos, unnest(length_array) WITH ORDINALITY length
postgres(# WHERE current_file = 0
postgres(# ) z
postgres-# ON x.id = z.ORDINALITY order by pos;
file_id | pos | length
---------+-----+--------
0 | 0 | 2
9 | 2 | 2
2 | 4 | 1
1 | 5 | 3
7 | 8 | 3
4 | 12 | 2
3 | 15 | 3
5 | 22 | 4
6 | 27 | 4
8 | 36 | 4
(10 rows)