Day 7: No Space Left On Device
通过cd的目录以及ls的结果,统计出每个目录的总大小。
$ cd /
$ ls
dir a
14848514 b.txt
8504156 c.dat
dir d
求出当前目录
通过递归查询,求出当前所在目录。遇到cd ..,则需要跳转到上一级目录。其他情况,则直接加到上次目录后缀即可。
SELECT dir as current_dir, 1 as current_rn, 0 as size
FROM origin
WHERE rn = 1
UNION ALL
SELECT case when is_cd and dir = '..' then left(current_dir, greatest(1, length(current_dir) - position('/' in reverse(current_dir))))
when is_cd then t.current_dir || case when current_dir = '/' then '' else '/' end || x.dir
else t.current_dir
end as current_dir,
x.rn as current_rn,
case when line ~ '^\d+' then left(line, position(' ' in line) - 1)::integer end as size
FROM origin x, visit t
WHERE x.rn = t.current_rn + 1
效果如下:
current_dir | l
-------------+----------------
/ | $ cd /
/ | $ ls
/ | dir a
/ | 14848514 b.txt
/ | 8504156 c.dat
/ | dir d
/a | $ cd a
/a | $ ls
/a | dir e
/a | 29116 f
/a | 2557 g
/a | 62596 h.lst
/a/e | $ cd e
/a/e | $ ls
/a/e | 584 i
/a | $ cd ..
/ | $ cd ..
/d | $ cd d
/d | $ ls
/d | 4060174 j
/d | 8033020 d.log
/d | 5626152 d.ext
/d | 7214296 k
(23 rows)
求出每个目录的总大小
size_by_dir as (
SELECT current_dir, sum(size) as size
FROM visit
GROUP BY current_dir
)
效果如下:
current_dir | size
-------------+----------
/d | 24933642
/a/e | 584
/a | 94269
/ | 23352670
(4 rows)
求出每个目录的真实大小
目录之间是有嵌套包含关系的,子目录的大小应该也计入父目录的大小,因此需要自关联再汇总
SELECT x.current_dir, sum(y.size) as size
FROM size_by_dir x, size_by_dir y
WHERE position(x.current_dir in y.current_dir) = 1
GROUP BY x.current_dir
完整SQL
with recursive origin AS (
SELECT row_number() over() :: integer as rn, line, left(line, 4) = '$ cd' as is_cd, substring(line, 6) as dir
FROM lance_input
), visit AS (
SELECT dir as current_dir, 1 as current_rn, 0 as size
FROM origin
WHERE rn = 1
UNION ALL
SELECT case when is_cd and dir = '..' then left(current_dir, greatest(1, length(current_dir) - position('/' in reverse(current_dir))))
when is_cd then t.current_dir || case when current_dir = '/' then '' else '/' end || x.dir
else t.current_dir
end as current_dir,
x.rn as current_rn,
case when line ~ '^\d+' then left(line, position(' ' in line) - 1)::integer end as size
FROM origin x, visit t
WHERE x.rn = t.current_rn + 1
), size_by_dir as (
SELECT current_dir, sum(size) as size
FROM visit
GROUP BY current_dir
)
SELECT SUM(size)
FROM (
SELECT x.current_dir, sum(y.size) as size
FROM size_by_dir x, size_by_dir y
WHERE position(x.current_dir in y.current_dir) = 1
GROUP BY x.current_dir
HAVING sum(y.size) <= 100000
) t