Part One
hash算法,比较简单
CREATE OR REPLACE FUNCTION hashCode(_string text) RETURNS INTEGER AS $$
DECLARE
val_ CHAR[];
h_ INTEGER := 0;
ascii_ INTEGER;
c_ char;
BEGIN
val_ = regexp_split_to_array(_string, '');
FOR i in 1 .. array_length(val_, 1)
LOOP
c_ := (val_)[i];
ascii_ := ascii(c_);
h_ = 17 * (h_ + ascii_) % 256;
END LOOP;
RETURN h_;
END;
$$ LANGUAGE plpgsql;
Part Two
hashMap的实现,经过一系列操作后,求出hashMap的最终结果
After "ot=7":
Box 0: [rn 1] [cm 2]
Box 3: [ot 7] [ab 5] [pc 6]
首先,求出每个label的最后一次操作,从而确定哪些label最后被删除了,不会出现在结果中。
LAST_OP AS (
SELECT *
FROM (
SELECT row_number() over (partition by label order by seq desc) as rn, *
FROM origin
) t
WHERE rn = 1
)
再求出每个label首次插入hashMap的时机。因此有中途删除再次插入的场景,因此需要找出最后一个删除之后的第一次插入时机。
FIRST_INSERT AS (
SELECT a.label, min(seq) as seq
FROM origin a
WHERE NOT EXISTS(SELECT * FROM origin where origin.label = a.label and origin.focal = 0 and a.focal >0 and origin.seq > a.seq)
AND a.focal > 0
GROUP BY a.label
)
最后在每个box中按照插入时机进行排序,就是最终的结果
WITH ORIGIN AS (
SELECT seq, hashCode(split_input[1]) as box, split_input[1] as label, case when cardinality(split_input) = 2 then split_input[2] :: integer else 0 end as focal
FROM (
SELECT row_number() over () as seq, array_remove(regexp_split_to_array(input, '[-, =]'), '') as split_input
FROM (
SELECT regexp_split_to_table(line, ',') as input
FROM lance_input
) t
) t
),
LAST_OP AS (
SELECT *
FROM (
SELECT row_number() over (partition by label order by seq desc) as rn, *
FROM origin
) t
WHERE rn = 1
),
FIRST_INSERT AS (
SELECT a.label, min(seq) as seq
FROM origin a
WHERE NOT EXISTS(SELECT * FROM origin where origin.label = a.label and origin.focal = 0 and a.focal >0 and origin.seq > a.seq)
AND a.focal > 0
GROUP BY a.label
)
SELECT sum((box + 1) * slot * focal)
FROM (
SELECT a.box, a.label, a.focal, row_number() over (partition by a.box order by b.seq) as slot
FROM last_op a
JOIN first_insert b
ON a.label = b.label
AND a.focal > 0
) t