Part One
对比两个类似json字符串的顺序
[1,1,3,1,1]
[1,1,5,1,1]
JSON对比
是否可以直接当做json类型来进行对比呢?那答案肯定是否定的,见下面这个例子
postgres=# select '[[1,2], 3]'::jsonb < '[[1,2], [2,3]]'::jsonb;
?column?
----------
t
(1 row)
但是按照例子中的规则,应该是左边更大
查询了postgresql的源代码,针对jsonb的类型的对比部分如下所示。可见,当两个类型不同的时候,直接对比两个的type,因此才有上面的结果。
if (va.type == vb.type)
{
switch (va.type)
{
case jbvString:
case jbvNull:
case jbvNumeric:
case jbvBool:
res = compareJsonbScalarValue(&va, &vb);
break;
case jbvArray:
/*
* This could be a "raw scalar" pseudo array. That's
* a special case here though, since we still want the
* general type-based comparisons to apply, and as far
* as we're concerned a pseudo array is just a scalar.
*/
if (va.val.array.rawScalar != vb.val.array.rawScalar)
res = (va.val.array.rawScalar) ? -1 : 1;
if (va.val.array.nElems != vb.val.array.nElems)
res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
break;
case jbvObject:
if (va.val.object.nPairs != vb.val.object.nPairs)
res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
break;
case jbvBinary:
elog(ERROR, "unexpected jbvBinary value");
break;
case jbvDatetime:
elog(ERROR, "unexpected jbvDatetime value");
break;
}
}
else
{
/* Type-defined order */
res = (va.type > vb.type) ? 1 : -1;
}
JSONB展开
还是通过直观的方式,将JSONB进行展开,从而继续对比下面一层的对象。
postgres=# SELECT t.item, jsonb_typeof(t.item) as item_type
postgres-# FROM (select '[[1,2], 3]'::jsonb as value) s, jsonb_array_elements(value :: jsonb) WITH ORDINALITY as t(item, seq);
item | item_type
--------+-----------
[1, 2] | array
3 | number
(2 rows)
Number转Array
当number和array进行对比的时候,需要将number转换为array进行下一轮的对比,这里需要找到这种特殊情况,然后将number转换为array
SELECT t1._group, t1.pos,
case when t2.pos is not null then ('[' || (t1.item #> '{}')::text || ']') :: jsonb else t1.item end as item,
t1.sub_group,
case when t2.pos is not null then 'array' else t1.item_type end as item_type
FROM init_result t1
LEFT JOIN (
SELECT m.sub_group, case when m.item_type = 'number' then m.pos else n.pos end as pos
FROM init_result m
JOIN init_result n
ON m.pos = 'left'
AND n.pos = 'right'
AND m.sub_group = n.sub_group
AND ((m.item_type = 'number'
AND n.item_type = 'array')
OR (m.item_type = 'array'
AND n.item_type = 'number'))
) t2
ON t1.sub_group = t2.sub_group
AND t1.pos = t2.pos
最终SQL
WITH RECURSIVE origin AS (
SELECT (rn - 1) / 3 + 1 as _group, case when rn % 3 = 1 then 'left' else 'right' end as pos, line as value
FROM (
SELECT row_number() over () as rn, line
FROM lance_input
) t
WHERE rn % 3 in (1,2)
), splits AS (
SELECT _group, pos, item, sub_group, item_type
FROM (
WITH init_result AS (
SELECT origin._group, origin.pos, t.item, origin._group || '.' || seq as sub_group, jsonb_typeof(t.item) as item_type
FROM origin, jsonb_array_elements(value :: jsonb) WITH ORDINALITY as t(item, seq)
)
SELECT t1._group, t1.pos,
case when t2.pos is not null then ('[' || (t1.item #> '{}')::text || ']') :: jsonb else t1.item end as item,
t1.sub_group,
case when t2.pos is not null then 'array' else t1.item_type end as item_type
FROM init_result t1
LEFT JOIN (
SELECT m.sub_group, case when m.item_type = 'number' then m.pos else n.pos end as pos
FROM init_result m
JOIN init_result n
ON m.pos = 'left'
AND n.pos = 'right'
AND m.sub_group = n.sub_group
AND ((m.item_type = 'number'
AND n.item_type = 'array')
OR (m.item_type = 'array'
AND n.item_type = 'number'))
) t2
ON t1.sub_group = t2.sub_group
AND t1.pos = t2.pos
) t
UNION ALL
SELECT _group, pos, item, sub_group, item_type
FROM (
WITH init_result AS (
SELECT splits._group, splits.pos, t.item, splits.sub_group || '.' || seq as sub_group, jsonb_typeof(t.item) as item_type
FROM splits, jsonb_array_elements(item :: jsonb) WITH ORDINALITY as t(item, seq)
WHERE splits.item_type = 'array'
)
SELECT t1._group, t1.pos,
case when t2.pos is not null then ('[' || (t1.item #> '{}')::text || ']') :: jsonb else t1.item end as item,
t1.sub_group,
case when t2.pos is not null then 'array' else t1.item_type end as item_type
FROM init_result t1
LEFT JOIN (
SELECT m.sub_group, case when m.item_type = 'number' then m.pos else n.pos end as pos
FROM init_result m
JOIN init_result n
ON m.pos = 'left'
AND n.pos = 'right'
AND m.sub_group = n.sub_group
AND ((m.item_type = 'number'
AND n.item_type = 'array')
OR (m.item_type = 'array'
AND n.item_type = 'number'))
) t2
ON t1.sub_group = t2.sub_group
AND t1.pos = t2.pos
) t
)
SELECT sum(_group)
FROM (
SELECT _group, sub_group, result, row_number() over (partition by _group order by sub_group) as rn
FROM (
SELECT coalesce(a._group, b._group) as _group,
coalesce(a.sub_group, b.sub_group) as sub_group,
case when b.item_type is null then 1
when a.item_type is null then -1
when a.item_type = 'number' and b.item_type = 'number' then
case when (a.item #> '{}') :: integer = (b.item #> '{}') :: integer then 0
when (a.item #> '{}') :: integer > (b.item #> '{}') :: integer then 1
when (a.item #> '{}') :: integer < (b.item #> '{}') :: integer then -1
end
when a.item_type = 'array' and b.item_type = 'array' then
case when array_length(jsonb_array_to_text_array(a.item), 1) = 0 AND array_length(jsonb_array_to_text_array(b.item), 1) != 0 then -1
when array_length(jsonb_array_to_text_array(a.item), 1) != 0 AND array_length(jsonb_array_to_text_array(b.item), 1) = 0 then 1
when array_length(jsonb_array_to_text_array(a.item), 1) = 0 AND array_length(jsonb_array_to_text_array(b.item), 1) = 0 then 0
end
end as result
FROM (SELECT * FROM splits WHERE pos = 'left') a
FULL OUTER JOIN
(SELECT * FROM splits WHERE pos = 'right') b
ON a._group = b._group
AND a.sub_group = b.sub_group
) t
WHERE t.result != 0
) s
WHERE rn = 1 AND result = -1
Part Two
将下面两个放入排好序的字符串队列中,计算两者真实次数的乘积
[[2]]
[[6]]
转换队列
将之前的对比字符串进行转换,转换为[[2]]与左右两个字符串的对比
WITH RECURSIVE origin_result AS (
SELECT (rn - 1) / 3 + 1 as _group, case when rn % 3 = 1 then 'left' else 'right' end as pos, line as value
FROM (
SELECT row_number() over () as rn, line
FROM lance_input
) t
WHERE rn % 3 in (1,2)
),
origin AS (
SELECT case when a.pos = 'left' THEN a._group else 0 - a._group end as _group,
case when b.id = 1 THEN 'left' else 'right' end as pos,
case when b.id = 1 THEN '[[6]]' else a.value end as value
FROM origin_result a, (select 1 as id UNION ALL select 2 as id) b
)
1 | left | [[2]]
1 | right | [[[[2]],[2,[3],[9,3],10],8],[[9,[5,7,5,5],6,8],[[],7,7,2]],[[]]]
-1 | left | [[2]]
-1 | right | [[[0,[5,6,5],[0,4,1]],[],[3],[]],[5,0,1],[1,3,6,[1,[7,4]],10],[]]
2 | left | [[2]]
这样,仍然按照之前的计算方式,统计有多少个分组是排序错误的,就可以计算出真实的次序