1.需求
根据身份证号分组聚合求第一次(开始时间最小)出现的基站及对应时间和最后一次(结束时间最大)出现的基站,ip4,ip6及对应时间
2.应用场景
安全行业,上网流量日志监控行业,如追击罪犯,要查看第一次出现的位置和最后一次出现的位置及最后一次联系人等相关信息,从而可以和最后一次联系人及相关信息从而分析罪犯去向等信息
3.示例数据
with a as
(select 1 as identitycode,'st_001' as stationid,'ip4_001' as ip4,'' as ip6, 1 as isipv4,20240615150101 as starttime,20240615150601 as endtime
union all
select 1 as identitycode,'st_002' as stationid,'' as ip4,'ip6_001' as ip6, 0 as isipv4,20240615150601 as starttime,20240615150901 as endtime
union all
select 1 as identitycode,'st_003' as stationid,'ip4_02' as ip4,'' as ip6, 1 as isipv4,20240615151301 as starttime,20240615151701 as endtime
union all
select 2 as identitycode,'st_004' as stationid,'' as ip4,'ip6_004' as ip6, 0 as isipv4,20240618150101 as starttime,20240618150601 as endtime
union all
select 2 as identitycode,'st_005' as stationid,'' as ip4,'ip6_001' as ip6, 0 as isipv4,20240618150601 as starttime,20240618150901 as endtime
union all
select 2 as identitycode,'st_006' as stationid,'ip4_07' as ip4,'' as ip6, 1 as isipv4,20240618151301 as starttime,20240618151701 as endtime)
4.sql实现
hivesql实现,在仓内实现结果直接应用
with a as
(select 1 as identitycode,'st_001' as stationid,'ip4_001' as ip4,'' as ip6, 1 as isipv4,20240615150101 as starttime,20240615150601 as endtime
union all
select 1 as identitycode,'st_002' as stationid,'' as ip4,'ip6_001' as ip6, 0 as isipv4,20240615150601 as starttime,20240615150901 as endtime
union all
select 1 as identitycode,'st_003' as stationid,'ip4_02' as ip4,'' as ip6, 1 as isipv4,20240615151301 as starttime,20240615151701 as endtime
union all
select 2 as identitycode,'st_004' as stationid,'' as ip4,'ip6_004' as ip6, 0 as isipv4,20240618150101 as starttime,20240618150601 as endtime
union all
select 2 as identitycode,'st_005' as stationid,'' as ip4,'ip6_001' as ip6, 0 as isipv4,20240618150601 as starttime,20240618150901 as endtime
union all
select 2 as identitycode,'st_006' as stationid,'ip4_07' as ip4,'' as ip6, 1 as isipv4,20240618151301 as starttime,20240618151701 as endtime)select b.identitycode,b.starttime as starttime,b.stationid as first_stationid,c.endtime as lasttime,c.stationid as last_stationid,c.ip4,c.ip6,c.isipv4 from
(select identitycode,stationid,starttime,row_number() over(PARTITION by identitycode order by starttime) srn from a) b
left join
(select identitycode,stationid,ip4,ip6,isipv4,endtime,row_number() over(PARTITION by identitycode order by endtime desc) lrn from a) c
on b.identitycode=c.identitycode where b.srn=1 and c.lrn=1
;
输出结果
olap引擎如clickhouse实现
select
cur.identitycode AS identitycode,
toUInt64(cur.first.2) AS firsttime,
toUInt64(cur.last.5) AS lasttime,
cur.first.1 AS firststationid,
cur.last.1 AS laststationid,
toUInt64(cur.last.2) AS ip4,
cur.last.3 AS ip6,
toUInt8(cur.last.4) AS isipv4
from
(SELECT
identitycode,
any(account) as account,
any(accounttype) as accounttype,
argMin(tuple(stationid ,starttime),starttime) AS first,
argMax(tuple(stationid,ip4,ip6,isipv4,endtime),endtime) AS last
FROM dwd_identity_base_cur_hour cur