在这里插入图片描述
在这里插入图片描述
方案一实现如下:

-- 连续登录统计
/*
login.txt

A,2021-03-22
B,2021-03-22
C,2021-03-22
A,2021-03-23
C,2021-03-23
A,2021-03-24
B,2021-03-24 
*/
create table login
(
    user_id    string,
    login_time string
) row format delimited fields terminated by ',';

load data local inpath '/root/hivedata/login.txt' overwrite into table login;

select *
from login;

-- 解决方案一:笛卡尔积过滤
create table login_tmp as
select a.user_id    as a_user_id,
       a.login_time as a_login_time,
       b.user_id    as b_user_id,
       b.login_time as b_login_time
from login a,
     login b;

-- 过滤数据,用户id相同且登录日期相差1天
select a_user_id,
       a_login_time,
       b_user_id,
       b_login_time
from login_tmp
where a_user_id = b_user_id
  and cast(substr(a_login_time, 9, 2) as int) - 1 = cast(substr(b_login_time, 9, 2) as int);

-- 去重
select distinct a_user_id
from login_tmp
where a_user_id = b_user_id
  and cast(substr(a_login_time, 9, 2) as int) - 1 = cast(substr(b_login_time, 9, 2) as int);

在这里插入图片描述
方案二实现如下:

-- 连续登录统计
/*
login.txt

A,2021-03-22
B,2021-03-22
C,2021-03-22
A,2021-03-23
C,2021-03-23
A,2021-03-24
B,2021-03-24
*/
create table login
(
    user_id    string,
    login_time string
) row format delimited fields terminated by ',';

load data local inpath '/root/hivedata/login.txt' overwrite into table login;

select *
from login;

-- 方案二:窗口函数实现
select user_id,
       login_time,
       -- 本次登录日期的第二天
       date_add(login_time, 1)                                                as nextday,
       -- 当前行上移1行
       lead(login_time, 1, 0) over (partition by user_id order by login_time) as nextlogin
from login;

-- 连续两天登录
with t as (
    select user_id,
           login_time,
           -- 本次登录日期的第二天
           date_add(login_time, 1)                                                as nextday,
           -- 当前行上移1行
           lead(login_time, 1, 0) over (partition by user_id order by login_time) as nextlogin
    from login
)
select distinct user_id
from t
where t.nextday = t.nextlogin;

-- 连续三天登录
with t as (
    select user_id,
           login_time,
           -- 本次登录日期的第二天
           date_add(login_time, 2)                                                as nextday,
           -- 当前行上移1行
           lead(login_time, 2, 0) over (partition by user_id order by login_time) as nextlogin
    from login
)
select distinct user_id
from t
where t.nextday = t.nextlogin;

连续N天登录公式:
在这里插入图片描述


版权声明:本文为m0_58535137原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
原文链接:https://blog.csdn.net/m0_58535137/article/details/125042065