id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
1 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:30 | 0 | 1 | 1 | NULL |
2 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:24 | 0 | 0 | 1 | NULL |
3 | 103 | 2001 | 2021-10-01 11:00:00 | 2021-10-01 11:00:34 | 0 | 1 | 0 | 1732526 |
4 | 101 | 2002 | 2021-09-01 10:00:00 | 2021-09-01 10:00:42 | 1 | 0 | 1 | NULL |
5 | 102 | 2002 | 2021-10-01 11:00:00 | 2021-10-01 11:00:30 | 1 | 0 | 1 | NULL |
id | video_id | author | tag | duration | release_time |
1 | 2001 | 901 | 影视 | 30 | 2021-01-01 07:00:00 |
2 | 2002 | 901 | 美食 | 60 | 2021-01-01 07:00:00 |
3 | 2003 | 902 | 旅游 | 90 | 2021-01-01 07:00:00 |
video_id | avg_comp_play_rate |
2001 | 0.667 |
2002 | 0.000 |
-- Fixture for the "completion rate per video" exercise.
-- User-video interaction log: one row per viewing session.
-- start_time / end_time are stored as strings, so the queries below
-- convert them with unix_timestamp() / year() when they need arithmetic.
DROP TABLE IF EXISTS tb_user_video_log;
CREATE TABLE tb_user_video_log (
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
COMMENT '用户-视频互动表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Short-video metadata: one row per video; duration is in seconds.
DROP TABLE IF EXISTS tb_video_info;
CREATE TABLE tb_video_info (
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
COMMENT '短视频信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Sample rows.
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, NULL),
    (2, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:24', 0, 0, 1, NULL),
    (3, 103, 2001, '2021-10-01 11:00:00', '2021-10-01 11:00:34', 0, 1, 0, 1732526),
    (4, 101, 2002, '2021-09-01 10:00:00', '2021-09-01 10:00:42', 1, 0, 1, NULL),
    (5, 102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, NULL);

INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '影视', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '美食', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '旅游', 90, '2021-01-01 7:00:00');
-- Step 1: keep only the viewing records that started in 2021.
SELECT *
FROM tb_user_video_log
WHERE 2021 = year(start_time);
-- Step 2: count completed plays per video.
-- A play is "completed" when (end_time - start_time), in seconds, is at
-- least the video's duration.
SELECT
    a.video_id AS video_id,
    -- Aliased explicitly so the result column does not get an
    -- engine-generated name.
    sum(if(unix_timestamp(a.end_time) - unix_timestamp(a.start_time) >= b.duration, 1, 0)) AS comp_play_cnt
FROM (
    -- Only the columns the outer query needs, restricted to plays in 2021.
    SELECT video_id, start_time, end_time
    FROM tb_user_video_log
    WHERE year(start_time) = 2021
) a
LEFT JOIN tb_video_info b
    ON a.video_id = b.video_id
GROUP BY a.video_id;
-- Step 3: completion rate = completed plays / total plays, per video.
SELECT
    a.video_id AS video_id,
    -- Aliased explicitly so the result column does not get an
    -- engine-generated name.
    sum(if(unix_timestamp(a.end_time) - unix_timestamp(a.start_time) >= b.duration, 1, 0)) / count(*) AS comp_play_rate
FROM (
    -- Only the columns the outer query needs, restricted to plays in 2021.
    SELECT video_id, start_time, end_time
    FROM tb_user_video_log
    WHERE year(start_time) = 2021
) a
LEFT JOIN tb_video_info b
    ON a.video_id = b.video_id
GROUP BY a.video_id;
-- Step 4: round the completion rate to three decimals and list the
-- videos from the highest rate to the lowest.
SELECT
    plays.video_id AS video_id,
    round(
        sum(
            CASE
                WHEN unix_timestamp(plays.end_time) - unix_timestamp(plays.start_time) >= info.duration THEN 1
                ELSE 0
            END
        ) / count(*),
        3
    ) AS avg_comp_play_rate
FROM (
    SELECT *
    FROM tb_user_video_log
    WHERE year(start_time) = 2021
) plays
LEFT JOIN tb_video_info info
    ON plays.video_id = info.video_id
GROUP BY plays.video_id
ORDER BY avg_comp_play_rate DESC;
id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
1 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:30 | 0 | 1 | 1 | NULL |
2 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:21 | 0 | 0 | 1 | NULL |
3 | 103 | 2001 | 2021-10-01 11:00:50 | 2021-10-01 11:01:20 | 0 | 1 | 0 | 1732526 |
4 | 102 | 2002 | 2021-10-01 11:00:00 | 2021-10-01 11:00:30 | 1 | 0 | 1 | NULL |
5 | 103 | 2002 | 2021-10-01 10:59:05 | 2021-10-01 11:00:05 | 1 | 0 | 1 | NULL |
id | video_id | author | tag | duration | release_time |
1 | 2001 | 901 | 影视 | 30 | 2021-01-01 07:00:00 |
2 | 2002 | 901 | 美食 | 60 | 2021-01-01 07:00:00 |
3 | 2003 | 902 | 旅游 | 90 | 2021-01-01 07:00:00 |
tag | avg_play_progress |
影视 | 90.00% |
美食 | 75.00% |
-- Fixture for the "average play progress per tag" exercise.
-- User-video interaction log: one row per viewing session.
DROP TABLE IF EXISTS tb_user_video_log;
CREATE TABLE tb_user_video_log (
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
COMMENT '用户-视频互动表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Short-video metadata: one row per video; duration is in seconds.
DROP TABLE IF EXISTS tb_video_info;
CREATE TABLE tb_video_info (
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
COMMENT '短视频信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Sample rows.
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, NULL),
    (2, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:21', 0, 0, 1, NULL),
    (3, 103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:20', 0, 1, 0, 1732526),
    (4, 102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, NULL),
    (5, 103, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 1, 0, 1, NULL);

INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '影视', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '美食', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '旅游', 90, '2021-01-01 7:00:00');
-- Step 1: watched time of every play, in seconds.
SELECT
    video_id,
    unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
FROM tb_user_video_log;
-- Step 2: progress of every play = watched seconds / video duration,
-- capped at 1 when the viewer watched longer than the video lasts.
SELECT
    plays.video_id AS video_id,
    CASE
        WHEN plays.total_time / info.duration > 1 THEN 1
        ELSE plays.total_time / info.duration
    END AS play_progress
FROM (
    SELECT
        video_id,
        unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
    FROM tb_user_video_log
) plays
LEFT JOIN tb_video_info info
    ON plays.video_id = info.video_id;
-- Step 3: average the capped per-play progress within each tag.
SELECT
    info.tag,
    avg(
        CASE
            WHEN plays.total_time / info.duration > 1 THEN 1
            ELSE plays.total_time / info.duration
        END
    ) AS avg_play_progress
FROM (
    SELECT
        video_id,
        unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
    FROM tb_user_video_log
) plays
LEFT JOIN tb_video_info info
    ON plays.video_id = info.video_id
GROUP BY info.tag;
-- Step 4: keep tags whose average progress exceeds 60% and sort them
-- from the highest average to the lowest.
SELECT
    info.tag,
    avg(
        CASE
            WHEN plays.total_time / info.duration > 1 THEN 1
            ELSE plays.total_time / info.duration
        END
    ) AS avg_play_progress
FROM (
    SELECT
        video_id,
        unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
    FROM tb_user_video_log
) plays
LEFT JOIN tb_video_info info
    ON plays.video_id = info.video_id
GROUP BY info.tag
HAVING avg_play_progress > 0.6
ORDER BY avg_play_progress DESC;
-- Step 5: render the average progress as a percentage string.
-- The derived table is now closed and aliased (the original statement
-- was left unterminated). The ordering is applied on the outermost query,
-- because an ORDER BY inside a derived table does not bind the final
-- output order.
SELECT
    tag,
    concat(round(avg_progress * 100, 2), '%') AS avg_play_progress
FROM (
    SELECT
        b.tag AS tag,
        -- Named differently from the outer alias so that the outer ORDER BY
        -- sorts on this numeric value, not on the formatted string.
        avg(if(a.total_time / b.duration > 1, 1, a.total_time / b.duration)) AS avg_progress
    FROM (
        SELECT
            video_id,
            unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
        FROM tb_user_video_log
    ) a
    LEFT JOIN tb_video_info b
        ON a.video_id = b.video_id
    GROUP BY b.tag
    HAVING avg_progress > 0.6
) t
ORDER BY avg_progress DESC;
id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
1 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:20 | 0 | 1 | 1 | NULL |
2 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:15 | 0 | 0 | 1 | NULL |
3 | 103 | 2001 | 2021-10-01 11:00:50 | 2021-10-01 11:01:15 | 0 | 1 | 0 | 1732526 |
4 | 102 | 2002 | 2021-09-10 11:00:00 | 2021-09-10 11:00:30 | 1 | 0 | 1 | NULL |
5 | 103 | 2002 | 2021-10-01 10:59:05 | 2021-10-01 11:00:05 | 1 | 0 | 0 | NULL |
id | video_id | author | tag | duration | release_time |
1 | 2001 | 901 | 影视 | 30 | 2021-01-01 07:00:00 |
2 | 2002 | 901 | 美食 | 60 | 2021-01-01 07:00:00 |
3 | 2003 | 902 | 旅游 | 90 | 2021-01-01 07:00:00 |
tag | retweet_cut | retweet_rate |
影视 | 2 | 0.667 |
美食 | 1 | 0.500 |
-- Fixture for the "retweet count and rate per tag, last 30 days" exercise.
-- User-video interaction log: one row per viewing session.
DROP TABLE IF EXISTS tb_user_video_log;
CREATE TABLE tb_user_video_log (
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
COMMENT '用户-视频互动表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Short-video metadata: one row per video; duration is in seconds.
DROP TABLE IF EXISTS tb_video_info;
CREATE TABLE tb_video_info (
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
COMMENT '短视频信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Sample rows.
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 0, 1, 1, NULL),
    (2, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, NULL),
    (3, 103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 0, 1, 0, 1732526),
    (4, 102, 2002, '2021-09-10 11:00:00', '2021-09-10 11:00:30', 1, 0, 1, NULL),
    (5, 103, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 1, 0, 0, NULL);

INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '影视', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '美食', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '旅游', 90, '2021-01-01 7:00:00');
-- 1. Find the start time of the most recent play.
SELECT max(start_time)
FROM tb_user_video_log;
-- 2. Take every play from the 30-day window that ends on the most recent
--    play (the last day itself plus the 29 days before it).
SELECT *
FROM tb_user_video_log a
CROSS JOIN (
    SELECT max(start_time) AS last_date
    FROM tb_user_video_log
) b
WHERE datediff(b.last_date, a.start_time) < 30;
-- 3. Retweet count and retweet rate (retweets / plays) per tag over the
--    30-day window, highest rate first.
SELECT
    info.tag AS tag,
    sum(recent.if_retweet) AS retweet_cut,
    round(sum(recent.if_retweet) / count(*), 3) AS retweet_rate
FROM (
    SELECT
        plays.video_id,
        plays.if_retweet
    FROM tb_user_video_log plays
    CROSS JOIN (
        SELECT max(start_time) AS last_date
        FROM tb_user_video_log
    ) latest
    WHERE datediff(latest.last_date, plays.start_time) <= 29
) recent
LEFT JOIN tb_video_info info
    ON recent.video_id = info.video_id
GROUP BY info.tag
ORDER BY retweet_rate DESC;
id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
1 | 101 | 2001 | 2021-09-01 10:00:00 | 2021-09-01 10:00:20 | 0 | 1 | 1 | NULL |
2 | 105 | 2002 | 2021-09-10 11:00:00 | 2021-09-10 11:00:30 | 1 | 0 | 1 | NULL |
3 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:20 | 1 | 1 | 1 | NULL |
4 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:15 | 0 | 0 | 1 | NULL |
5 | 103 | 2001 | 2021-10-01 11:00:50 | 2021-10-01 11:01:15 | 1 | 1 | 0 | 1732526 |
6 | 106 | 2002 | 2021-10-01 10:59:05 | 2021-10-01 11:00:05 | 2 | 0 | 0 | NULL |
id | video_id | author | tag | duration | release_time |
1 | 2001 | 901 | 影视 | 30 | 2021-01-01 07:00:00 |
2 | 2002 | 901 | 美食 | 60 | 2021-01-01 07:00:00 |
3 | 2003 | 902 | 旅游 | 90 | 2021-01-01 07:00:00 |
4 | 2004 | 902 | 美女 | 90 | 2020-01-01 08:00:00 |
author | month | fans_growth_rate | total_fans |
901 | 2021-09 | 0.500 | 1 |
901 | 2021-10 | 0.250 | 2 |
注:涨粉率=(加粉量 - 掉粉量) / 播放量。结果按创作者ID、总粉丝量升序排序。if_follow-是否关注,为1表示用户观看视频中关注了视频创作者,为0表示此次互动前后关注状态未发生变化,为2表示本次观看过程中取消了关注。
-- Fixture for the "monthly fan growth rate per author" exercise.
-- User-video interaction log: one row per viewing session.
DROP TABLE IF EXISTS tb_user_video_log;
CREATE TABLE tb_user_video_log (
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
COMMENT '用户-视频互动表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Short-video metadata: one row per video; duration is in seconds.
DROP TABLE IF EXISTS tb_video_info;
CREATE TABLE tb_video_info (
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
COMMENT '短视频信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Sample rows.
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-09-01 10:00:00', '2021-09-01 10:00:20', 0, 1, 1, NULL),
    (2, 105, 2002, '2021-09-10 11:00:00', '2021-09-10 11:00:30', 1, 0, 1, NULL),
    (3, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 1, 1, 1, NULL),
    (4, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, NULL),
    (5, 103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 1, 1, 0, 1732526),
    (6, 106, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 2, 0, 0, NULL);

INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '影视', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '影视', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '旅游', 90, '2020-01-01 7:00:00'),
    (4, 2004, 902, '美女', 90, '2020-01-01 8:00:00');
-- 1. Take the 2021 plays and reduce each start time to its 'yyyy-MM' month.
SELECT
    video_id,
    date_format(start_time, 'yyyy-MM') AS m,
    if_follow
FROM tb_user_video_log
WHERE 2021 = year(start_time);
-- 2. Per author and month: net fan change and number of plays.
--    if_follow = 2 (unfollowed during the play) counts as -1; the values
--    1 (followed) and 0 (unchanged) are summed as they are.
SELECT
    info.author AS author,
    plays.m AS m,
    sum(CASE WHEN plays.if_follow = 2 THEN -1 ELSE plays.if_follow END) AS total_fans_m,
    count(*) AS total_play_m
FROM (
    SELECT
        video_id,
        date_format(start_time, 'yyyy-MM') AS m,
        if_follow
    FROM tb_user_video_log
    WHERE year(start_time) = 2021
) plays
LEFT JOIN tb_video_info info
    ON plays.video_id = info.video_id
GROUP BY info.author, plays.m;
-- 3. Per author and month: fan growth rate (net fan change / plays) and
--    the running fan total up to and including that month.
--    Sorted by author, then by running total, both ascending.
SELECT
    author,
    m AS `month`,
    round(total_fans_m / total_play_m, 3) AS fans_growth_rate,
    sum(total_fans_m) OVER (
        PARTITION BY author
        ORDER BY m
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS total_fans
FROM (
    SELECT
        info.author AS author,
        plays.m AS m,
        sum(CASE WHEN plays.if_follow = 2 THEN -1 ELSE plays.if_follow END) AS total_fans_m,
        count(*) AS total_play_m
    FROM (
        SELECT
            video_id,
            date_format(start_time, 'yyyy-MM') AS m,
            if_follow
        FROM tb_user_video_log
        WHERE year(start_time) = 2021
    ) plays
    LEFT JOIN tb_video_info info
        ON plays.video_id = info.video_id
    GROUP BY info.author, plays.m
) monthly
ORDER BY author, total_fans;
id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
1 | 101 | 2001 | 2021-09-24 10:00:00 | 2021-09-24 10:00:20 | 1 | 1 | 0 | NULL |
2 | 105 | 2002 | 2021-09-25 11:00:00 | 2021-09-25 11:00:30 | 0 | 0 | 1 | NULL |
3 | 102 | 2002 | 2021-09-25 11:00:00 | 2021-09-25 11:00:30 | 1 | 1 | 1 | NULL |
4 | 101 | 2002 | 2021-09-26 11:00:00 | 2021-09-26 11:00:30 | 1 | 0 | 1 | NULL |
5 | 101 | 2002 | 2021-09-27 11:00:00 | 2021-09-27 11:00:30 | 1 | 1 | 0 | NULL |
6 | 102 | 2002 | 2021-09-28 11:00:00 | 2021-09-28 11:00:30 | 1 | 0 | 1 | NULL |
7 | 103 | 2002 | 2021-09-29 11:00:00 | 2021-09-29 11:00:30 | 1 | 0 | 1 | NULL |
8 | 102 | 2002 | 2021-09-30 11:00:00 | 2021-09-30 11:00:30 | 1 | 1 | 1 | NULL |
9 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:20 | 1 | 1 | 0 | NULL |
10 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:15 | 0 | 0 | 1 | NULL |
11 | 103 | 2001 | 2021-10-01 11:00:50 | 2021-10-01 11:01:15 | 1 | 1 | 0 | 1732526 |
12 | 106 | 2002 | 2021-10-02 10:59:05 | 2021-10-02 11:00:05 | 2 | 0 | 1 | NULL |
13 | 107 | 2002 | 2021-10-02 10:59:05 | 2021-10-02 11:00:05 | 1 | 0 | 1 | NULL |
14 | 108 | 2002 | 2021-10-02 10:59:05 | 2021-10-02 11:00:05 | 1 | 1 | 1 | NULL |
15 | 109 | 2002 | 2021-10-03 10:59:05 | 2021-10-03 11:00:05 | 0 | 1 | 0 | NULL |
id | video_id | author | tag | duration | release_time |
1 | 2001 | 901 | 旅游 | 30 | 2021-01-01 07:00:00 |
2 | 2002 | 901 | 旅游 | 60 | 2021-01-01 07:00:00 |
3 | 2003 | 902 | 旅游 | 90 | 2021-01-01 07:00:00 |
4 | 2004 | 902 | 美女 | 90 | 2020-01-01 08:00:00 |
tag | dt | sum_like_cnt_7d | max_retweet_cnt_7d |
旅游 | 2021-10-01 | 5 | 2 |
旅游 | 2021-10-02 | 5 | 3 |
旅游 | 2021-10-03 | 6 | 3 |
tag | dt | like_cnt | retweet_cnt |
旅游 | 2021-09-25 | 1 | 2 |
旅游 | 2021-09-26 | 0 | 1 |
旅游 | 2021-09-27 | 1 | 0 |
旅游 | 2021-09-28 | 0 | 1 |
旅游 | 2021-09-29 | 0 | 1 |
旅游 | 2021-09-30 | 1 | 1 |
旅游 | 2021-10-01 | 2 | 1 |
旅游 | 2021-10-02 | 1 | 3 |
旅游 | 2021-10-03 | 1 | 0 |
-- Fixture for the "rolling 7-day likes / max retweets per tag" exercise.
-- User-video interaction log: one row per viewing session.
DROP TABLE IF EXISTS tb_user_video_log;
CREATE TABLE tb_user_video_log (
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
COMMENT '用户-视频互动表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Short-video metadata: one row per video; duration is in seconds.
DROP TABLE IF EXISTS tb_video_info;
CREATE TABLE tb_video_info (
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
COMMENT '短视频信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Sample rows.
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-09-24 10:00:00', '2021-09-24 10:00:20', 1, 1, 0, NULL),
    (2, 105, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 0, 0, 1, NULL),
    (3, 102, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 1, 1, 1, NULL),
    (4, 101, 2002, '2021-09-26 11:00:00', '2021-09-26 11:00:30', 1, 0, 1, NULL),
    (5, 101, 2002, '2021-09-27 11:00:00', '2021-09-27 11:00:30', 1, 1, 0, NULL),
    (6, 102, 2002, '2021-09-28 11:00:00', '2021-09-28 11:00:30', 1, 0, 1, NULL),
    (7, 103, 2002, '2021-09-29 11:00:00', '2021-09-29 11:00:30', 1, 0, 1, NULL),
    (8, 102, 2002, '2021-09-30 11:00:00', '2021-09-30 11:00:30', 1, 1, 1, NULL),
    (9, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 1, 1, 0, NULL),
    (10, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, NULL),
    (11, 103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 1, 1, 0, 1732526),
    (12, 106, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 2, 0, 1, NULL),
    (13, 107, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 0, 1, NULL),
    (14, 108, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 1, 1, NULL),
    (15, 109, 2002, '2021-10-03 10:59:05', '2021-10-03 11:00:05', 0, 1, 0, NULL);

-- Videos 2001 and 2002 are tagged '旅游': the documented expected output
-- for this exercise contains only '旅游' rows, and its daily figures are
-- the combined plays of 2001 and 2002. Video 2003 (which has no plays) is
-- tagged '影视' so that each tag still maps to distinct videos.
INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '旅游', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '旅游', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '影视', 90, '2020-01-01 7:00:00'),
    (4, 2004, 902, '美女', 90, '2020-01-01 8:00:00');
-- 1. Restrict the data to 2021-09-25 .. 2021-10-03: the three reporting
--    days plus the six days of look-back the first reporting day needs.
SELECT
    video_id,
    date(start_time),
    if_like,
    if_retweet
FROM tb_user_video_log
WHERE datediff('2021-10-03', start_time) <= 8;
-- 2. Daily like count and retweet count for every tag.
SELECT
    info.tag AS tag,
    plays.dt AS dt,
    sum(plays.if_like) AS total_like_d,
    sum(plays.if_retweet) AS total_retweet_d
FROM (
    SELECT
        video_id,
        date(start_time) AS dt,
        if_like,
        if_retweet
    FROM tb_user_video_log
    WHERE datediff('2021-10-03', start_time) < 9
) plays
LEFT JOIN tb_video_info info
    ON plays.video_id = info.video_id
GROUP BY info.tag, plays.dt;
-- 3. Rolling 7-day like total and 7-day maximum daily retweet count.
--    dt is projected so that every rolling value can be tied to its day.
--    NOTE(review): the 7-row frame equals a 7-day window only if each tag
--    has exactly one row for every calendar day in range — true for the
--    sample data, confirm for real data.
SELECT
    tag,
    dt,
    sum(total_like_d) OVER (PARTITION BY tag ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS sum_like_cnt_7d,
    max(total_retweet_d) OVER (PARTITION BY tag ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS max_retweet_cnt_7d
FROM (
    SELECT
        b.tag AS tag,
        a.dt AS dt,
        sum(a.if_like) AS total_like_d,
        sum(a.if_retweet) AS total_retweet_d
    FROM (
        SELECT video_id, date(start_time) AS dt, if_like, if_retweet
        FROM tb_user_video_log
        WHERE datediff('2021-10-03', start_time) < 9
    ) a
    LEFT JOIN tb_video_info b
        ON a.video_id = b.video_id
    GROUP BY b.tag, a.dt
) t1;
-- 4. Keep only the reporting days 10-01 .. 10-03.
--    The windowed subquery now projects dt: the outer WHERE and ORDER BY
--    both reference it, so without it the statement could not resolve dt,
--    and the result would lack the dt column of the expected output.
--    NOTE(review): the 7-row frame equals a 7-day window only if each tag
--    has exactly one row for every calendar day in range — confirm for
--    real data.
SELECT *
FROM (
    SELECT
        tag,
        dt,
        sum(total_like_d) OVER (PARTITION BY tag ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS sum_like_cnt_7d,
        max(total_retweet_d) OVER (PARTITION BY tag ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS max_retweet_cnt_7d
    FROM (
        SELECT
            b.tag AS tag,
            a.dt AS dt,
            sum(a.if_like) AS total_like_d,
            sum(a.if_retweet) AS total_retweet_d
        FROM (
            SELECT video_id, date(start_time) AS dt, if_like, if_retweet
            FROM tb_user_video_log
            WHERE datediff('2021-10-03', start_time) < 9
        ) a
        LEFT JOIN tb_video_info b
            ON a.video_id = b.video_id
        GROUP BY b.tag, a.dt
    ) t1
) t2
-- The filter is applied after the window so the look-back days still feed
-- the rolling values of the reporting days.
WHERE month(dt) = 10
ORDER BY tag DESC, dt ASC;
id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
1 | 101 | 2001 | 2021-09-24 10:00:00 | 2021-09-24 10:00:30 | 1 | 1 | 1 | NULL |
2 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:31 | 1 | 1 | 0 | NULL |
3 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:35 | 0 | 0 | 1 | NULL |
4 | 103 | 2001 | 2021-10-03 11:00:50 | 2021-10-03 11:01:35 | 1 | 1 | 0 | 1732526 |
5 | 106 | 2002 | 2021-10-02 11:00:05 | 2021-10-02 11:01:04 | 2 | 0 | 1 | NULL |
6 | 107 | 2002 | 2021-10-02 10:59:05 | 2021-10-02 11:00:06 | 1 | 0 | 0 | NULL |
7 | 108 | 2002 | 2021-10-02 10:59:05 | 2021-10-02 11:00:05 | 1 | 1 | 1 | NULL |
8 | 109 | 2002 | 2021-10-03 10:59:05 | 2021-10-03 11:00:01 | 0 | 1 | 0 | NULL |
9 | 105 | 2002 | 2021-09-25 11:00:00 | 2021-09-25 11:00:30 | 1 | 0 | 1 | NULL |
10 | 101 | 2003 | 2021-09-26 11:00:00 | 2021-09-26 11:00:30 | 1 | 0 | 0 | NULL |
11 | 101 | 2003 | 2021-09-30 11:00:00 | 2021-09-30 11:00:30 | 1 | 1 | 0 | NULL |
id | video_id | author | tag | duration | release_time |
1 | 2001 | 901 | 影视 | 30 | 2021-09-05 07:00:00 |
2 | 2002 | 901 | 美食 | 60 | 2021-09-05 07:00:00 |
3 | 2003 | 902 | 旅游 | 90 | 2021-09-05 07:00:00 |
4 | 2004 | 902 | 美女 | 90 | 2021-09-05 08:00:00 |
video_id | hot_index |
2001 | 122 |
2002 | 56 |
2003 | 1 |
4)最近播放日期以end_time为准,假设为T,则最近一个月按[T-29, T]闭区间统计;
-- Fixture for the "top-3 hot index among recently released videos" exercise.
-- User-video interaction log: one row per viewing session.
DROP TABLE IF EXISTS tb_user_video_log;
CREATE TABLE tb_user_video_log (
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
COMMENT '用户-视频互动表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Short-video metadata: one row per video; duration is in seconds.
DROP TABLE IF EXISTS tb_video_info;
CREATE TABLE tb_video_info (
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
COMMENT '短视频信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Sample rows.
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-09-24 10:00:00', '2021-09-24 10:00:30', 1, 1, 1, NULL),
    (2, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:31', 1, 1, 0, NULL),
    (3, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:35', 0, 0, 1, NULL),
    (4, 103, 2001, '2021-10-03 11:00:50', '2021-10-03 11:01:35', 1, 1, 0, 1732526),
    (5, 106, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:04', 2, 0, 1, NULL),
    (6, 107, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:06', 1, 0, 0, NULL),
    (7, 108, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 1, 1, NULL),
    (8, 109, 2002, '2021-10-03 10:59:05', '2021-10-03 11:00:01', 0, 1, 0, NULL),
    (9, 105, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 1, 0, 1, NULL),
    (10, 101, 2003, '2021-09-26 11:00:00', '2021-09-26 11:00:30', 1, 0, 0, NULL),
    (11, 101, 2003, '2021-09-30 11:00:00', '2021-09-30 11:00:30', 1, 1, 0, NULL);

INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '旅游', 30, '2021-09-05 7:00:00'),
    (2, 2002, 901, '旅游', 60, '2021-09-05 7:00:00'),
    (3, 2003, 902, '影视', 90, '2021-09-05 7:00:00'),
    (4, 2004, 902, '影视', 90, '2021-09-05 8:00:00');
-- 1. Most recent play date, taken from end_time.
SELECT
    max(date(end_time)) AS last_date
FROM tb_user_video_log;
-- 2. Videos released no more than 29 days before the most recent play date.
SELECT
    video_id,
    duration
FROM tb_video_info a
CROSS JOIN (
    SELECT max(date(end_time)) AS last_date
    FROM tb_user_video_log
) b
WHERE datediff(b.last_date, date(a.release_time)) < 30;
-- 3. Per play: days between its end date and the most recent play date,
--    watched seconds, and the interaction flags.
--    The original select list ended in a dangling comma (a syntax error)
--    and omitted if_like / if_retweet, which the later aggregation steps
--    read from this result.
SELECT
    video_id,
    datediff(last_date, date(end_time)) AS no_play_day,
    unix_timestamp(end_time) - unix_timestamp(start_time) AS play_time,
    -- 1 when the play carries a comment, 0 otherwise.
    if(comment_id IS NULL, 0, 1) AS if_comment,
    if_like,
    if_retweet
FROM tb_user_video_log a,
     (SELECT max(date(end_time)) AS last_date FROM tb_user_video_log) b;
-- 4. For every video released in the last month: days since its latest
--    play, completion rate, and total likes, comments and retweets.
--    Fixes: the inner select list ended in a dangling comma and did not
--    project if_like / if_retweet; and the join is now an inner join,
--    because a LEFT JOIN kept plays of videos released more than a month
--    ago (with a NULL duration), contrary to this step's purpose.
SELECT
    t1.video_id AS video_id,
    min(t1.no_play_day) AS no_play_count,
    sum(if(t1.play_time >= t2.duration, 1, 0)) / count(*) AS play_rate,
    sum(t1.if_like) AS like_count,
    sum(t1.if_comment) AS comment_count,
    sum(t1.if_retweet) AS retweet_count
FROM (
    -- One row per play, with the metrics the aggregation needs.
    SELECT
        video_id,
        datediff(last_date, date(end_time)) AS no_play_day,
        unix_timestamp(end_time) - unix_timestamp(start_time) AS play_time,
        if(comment_id IS NULL, 0, 1) AS if_comment,
        if_like,
        if_retweet
    FROM tb_user_video_log a,
         (SELECT max(date(end_time)) AS last_date FROM tb_user_video_log) b
) t1
JOIN (
    -- Videos released no more than 29 days before the most recent play date.
    SELECT video_id, duration
    FROM tb_video_info a,
         (SELECT max(date(end_time)) AS last_date FROM tb_user_video_log) b
    WHERE datediff(b.last_date, date(a.release_time)) <= 29
) t2
    ON t1.video_id = t2.video_id
GROUP BY t1.video_id;
-- 5. Hot index per video, top three:
--    (100 * completion rate + 5 * likes + 3 * comments + 2 * retweets)
--    / (days since latest play + 1), rounded to an integer value.
--    Fixes: the innermost select list ended in a dangling comma and did not
--    project if_like / if_retweet; and the join is now an inner join so
--    that only videos released in the last month are ranked.
SELECT
    video_id,
    round((100 * play_rate + 5 * like_count + 3 * comment_count + 2 * retweet_count) / (no_play_count + 1)) AS hot_index
FROM (
    SELECT
        t1.video_id AS video_id,
        min(t1.no_play_day) AS no_play_count,
        sum(if(t1.play_time >= t2.duration, 1, 0)) / count(*) AS play_rate,
        sum(t1.if_like) AS like_count,
        sum(t1.if_comment) AS comment_count,
        sum(t1.if_retweet) AS retweet_count
    FROM (
        -- One row per play, with the metrics the aggregation needs.
        SELECT
            video_id,
            datediff(last_date, date(end_time)) AS no_play_day,
            unix_timestamp(end_time) - unix_timestamp(start_time) AS play_time,
            if(comment_id IS NULL, 0, 1) AS if_comment,
            if_like,
            if_retweet
        FROM tb_user_video_log a,
             (SELECT max(date(end_time)) AS last_date FROM tb_user_video_log) b
    ) t1
    JOIN (
        -- Videos released no more than 29 days before the most recent play date.
        SELECT video_id, duration
        FROM tb_video_info a,
             (SELECT max(date(end_time)) AS last_date FROM tb_user_video_log) b
        WHERE datediff(b.last_date, date(a.release_time)) <= 29
    ) t2
        ON t1.video_id = t2.video_id
    GROUP BY t1.video_id
) t
ORDER BY hot_index DESC
LIMIT 3;