数据处理:
·统计最受欢迎的视频/文章的Top10访问次数 (video/article)
-- Top 10 most frequent video/article id tokens.
-- Splits data.mid on single spaces, counts how often each token occurs,
-- and keeps only the 10 highest counts.
-- NOTE(review): assumes mid holds space-separated ids - confirm against the schema of data.
create table id_count as
select
    word,
    count(*) as cnt
from (
    select explode(split(mid, ' ')) as word
    from data
) w
group by word
-- word is a tie-breaker so the 10 retained rows are deterministic when counts are equal
order by cnt desc, word
limit 10;
·按照地市统计最受欢迎的Top10课程 (ip)
-- Top 10 most frequent mip tokens.
-- Splits data.mip on single spaces, counts how often each token occurs,
-- and keeps only the 10 highest counts.
-- NOTE(review): the task heading asks for the top courses per city, but this query
-- ranks mip values only - confirm whether an IP-to-city mapping and a course column are needed.
create table count_ip as
select
    word,
    count(*) as cnt
from (
    select explode(split(mip, ' ')) as word
    from data
) w
group by word
-- word is a tie-breaker so the 10 retained rows are deterministic when counts are equal
order by cnt desc, word
limit 10;
·按照流量统计最受欢迎的Top10课程 (traffic)
-- Top 10 mid values ranked by total traffic.
-- Sums data.mt per mid and keeps only the 10 largest totals.
-- The output column is still named cnt for compatibility with existing readers,
-- although it holds a sum of mt and not a row count.
-- NOTE(review): mid is grouped as a whole value here, while id_count splits mid on
-- spaces - confirm which form of the id is intended.
create table traffic_count as
select
    mid,
    sum(mt) as cnt
from data
group by mid
-- mid is a tie-breaker so the 10 retained rows are deterministic when totals are equal
order by cnt desc, mid
limit 10;