- 导入库
import pandas as pd
import numpy as np
import time
import jieba
import datetime
from pyecharts import options as opts
from pyecharts.charts import *
from pyecharts.commons.utils import JsCode
from pyecharts.components import Table
from pyecharts.options import ComponentTitleOpts
- 数据概览
# Load the ranking workbook and discard the avatar column ('头像') in one step.
df = pd.read_excel('B站新榜_总粉丝数榜单.xlsx').drop(columns=['头像'])
# Notebook preview of the first two rows.
df.head(2)
 | 数据更新时间 | 创作领域 | 等级 | 获赞数 | mid | up主 | up主标签 | 播放数 | 性别 | 类型 | 平均获赞数 | 平均播放数 | 充电人数 | 总粉丝人数 | 作品数 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2022-05-15 14:14:58 | 生活 | 6 | 6104495 | 321173469 | 哔哩哔哩大会员 | 哔哩哔哩大会员官方账号 | 2939318 | 男 | 生活 | 290690.2 | 139967.5 | 351 | 24620688 | 21 |
1 | 2022-05-15 14:14:58 | 生活 | 6 | 91718101 | 9824766 | 敬汉卿 | bilibili 2020百大UP主、2019年度弹幕人气奖UP主 | 1588762824 | 男 | 生活 | 77793.1 | 1347551.2 | 20379 | 9272243 | 1179 |
# Distinct creation areas taken from the index of value_counts(), i.e. in
# the order value_counts() reports them.
# NOTE(review): the name `type` shadows the builtin; it is kept because the
# tab-building loop further down reads it.
type = df['创作领域'].value_counts().index.to_list()
type
['娱乐',
'游戏',
'国创',
'动物圈',
'动画',
'影视',
'番剧',
'纪录片',
'资讯',
'鬼畜',
'数码',
'舞蹈',
'美食',
'知识',
'运动',
'科技',
'时尚',
'汽车',
'音乐',
'生活',
'电视剧',
'电影']
# Column names shown in the tables: every column except the first one.
headers = list(df.columns[1:])
headers
['创作领域',
'等级',
'获赞数',
'mid',
'up主',
'up主标签',
'播放数',
'性别',
'类型',
'平均获赞数',
'平均播放数',
'充电人数',
'总粉丝人数',
'作品数']
- 绘制各个创作领域多选项卡表
# Build one table per creation area and collect them as tabs.
tab = Tab()
for category in type:
    # Rows belonging to this creation area only.
    tab_category = df[df['创作领域'] == category]
    # One plain Python list per row, in `headers` column order.
    rows = tab_category[headers].apply(lambda row: list(row), axis=1).values.tolist()
    table = (
        Table()
        .add(
            headers,
            rows,
            attributes={
                'class': 'fl_table',
                'style': 'margin: 0 auto',
            },
        )
        .set_global_opts(
            title_opts=ComponentTitleOpts(
                title=f'{category} - 总粉丝排行榜单Top50',
                subtitle='更新时间:2022-05-15 充电人数为负是数据本身问题',
            )
        )
    )
    # The tab label is the creation-area name itself.
    tab.add(table, category)
# Rendered once, after every tab has been added.
tab.render_notebook()
- 定义柱状图绘图函数
def bar_chart(desc, title_pos, num):
    """Build a bar chart of the top ``num`` rows of ``df`` ranked by ``desc``.

    Reads the module-level DataFrame ``df``; it is not modified.

    Args:
        desc: Name of the ``df`` column used both for ranking (descending)
            and as the bar values. It is also appended to the chart title.
        title_pos: Two-item sequence ``(left, top)`` forwarded to the title's
            ``pos_left`` and ``pos_top``.
        num: Number of top-ranked rows to plot.

    Returns:
        The configured ``Bar`` chart, with the 'up主' column on the x axis.
    """
    df_t = df.sort_values(by=[desc], ascending=False).head(num)

    # Both axes share the same dashed split-line styling.
    x_axis = opts.AxisOpts(
        is_scale=True,
        # Rotate the category labels by 90 degrees.
        axislabel_opts={'rotate': '90'},
        splitline_opts=opts.SplitLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(type_='dashed'),
        ),
    )
    y_axis = opts.AxisOpts(
        is_scale=True,
        name='',
        type_='value',
        splitline_opts=opts.SplitLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(type_='dashed'),
        ),
    )
    title = opts.TitleOpts(
        title='up主-' + desc,
        subtitle='👇👇👇👇',
        pos_left=title_pos[0],
        pos_top=title_pos[1],
        title_textstyle_opts=opts.TextStyleOpts(
            color='#42B983',
            font_size=16,
        ),
    )

    chart = (
        Bar()
        .add_xaxis(df_t['up主'].tolist())
        # Empty series name: the series gets no label of its own.
        .add_yaxis('', df_t[desc].tolist())
        .set_global_opts(
            xaxis_opts=x_axis,
            yaxis_opts=y_axis,
            tooltip_opts=opts.TooltipOpts(
                trigger='axis',
                axis_pointer_type='shadow',
            ),
            title_opts=title,
        )
    )
    return chart
- 划分粉丝数/作品数区间
# Fan-count buckets as (inclusive upper bound, label), in ascending order.
_FANS_BUCKETS = (
    (100000, '0~10w'),
    (500000, '10w~50w'),
    (1000000, '50w~100w'),
    (2000000, '100w~200w'),
    (3000000, '200w~300w'),
    (4000000, '300w~400w'),
    (5000000, '400w~500w'),
    (6000000, '500w~600w'),
    (7000000, '600w~700w'),
    (8000000, '700w~800w'),
    (9000000, '800w~900w'),
    (10000000, '900w~1000w'),
)


def transform_fans(x):
    """Map a fan count to the label of the range it falls into.

    Each range includes its upper bound, so 100000 belongs to '0~10w' and
    100001 to '10w~50w'. The lowest range is labelled '0~10w' so that it
    reads as a range like all the others.

    Args:
        x: Fan count; any value comparable with ``<=`` against an int.

    Returns:
        The range label. Values above 10000000 yield '>1000w', as does any
        value for which every ``<=`` comparison is false (for example NaN).
    """
    for upper, label in _FANS_BUCKETS:
        if x <= upper:
            return label
    return '>1000w'
# Work-count buckets as (inclusive upper bound, label), in ascending order.
_WORK_BUCKETS = (
    (100, '0~100'),
    (200, '100~200'),
    (300, '200~300'),
    (400, '300~400'),
    (500, '400~500'),
)


def transform_work(x):
    """Map a number of published works to the label of its range.

    Each range includes its upper bound, so 100 belongs to '0~100' and 101
    to '100~200'.

    Args:
        x: Work count; any value comparable with ``<=`` against an int.

    Returns:
        The range label. Values above 500 yield '>500', as does any value
        for which every ``<=`` comparison is false (for example NaN).
    """
    for upper, label in _WORK_BUCKETS:
        if x <= upper:
            return label
    return '>500'
- 定义饼图函数
def pie_chart():
    """Build a chart with two ring pies: fan-count and work-count ranges.

    Side effect: adds (or overwrites) the columns 'fans_cut' and 'works_cut'
    on the module-level DataFrame ``df``, holding the range label of each
    row's '总粉丝人数' and '作品数' value respectively.

    Returns:
        A ``Pie`` chart containing both series, each with its own title.
    """
    df['fans_cut'] = df['总粉丝人数'].apply(transform_fans)
    fans_counts = df['fans_cut'].value_counts()
    # [label, count] pairs built from tolist() output, as the series data.
    fans_pairs = [
        list(pair)
        for pair in zip(fans_counts.index.tolist(), fans_counts.values.tolist())
    ]

    df['works_cut'] = df['作品数'].apply(transform_work)
    works_counts = df['works_cut'].value_counts()
    works_pairs = [
        list(pair)
        for pair in zip(works_counts.index.tolist(), works_counts.values.tolist())
    ]

    title_style = dict(color='#334B5C', fontSize=16)
    pie = (
        Pie()
        .add(
            '',
            fans_pairs,
            radius=['55', '100'],
            center=['30%', '90%'],
        )
        .add(
            '',
            works_pairs,
            radius=['55', '100'],
            center=['75%', '90%'],
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter='{b}: {c} {d}%'))
        .set_global_opts(
            legend_opts=opts.LegendOpts(is_show=False),
            # One raw title dict per pie. Each title sits 15 percentage
            # points left of its pie's centre (30% and 75%); the first one
            # previously used '15', which is a pixel offset, not a percentage.
            title_opts=[
                dict(
                    text='总粉丝人数区间分布',
                    left='15%',
                    top='80%',
                    textStyle=dict(title_style),
                ),
                dict(
                    text='作品数区间分布',
                    left='60%',
                    top='80%',
                    textStyle=dict(title_style),
                ),
            ],
        )
    )
    return pie
- 并行多图
# Single tall canvas that holds every chart of the dashboard.
grid = Grid(
    init_opts=opts.InitOpts(width='1000px', height='2000px', theme='light')
)

# One entry per bar chart, added in this order:
# (column, title position, top-N, grid top, grid bottom, grid left, grid right)
bar_layout = [
    ('获赞数', ['15%', '1%'], 8, '5%', '85%', '15%', '50%'),
    ('平均获赞数', ['60%', '1%'], 8, '5%', '85%', '60%', '5%'),
    ('播放数', ['15%', '21%'], 8, '25%', '65%', '15%', '50%'),
    ('平均播放数', ['60%', '21%'], 8, '25%', '65%', '60%', '5%'),
    ('总粉丝人数', ['15%', '40%'], 15, '44%', '46%', '15%', '5%'),
    ('作品数', ['15%', '58%'], 15, '61%', '29%', '15%', '5%'),
]
for column, title_pos, top_n, top, bottom, left, right in bar_layout:
    grid.add(
        bar_chart(column, title_pos, top_n),
        is_control_axis_index=False,
        grid_opts=opts.GridOpts(
            pos_top=top,
            pos_bottom=bottom,
            pos_left=left,
            pos_right=right,
        ),
    )

# The two pies are added last; their on-canvas position is set by the
# `center` values inside pie_chart().
grid.add(
    pie_chart(),
    is_control_axis_index=False,
    grid_opts=opts.GridOpts(
        pos_top='25%',
        pos_bottom='65%',
        pos_left='5%',
        pos_right='5%',
    ),
)
grid.render_notebook()
本文含有隐藏内容,请 开通VIP 后查看