本期我们通过全国各地区景点门票的售卖情况,简单分析一下全国比较热门的景点分布和国民出游情况,看看哪些地区的景点比较受欢迎,希望对大家有所帮助。如有疑问或者需要改进的地方,可以联系小编。
涉及到的库:Pandas — 数据处理;Pyecharts — 数据可视化;jieba — 分词;collections — 数据统计。
import jieba
import pandas as pd
from collections import Counter
from pyecharts.charts import Line,Pie,Scatter,Bar,Map,Grid
from pyecharts.charts import WordCloud
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.globals import SymbolType
from pyecharts.commons.utils import JsCode
2.1 读取数据
# Load the attraction/ticket dataset from the Excel workbook.
df = pd.read_excel(io='旅游景点.xlsx')
# Notebook-style inspection: preview the first rows, then print the column
# dtypes and non-null counts.
df.head(n=5)
df.info()
# Preview the rows with the highest '销量' (sales) values.
df.sort_values(by='销量', ascending=False).head(n=5)
# Order rows by sales ascending so the best sellers end up at the tail.
sort_info = df.sort_values(by='销量', ascending=True)

# Horizontal bar chart of the last 20 rows (the 20 highest sales figures).
# NOTE(review): `color_js` is not defined in the visible part of the file;
# it is presumably a JS colour snippet declared elsewhere - confirm.
b1 = Bar()
b1.add_xaxis(list(sort_info['名称'].iloc[-20:]))
b1.add_yaxis(
    '热门景点销量',
    sort_info['销量'].values[-20:].tolist(),
    itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js)),
)
# Swap the axes so the bars run horizontally.
b1.reversal_axis()
b1.set_global_opts(
    title_opts=opts.TitleOpts(title='热门景点销量数据'),
    yaxis_opts=opts.AxisOpts(name='景点名称'),
    xaxis_opts=opts.AxisOpts(name='销量'),
)
b1.set_series_opts(label_opts=opts.LabelOpts(position="right"))

# Shift the whole plot to the right by widening the grid's left margin.
g1 = Grid()
g1.add(b1, grid_opts=opts.GridOpts(pos_left='20%', pos_right='5%'))
# Total ticket sales per '城市' value, drawn as a choropleth on the China map.
df_tmp1 = df[['城市', '销量']]
df_counts = df_tmp1.groupby('城市').sum()
# NOTE(review): `colors` is not defined in the visible part of the file;
# presumably a list of colour strings declared elsewhere - confirm.
# NOTE(review): the 'china' map type presumably needs the '城市' values to
# match the region names used by that map - verify against the data.
m1 = (
    Map()
    .add(
        '假期出行分布',
        # `df_counts` is a one-column DataFrame, so `.values.tolist()` would
        # produce nested one-element lists ([[v], ...]). Read the '销量'
        # column instead so each data pair is [name, number].
        [
            list(z)
            for z in zip(df_counts.index.tolist(), df_counts['销量'].tolist())
        ],
        'china',
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='假期出行数据地图分布'),
        visualmap_opts=opts.VisualMapOpts(
            max_=100000, is_piecewise=False, range_color=colors
        ),
    )
)
# Keep only attractions rated 4A or 5A, then count them per '城市' value.
df_tmp2 = df[df['星级'].isin(['4A', '5A'])]
df_counts = df_tmp2.groupby('城市')['星级'].count()

# Bar chart of the per-region 4A/5A counts, with both a slider and an
# inside (scroll/drag) data zoom.
# NOTE(review): `color_js` is not defined in the visible part of the file.
b2 = Bar()
b2.add_xaxis(df_counts.index.tolist())
b2.add_yaxis(
    '4A-5A景区数量',
    df_counts.tolist(),
    itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js)),
)
b2.set_global_opts(
    title_opts=opts.TitleOpts(title='各省市4A-5A景区数量'),
    datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_='inside')],
)
# Sorted copy of the per-region counts, largest first; `df_counts` itself is
# left untouched.
df0 = df_counts.sort_values(ascending=False)

# Rose-type ("area") pie of the counts, labels drawn inside the slices.
c1 = Pie()
c1.add(
    '',
    [[region, total] for region, total in zip(df0.index.tolist(), df0.tolist())],
    radius=['30%', '100%'],
    center=['50%', '60%'],
    rosetype='area',
)
c1.set_global_opts(
    title_opts=opts.TitleOpts(title='地区景点数量'),
    legend_opts=opts.LegendOpts(is_show=False),
    toolbox_opts=opts.ToolboxOpts(),
)
c1.set_series_opts(
    label_opts=opts.LabelOpts(
        is_show=True,
        position='inside',
        font_size=12,
        formatter='{b}: {c}',
        font_style='italic',
        font_weight='bold',
        font_family='Microsoft YaHei',
    )
)
# Raw item-style dict (drop shadow) passed straight through to the series.
item_style = {
    'normal': {
        'shadowColor': '#000000',
        'shadowBlur': 20,
        'shadowOffsetX': 5,
        'shadowOffsetY': 15,
    }
}

# Scatter of the per-region 4A/5A counts; a hidden visual map of type 'size'
# maps values onto symbol sizes in the 5-50 range.
s1 = Scatter()
s1.add_xaxis(df_counts.index.tolist())
s1.add_yaxis(
    '4A-5A景区数量',
    df_counts.tolist(),
    symbol_size=50,
    itemstyle_opts=item_style,
)
s1.set_global_opts(
    visualmap_opts=opts.VisualMapOpts(
        is_show=False,
        type_='size',
        range_size=[5, 50],
    )
)
# Recompute the per-region count of 4A/5A attractions for the map view.
df_tmp3 = df[df['星级'].isin(['4A', '5A'])]
df_counts = df_tmp3.groupby('城市')['星级'].count()

# Choropleth on the China map with a piecewise legend capped at 50.
m2 = Map()
m2.add(
    '4A-5A景区分布',
    [
        [region, total]
        for region, total in zip(df_counts.index.tolist(), df_counts.tolist())
    ],
    'china',
)
m2.set_global_opts(
    title_opts=opts.TitleOpts(title='地图数据分布'),
    visualmap_opts=opts.VisualMapOpts(max_=50, is_piecewise=True),
)
3.7 门票价格区间占比玫瑰图
# Bucket ticket prices into fixed bands and count attractions per band.
# pd.cut uses right-closed intervals by default, so a price of exactly 0 or
# anything above 500 falls outside every band and is not counted.
price_level = [0, 50, 100, 150, 200, 250, 300, 350, 400, 500]
label_level = ['0-50', '50-100', '100-150', '150-200', '200-250', '250-300', '300-350', '350-400', '400-500']
jzmj_cut = pd.cut(df['价格'], price_level, labels=label_level)
df_price = jzmj_cut.value_counts()

# Rose-type ("radius") pie showing each price band's count and share.
p1 = (
    Pie(
        init_opts=opts.InitOpts(
            width='800px', height='600px',
        )
    )
    .add(
        '',
        [list(z) for z in zip(df_price.index.tolist(), df_price.values.tolist())],
        radius=['20%', '60%'],
        center=['40%', '50%'],
        rosetype='radius',
        label_opts=opts.LabelOpts(is_show=True),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='门票价格占比', pos_left='33%', pos_top="5%"),
        legend_opts=opts.LegendOpts(type_='scroll', pos_left="80%", pos_top="25%", orient="vertical"),
    )
    # `position` belongs to the label options. Passed as a bare keyword to
    # set_series_opts it only became a stray key on the series and never
    # placed the labels outside the slices.
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter='{b}: {c} ({d}%)', position='outside')
    )
)
3.8 门票价格区间数量散点图
# Scatter of the attraction count in each price band.
# NOTE(review): `color_js` is not defined in the visible part of the file.
s2 = Scatter()
s2.add_xaxis(df_price.index.tolist())
s2.add_yaxis(
    '门票价格区间',
    df_price.values.tolist(),
    symbol_size=50,
    itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js)),
)
# Axis names and the hidden size-based visual map are set in a single call.
s2.set_global_opts(
    yaxis_opts=opts.AxisOpts(name='数量'),
    xaxis_opts=opts.AxisOpts(name='价格区间(元)'),
    visualmap_opts=opts.VisualMapOpts(
        is_show=False,
        # Represent the data through symbol size.
        type_='size',
        # Range the symbol sizes are mapped onto.
        range_size=[5, 50],
    ),
)
# Word cloud of the 200 most common entries in `ac`, shaped by a mask image.
# NOTE(review): `ac` is not defined in the visible part of the file;
# presumably a collections.Counter of jieba tokens - confirm.
w2 = WordCloud()
w2.add(
    "",
    ac.most_common(200),
    word_size_range=[5, 80],
    textstyle_opts=opts.TextStyleOpts(font_family="cursive"),
    mask_image='1.jpg',
)
w2.set_global_opts(
    title_opts=opts.TitleOpts(title="自定义样式词云图"),
)
华东、华南、华中等地区属于国民出游热点地区,尤其是北京、上海、江苏、广东、四川、陕西等地区出行比较密集。江苏、安徽、河南、北京、湖北等地区4A、5A级景区数量比较多。门票价格以100元以内居多,大概占比70%,还是比较实惠的。
往期推荐