01
# 引言
最近接到这么一个需求,需要将年龄结构图和堆积柱状图结合在一起,主要是Matplotlib简单柱状图的变形,但简单的组合后,能够非常直观地展现年龄结构和组成成分,丰富了图表内容,节约了文章版面。
02
# 读取处理数据
借助pandas读取数据如下图所示,可以发现就是简单的问卷调查类型的数据(样例数据仅供绘图使用,不必纠结数据的合理性),由于需要分组和绘制堆积柱状图,这里可以借助pandas的cut和cumsum对数据进行处理。
# Load the questionnaire sample and give the three columns short names.
fn = r'D:\ForestMeteorology\FM230613\data\鼻炎问卷数据样例.xlsx'
df = pd.read_excel(fn)
df.columns = ['kind', 'age', 'gender']
print(df)
# Bucket ages into right-closed 10-year intervals: (10, 20], ..., (90, 100].
# Note that an age of exactly 10 falls outside the first interval.
bins = np.arange(10, 101, 10)
df['group'] = pd.cut(df.age, bins=bins)
# 'level' is the lower edge of each interval. It is taken from the bin edges
# rather than by slicing the interval's string form, so it keeps working when
# the edges are not two-digit numbers. Ages outside the bins produce NaN and
# still fail at astype(int).
df['level'] = pd.cut(df.age, bins=bins, labels=bins[:-1]).astype(int)
# Encode the categorical answers as small integer codes.
kinddict = {'鼻炎前期': 1, '鼻炎确诊3年内': 2, '鼻炎确诊3年以上(无并发症)': 3,'鼻炎确诊3年以上(有并发症)':4}
# Assign the mapped columns back to df. Calling replace(..., inplace=True) on
# df[col] mutates a temporary Series and, under pandas copy-on-write, leaves
# df itself unchanged.
df['kind'] = df['kind'].map(kinddict)
df['gender'] = df['gender'].map({'男': 1, '女': 0})
df.sort_values('level', inplace=True)
print(df)
03
# 可视化
主要分性别分年龄组去绘制,需要注意的是图层的上下层关系。
# Draw one mirrored, stacked horizontal bar per (gender, age group).
fig, ax = plt.subplots(figsize=(8, 4.5))
colors = ['#D1B26F', '#E7D9B6', '#B4D7D3', '#6FB2AB']
# Display name of kind code k lives at index k - 1; build the list once.
kind_names = list(kinddict)
for g in [0, 1]:
    # Gender 0 is drawn towards +x, gender 1 towards -x.
    sign = 1 if g == 0 else -1
    for i in df['level'].unique():
        idf = df[(df['level'] == i) & (df['gender'] == g)]
        ires = idf.groupby('kind').count()
        ires.reset_index(inplace=True)
        ires['csum'] = ires['age'].cumsum()
        # Longest cumulative bar first, so shorter ones are painted on top
        # of it and the visible slices form the stack.
        ires.sort_values('csum', inplace=True, ascending=False)
        print((i, g))
        print(ires)
        if ires.empty:
            # Nobody of this gender in this age group: nothing to draw.
            continue
        for jtype, jx in zip(ires['kind'], ires['csum']):
            ax.barh(
                i,
                sign * jx,
                label=kind_names[jtype - 1],
                color=colors[jtype - 1],
                height=10,
            )
        # Outline around the whole stack. It is drawn once per bar (not once
        # per slice) and left unlabelled so it never becomes a legend entry.
        ax.barh(
            i,
            sign * ires['csum'].iloc[0],
            facecolor='none',
            height=10,
            edgecolor='k',
            zorder=0,
        )
plt.show()
04
# 完整代码
最终添加图例、坐标轴名称等细节就完成了整幅图啦!
# -*- encoding: utf-8 -*-
'''
@File : gzh230616.py
@Time : 2023/06/16 23:59:51
@Author : HMX
@Version : 1.0
@Contact : kzdhb8023@163.com
'''
# here put the import lib
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Use the SimHei font so Chinese labels render, and turn off the Unicode
# minus sign for axis tick labels.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# Load the questionnaire sample and give the three columns short names.
fn = r'D:\ForestMeteorology\FM230613\data\鼻炎问卷数据样例.xlsx'
df = pd.read_excel(fn)
df.columns = ['kind', 'age', 'gender']
print(df)
# Bucket ages into right-closed 10-year intervals: (10, 20], ..., (90, 100].
# Note that an age of exactly 10 falls outside the first interval.
bins = np.arange(10, 101, 10)
df['group'] = pd.cut(df.age, bins=bins)
# 'level' is the lower edge of each interval. It is taken from the bin edges
# rather than by slicing the interval's string form, so it keeps working when
# the edges are not two-digit numbers. Ages outside the bins produce NaN and
# still fail at astype(int).
df['level'] = pd.cut(df.age, bins=bins, labels=bins[:-1]).astype(int)
# Encode the categorical answers as small integer codes.
kinddict = {'鼻炎前期': 1, '鼻炎确诊3年内': 2, '鼻炎确诊3年以上(无并发症)': 3,'鼻炎确诊3年以上(有并发症)':4}
# Assign the mapped columns back to df. Calling replace(..., inplace=True) on
# df[col] mutates a temporary Series and, under pandas copy-on-write, leaves
# df itself unchanged.
df['kind'] = df['kind'].map(kinddict)
df['gender'] = df['gender'].map({'男': 1, '女': 0})
df.sort_values('level', inplace=True)
print(df)
# Draw one mirrored, stacked horizontal bar per (gender, age group).
fig, ax = plt.subplots(figsize=(8, 4.5))
colors = ['#D1B26F', '#E7D9B6', '#B4D7D3', '#6FB2AB']
# Display name of kind code k lives at index k - 1; build the list once.
kind_names = list(kinddict)
for g in [0, 1]:
    # Gender 0 is drawn towards +x, gender 1 towards -x.
    sign = 1 if g == 0 else -1
    for i in df['level'].unique():
        idf = df[(df['level'] == i) & (df['gender'] == g)]
        ires = idf.groupby('kind').count()
        ires.reset_index(inplace=True)
        ires['csum'] = ires['age'].cumsum()
        # Longest cumulative bar first, so shorter ones are painted on top
        # of it and the visible slices form the stack.
        ires.sort_values('csum', inplace=True, ascending=False)
        print((i, g))
        print(ires)
        if ires.empty:
            # Nobody of this gender in this age group: nothing to draw.
            continue
        for jtype, jx in zip(ires['kind'], ires['csum']):
            ax.barh(
                i,
                sign * jx,
                label=kind_names[jtype - 1],
                color=colors[jtype - 1],
                height=10,
            )
        # Outline around the whole stack. It is drawn once per bar (not once
        # per slice) and left unlabelled so it never becomes a legend entry.
        ax.barh(
            i,
            sign * ires['csum'].iloc[0],
            facecolor='none',
            height=10,
            edgecolor='k',
            zorder=0,
        )
# Legend: every barh call registers its own handle, so collapse repeated
# labels and keep the first handle seen for each one.
handles, labels = ax.get_legend_handles_labels()
first_handle_by_label = {}
for handle, label in zip(handles, labels):
    # setdefault keeps the earliest handle and preserves first-seen order.
    first_handle_by_label.setdefault(label, handle)
unique_labels = list(first_handle_by_label)
unique_handles = list(first_handle_by_label.values())
ax.legend(unique_handles, unique_labels)
# Dotted vertical guide line at x = 0 between the two sides.
ax.axvline(x=0, c='k', ls=':', lw=1)
# Axis ticks, limits and titles.
xticks = np.arange(-30, 31, 10)
ax.set_yticks(bins[:-1])
ax.set_ylim(0, 100)
ax.set_xlim(-35, 35)
ax.set_xticks(xticks)
# The left side is plotted with negative widths; show magnitudes only.
ax.set_xticklabels([abs(tick) for tick in xticks])
ax.set_ylabel('年龄')
ax.set_xlabel('人数')
# Gender captions, one on each side of the guide line.
for x_pos, caption in ((30, '女'), (-30, '男')):
    ax.text(x_pos, 10, caption, ha='center', va='center')
plt.tight_layout()
plt.savefig(r'D:\ForestMeteorology\FM230613\data\gzh230616.png', dpi=600)
plt.show()
05
# 随笔
毕业一年,迷茫依旧是那个迷茫,只是偶然读到苏轼的《定风波》:【莫听穿林打叶声,何妨吟啸且徐行】,内心渐渐接受自己大抵无法成为理想中的人。只是不甘于躺平摆烂,希望能够坚持奋斗,早日享受平淡生活。
戳这里关注我
请点赞、在看、关注,你们的支持是我更新的动力。