虽然年底很忙,但作为一个数据科学科普博主,还是有必要每年分析一下公众号的运营状况。下面是2024年公众号运营数据的分析结果和分析代码。
数据来源
数据来源于公众号后台下载的数据明细,微信公众号大部分参数都是每两个月可以下载一次,下载后在R里用tidyverse进行清洗,得到数据分析结果。
阅读量分析
逐日总阅读量变化
分析每日总阅读量的变化情况。
library(tidyverse)
library(readxl)
# Collect every .xls traffic export for 2024. The dot is escaped so that only
# a literal ".xls" extension matches.
userdata <- list.files(path = "./2024/Liuliang/", pattern = "\\.xls$")
userdir <- paste0("./2024/Liuliang/", userdata)
# Fail early with a clear message instead of an obscure read error later on.
if (length(userdir) == 0) {
  stop("No .xls files found in ./2024/Liuliang/", call. = FALSE)
}
# Read the first sheet of every file and stack all rows in one step.
# Iterating over the paths themselves (instead of 2:length(userdir)) also
# works when there is exactly one file, and avoids growing user1 in a loop.
user1 <- do.call(
  rbind,
  lapply(userdir, function(f) read_excel(path = f, sheet = 1, col_names = TRUE))
)
# Coerce the three count columns to numeric, derive PubDay from the date
# column using the %Y%m%d format, and sort the rows chronologically.
user2 <- user1 %>%
  mutate(across(c(阅读人数, 阅读次数, 分享次数), as.numeric)) %>%
  mutate(PubDay = as.Date(日期, format = "%Y%m%d")) %>%
  arrange(PubDay)
# Daily total reads: keep the date, read-count and channel columns under
# English names, then retain only the rows whose channel is "全部".
read_daily <- user2 %>%
  select(PubDay, ReadNum = 阅读次数, Source = 渠道) %>%
  filter(Source == "全部")
# Plot the daily read count as a time series.
library(ggpmisc)
library(ggsci)
# Geometry layers are added first (line below points), then the NPG scales,
# the axis labels and finally the theme tweaks on top of theme_bw().
p1 <- ggplot(data = read_daily, mapping = aes(x = PubDay, y = ReadNum)) +
  geom_line() +
  geom_point(size = 0.5) +
  scale_color_npg() +
  scale_fill_npg() +
  labs(x = "日期", y = "总阅读量") +
  theme_bw() +
  theme(
    title = element_text(size = 10),
    axis.title = element_text(size = 10), # axis title size
    axis.text.x = element_text(size = 8), # x-axis tick label size
    axis.text.y = element_text(size = 8)  # y-axis tick label size
  )
# Display the figure, then write it to disk as a 12 cm x 5 cm JPEG at 300 dpi.
p1
ggsave(
  filename = "./2024/Rplots/逐日阅读量变化.jpg",
  plot = p1,
  width = 12,
  height = 5,
  units = "cm",
  dpi = 300
)
# Descriptive statistics for every column of the daily table.
summary(read_daily)
本号单日阅读最少305,中值1650,平均每日阅读量3793,2024年阅读量最大的一天为18943。
定位一下最少和最大阅读量所在日期:
# Rows of read_daily with the smallest daily read count (ties are all kept).
# read_daily names its read-count column ReadNum, so that is the column to
# filter on; na.rm = TRUE keeps a missing value from turning the minimum
# into NA and silently returning zero rows.
min_read_date <- read_daily %>%
  filter(ReadNum == min(ReadNum, na.rm = TRUE))
# Rows of read_daily with the largest daily read count (ties are all kept).
max_read_date <- read_daily %>%
  filter(ReadNum == max(ReadNum, na.rm = TRUE))
# Print both results.
print(min_read_date)
print(max_read_date)
不同渠道的逐日阅读量变化
从不同渠道来看,年初阅读量最高的来源是“聊天会话”,之后则主要来自“公众号消息”;“推荐”带来的阅读量比较少,可能还是我的内容比较小众,达不到微信公众号推荐的标准。公众号现在的推送机制总是改,希望大家给我加个“星标”,不然可能就看不到了。
# Per-channel daily reads: keep the date, read-count and channel columns under
# English names, and drop the rows whose channel is "全部".
read_daily_source <- user2 %>%
  select(PubDay, ReadNum = 阅读次数, Source = 渠道) %>%
  filter(Source != "全部")
# Daily read count per channel, one coloured line (with points) per Source.
p2 <- ggplot(
  data = read_daily_source,
  mapping = aes(x = PubDay, y = ReadNum, color = Source)
) +
  geom_line() +
  geom_point(size = 0.5) +
  # Fixed colour for each channel; the legend is titled "阅读量来源".
  scale_color_manual(
    name = "阅读量来源",
    values = c(
      "公众号消息" = "#a6cee3",
      "聊天会话" = "#1f78b4",
      "朋友圈" = "#b2df8a",
      "公众号主页" = "#33a02c",
      "其它" = "#fb9a99",
      "推荐" = "#e31a1c",
      "搜一搜" = "#fdbf6f",
      "朋友在看" = "#ff7f00"
    )
  ) +
  labs(x = "日期", y = "总阅读量") +
  theme_bw() +
  theme(
    title = element_text(size = 10),
    axis.title = element_text(size = 10), # axis title size
    axis.text.x = element_text(size = 8), # x-axis tick label size
    axis.text.y = element_text(size = 8), # y-axis tick label size
    legend.position = "bottom",
    legend.background = element_blank()
  )
# Display the figure, then write it to disk as a 12 cm x 8 cm JPEG at 300 dpi.
p2
ggsave(
  filename = "./2024/Rplots/分渠道逐日阅读量变化.jpg",
  plot = p2,
  width = 12,
  height = 8,
  units = "cm",
  dpi = 300
)
# Interactive version of the same figure.
plotly::ggplotly(p2)
推文标题内容分析
使用jiebaR进行了分词,构建了推文标题的词云图。结果显示“数据”还是公众号的主旋律,词云占比最大,“论文”、“遥感”、“可视化”也是出现频率非常高的关键词。
# Collect every .xls post export for 2024. The dot is escaped so that only a
# literal ".xls" extension matches.
sourcedata <- list.files(path = "./2024/Tuiwen/", pattern = "\\.xls$")
sourcedir <- paste0("./2024/Tuiwen/", sourcedata)
# Fail early with a clear message instead of an obscure read error later on.
if (length(sourcedir) == 0) {
  stop("No .xls files found in ./2024/Tuiwen/", call. = FALSE)
}
# Read the first sheet of every file and stack all rows in one step.
# Iterating over the paths themselves (instead of 2:length(sourcedir)) also
# works when there is exactly one file, and avoids growing source1 in a loop.
source1 <- do.call(
  rbind,
  lapply(sourcedir, function(f) read_excel(path = f, sheet = 1, col_names = TRUE))
)
# Overall word cloud: start from the post-title column only.
wordstotal <- source1 %>%
  select(内容标题)
# write.csv(wordstotal, "./wordstotal.csv")
library(jiebaR)
library(wordcloud2)
# Segment the titles with a "mix" worker that uses the user dictionary
# ./dict.txt.
mixseg <- worker("mix", user = "./dict.txt")
a <- segment(wordstotal$内容标题, mixseg)
# Load the stop words from the first column of stopwords.txt.
# quote = "" and comment.char = "" disable read.table's default handling of
# quote characters and "#", so entries such as `"`, `'` or `#` are read as
# ordinary stop words instead of truncating or merging lines.
stopwords <- as.vector(
  read.table(
    "stopwords.txt",
    encoding = "UTF-8",
    quote = "",
    comment.char = ""
  )[, 1]
)
# Infix "without" operator: returns the elements of x that do not occur in y,
# in their original order and with duplicates preserved.
`%w/o%` <- function(x, y) {
  # match() gives 0 for every element of x that has no counterpart in y.
  absent <- match(x, y, nomatch = 0L) == 0L
  x[absent]
}
# Drop the stop words from the segmented tokens.
a2 <- a %w/o% stopwords
# Count each remaining token, then order the counts from most to least
# frequent.
JCfreq <- table(a2)
JCfreq2 <- JCfreq[order(JCfreq, na.last = NA, decreasing = TRUE)]
# Draw the word cloud in a circular layout.
wordcloud2(JCfreq2, shape = "circle")
粉丝数量分析
2024年1月1日总粉丝数48625,2024年12月31日总粉丝数55848,本年度增长7223个粉丝,增长14.85%。
下面是计算的每月粉丝增长情况,没有净增长数量超过1000的月份,6、7月期末和暑假期间明显下降,8月有所回升,12月增长数量最多,净增长912。
# Follower analysis: read the first sheet of the user workbook.
useranalysis <- read_excel(
  path = "./2024/User/UserAnalysis.xlsx",
  sheet = 1,
  col_names = TRUE
)
# Drop every row that contains a missing value, then label each row with its
# year-month ("YYYY-MM") taken from the Time column.
useranalysis <- na.omit(useranalysis)
useranalysis <- mutate(useranalysis, YearMonth = format(Time, "%Y-%m"))
# Sum the net follower increase per month.
# NOTE(review): "NetIncresae" presumably mirrors the column header in the
# workbook -- confirm before correcting the spelling.
monthly_net_increase <- summarise(
  group_by(useranalysis, YearMonth),
  TotalNetIncrease = sum(NetIncresae, na.rm = TRUE)
)
# Bar chart of the monthly net follower increase; bar heights are taken
# directly from TotalNetIncrease.
p3 <- ggplot(
  data = monthly_net_increase,
  mapping = aes(x = YearMonth, y = TotalNetIncrease)
) +
  geom_col(fill = "skyblue") +
  labs(title = "粉丝月增长情况", x = "年月", y = "粉丝净增长量") +
  theme_minimal()
# Display the figure, then write it to disk as a 12 cm x 8 cm JPEG at 300 dpi.
p3
ggsave(
  filename = "./2024/Rplots/粉丝月净增长量.jpg",
  plot = p3,
  width = 12,
  height = 8,
  units = "cm",
  dpi = 300
)
感谢大家过去一年对走天涯徐小洋地理数据科学公众号的支持!接下来我继续坚持输出,为大家提供更好的教程和资料!记得给个星标哦!