引言
气候变化越来越能被人们感知。以下图为例,
那么,您所在城市的年均气温上升情况如何?如何获取历史气候数据?如何将这些数据可视化?
这些数据(平均温度、最大温度、最小温度、降水等)可以从 NOAA 的公开历史气候数据库(Past Weather)获取。
包
library(readr)
library(dplyr)
library(janitor)
library(lubridate)
library(hydroTSM)
library(ggplot2)
library(ggrepel)
数据及清理
数据来自 NOAA,为逐日气候数据,通常更新至最近一周左右。
https://www.ncei.noaa.gov/access/past-weather
例如,输入 shijiazhuang,到页面底部的 “Download Station's Data: csv json xml text” 处,点击 Excel 图标(绿色),即可下载 csv 格式数据。
read_csv() 的 skip = 1 参数表示跳过第一行不读;janitor::clean_names() 将列名清理为整洁的格式,即统一为小写、以下划线连接。
# Read the downloaded station file, skipping its first line, and normalise
# the column names to lower-case snake_case.
weather_data <- janitor::clean_names(
  read_csv(
    file = "data.csv",
    skip = 1,
    show_col_types = FALSE
  )
)
colnames(weather_data)
[1] "date" "tavg_degrees_fahrenheit"
[3] "tmax_degrees_fahrenheit" "tmin_degrees_fahrenheit"
[5] "prcp_inches" "snow_inches"
[7] "snwd_inches"
# Derive calendar fields first, then add metric versions of the measurements.
weather_data <- weather_data |>
  mutate(
    date = lubridate::ymd(date),
    year = year(date),
    month = month(date),
    season = hydroTSM::time2season(date, out.fmt = "seasons")
  ) |>
  mutate(
    # Fahrenheit -> Celsius
    tavg_degrees_c = (tavg_degrees_fahrenheit - 32) / 1.8,
    tmax_degrees_c = (tmax_degrees_fahrenheit - 32) / 1.8,
    tmin_degrees_c = (tmin_degrees_fahrenheit - 32) / 1.8,
    # inches -> millimetres
    prcp_mm = prcp_inches * 25.4
  )
glimpse(weather_data)
Rows: 25,422
Columns: 14
$ date <date> 1955-01-01, 1955-01-02, 1955-01-03, 1955-01-0…
$ tavg_degrees_fahrenheit <dbl> 14, 13, 13, 14, 18, 11, 13, 15, 20, 22, 22, 22…
$ tmax_degrees_fahrenheit <dbl> 19, 23, 17, 23, 29, 25, 20, 26, 29, 37, 39, 29…
$ tmin_degrees_fahrenheit <dbl> 6, 2, 4, 5, 9, -3, 4, 4, 10, 16, 9, 14, 11, 17…
$ prcp_inches <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.02, 0.00…
$ snow_inches <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ snwd_inches <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ year <dbl> 1955, 1955, 1955, 1955, 1955, 1955, 1955, 1955…
$ month <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ season <chr> "winter", "winter", "winter", "winter", "winte…
$ tavg_degrees_c <dbl> -10.000000, -10.555556, -10.555556, -10.000000…
$ tmax_degrees_c <dbl> -7.2222222, -5.0000000, -8.3333333, -5.0000000…
$ tmin_degrees_c <dbl> -14.4444444, -16.6666667, -15.5555556, -15.000…
$ prcp_mm <dbl> 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.50…
设置绘图风格
# Shared plotting constants: font family, base text size and base line width.
font <- "Prompt"
cex <- 14
lwd <- 0.4

# Classic theme with the shared constants, unclipped and background-free
# strips, and axis text at the base size; installed as the session default.
(
  theme_classic(
    base_size = cex,
    base_family = font,
    base_line_size = lwd,
    base_rect_size = lwd
  ) +
    theme(
      axis.text = element_text(size = cex, color = "grey0"),
      strip.background = element_blank(),
      strip.clip = "off"
    )
) |>
  theme_set()
绘图
数据汇总为年均温,
# Mean of the daily average temperature (Celsius) per year, using only
# observations dated before 2024-01-01.
# Optional extra filters that were left disabled:
#   date > ymd("1950-01-01")
#   season == "summer"
data_filter_summarise <- weather_data |>
  filter(date < ymd("2024-01-01")) |>
  summarise(
    year_tavg_degrees_c = mean(tavg_degrees_c, na.rm = TRUE),
    .by = year
  )
glimpse(data_filter_summarise)
Rows: 69
Columns: 2
$ year <dbl> 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 19…
$ year_tavg_degrees_c <dbl> 13.32725, 11.85944, 12.35616, 13.00000, 13.16895, …
绘图,
# Yearly mean temperature: points coloured by value, a GAM trend line, the
# smoother's ribbon drawn as dashed outlines, and labels for selected years.
fig <- ggplot(
  data_filter_summarise,
  aes(
    x = year,
    y = year_tavg_degrees_c,
    color = year_tavg_degrees_c,
    fill = year_tavg_degrees_c
  )
) +
  # Dashed vertical reference line at 1990.
  geom_vline(xintercept = 1990, linewidth = 0.4, linetype = 2) +
  # Shape 21 keeps the mapped fill while the outline is fixed.
  geom_point(shape = 21, size = 2, color = "grey0") +
  # Rug on the right-hand side, where the y axis is placed.
  geom_rug(sides = "r") +
  # Trend line.
  stat_smooth(
    mapping = aes(group = 1),
    geom = "line",
    method = "gam",
    formula = y ~ s(x, k = 50),
    color = "grey0",
    linewidth = 0.8
  ) +
  # Same smoother drawn as a ribbon with no fill, so only its dashed
  # outline is visible.
  stat_smooth(
    mapping = aes(group = 1),
    geom = "ribbon",
    method = "gam",
    formula = y ~ s(x, k = 50),
    color = "grey0",
    fill = NA,
    linewidth = 0.6,
    linetype = 2
  ) +
  # Label 1990 and every year from 2020 onwards.
  geom_label_repel(
    mapping = aes(label = year),
    data = \(d) filter(d, year == 1990 | year >= 2020),
    seed = 1,
    force = 1e2,
    label.r = unit(0, "pt"),
    segment.size = 0.4,
    color = "grey0",
    family = font
  ) +
  scale_color_distiller(palette = "RdBu", name = NULL) +
  scale_fill_distiller(palette = "RdBu", name = NULL) +
  scale_y_continuous(
    position = "right",
    expand = expansion(mult = c(0, 0.04))
  ) +
  labs(
    x = NULL, # "年份",
    y = "年平均温度(摄氏度)",
    caption = "数据:NOAA|Past Weather"
  ) +
  # NOTE(review): the 1956 mean printed in the summary output (about 11.86)
  # is below the lower y limit of 12, so that point is probably outside the
  # visible panel -- confirm this zoom is intended.
  coord_cartesian(
    xlim = c(1950, 2024),
    ylim = c(12, 16)
  ) +
  theme(legend.position = "none")
# fig
# Save the figure as a 5 x 5 PNG at 800 dpi.
ggsave(
  filename = "daily-climate.png",
  plot = fig,
  width = 5,
  height = 5,
  dpi = 800
)
结语
如是我闻:网络上看到的数据可视化,不给数据出处的,一律只能当作涂鸦,而不能当作数据看待。我觉得很有道理。