Previous article: First guide of its kind | Setting Up a gpdr Disaster Recovery Environment for GreenPlum (Part 1) -- Theory Overview
Environment Preparation
docker run -itd --name gpdb7p -h gpdb7 -p 54198:5432 -p 28298:28080 --privileged=true lhrbest/greenplum:7.3.0 /usr/sbin/init

docker run -itd --name gpdb7r -h gpdb7 -p 54199:5432 -p 28299:28080 --privileged=true lhrbest/greenplum:7.3.0 /usr/sbin/init
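As a quick sanity check (not part of the official procedure), you can confirm both containers are up and open a shell in each as gpadmin; this assumes the lhrbest/greenplum image ships with the gpadmin user, which the later steps rely on anyway:

# List the two containers and their port mappings (the name filter matches both gpdb7p and gpdb7r)
docker ps --filter name=gpdb7

# Open a shell as gpadmin in the primary and in the recovery container
docker exec -it gpdb7p su - gpadmin
docker exec -it gpdb7r su - gpadmin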
Install the gpdr Software
-- On all hosts
rpm -ivh greenplum-disaster-recovery-*.rpm
sudo chown -R gpadmin:gpadmin /usr/local/gpdr
echo 'source /usr/local/gpdr/gpdr_path.sh' >> /home/gpadmin/.bashrc
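A simple way to verify the installation on each host is to check the RPM and confirm that gpdr resolves on gpadmin's PATH once gpdr_path.sh is sourced; this is only a sanity check, and the exact package name may differ by version:

# Confirm the package landed and the gpdr binary is resolvable for gpadmin
rpm -qa | grep greenplum-disaster-recovery
su - gpadmin -c 'source /usr/local/gpdr/gpdr_path.sh && which gpdr'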
Configure the Primary Cluster
-- Configure the Primary Cluster
-- Template: /usr/local/gpdr/templates/s3_config_file.yml
cat > /usr/local/gpdr/s3_config_file.yml <<"EOF"
type: s3
path: /gpdb7
s3_region: cn
s3_endpoint: https://192.17.8.162:9003
s3_bucket: gpdr
s3_key: 12345678
s3_key_secret: 12345678
s3_uri_style: path
storage_verify_tls: n
storage_port: 9003
process-max: 4
buffer-size: 4MiB
archive-async: y
spool-path: /usr/local/gpdr/spool
archive-push-queue-max: 10GiB
archive-get-queue-max: 5GiB
archive-timeout: 300
repo1-retention-full: 1
repo1-retention-archive: 1
EOF

# This step automatically restarts the primary cluster
rm -rf /usr/local/gpdr/configs/pgbackrest-*.conf
gpdr configure backup --config-file /usr/local/gpdr/s3_config_file.yml
# gpdr configure backup --reconfigure /usr/local/gpdr/s3_config_file.yml

gpconfig -s archive_mode
gpconfig -s archive_command
gpconfig -s gp_segment_configuration_file

[gpadmin@gpdb7 ~]$ gpconfig -s archive_mode
Values on all segments are consistent
GUC : archive_mode
Coordinator value: on
Segment value: on
[gpadmin@gpdb7 ~]$ gpconfig -s archive_command
Values on all segments are consistent
GUC : archive_command
Coordinator value: pgbackrest --stanza=gpdb-seg%c --config=/usr/local/gpdr/configs/pgbackrest-seg%c.conf archive-push %p --pg1-path $(pwd)
Segment value: pgbackrest --stanza=gpdb-seg%c --config=/usr/local/gpdr/configs/pgbackrest-seg%c.conf archive-push %p --pg1-path $(pwd)

create database db1;
\c db1
CREATE TABLE t_hash AS
SELECT id, md5(id::text)
FROM generate_series(1, 2000000) AS id;

insert into t_hash select * from t_hash;
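Before moving on to the recovery side, it is worth confirming on the primary that the test data and the generated backup configuration look sane. A minimal check, assuming psql connects to db1 as gpadmin, might be:

# The self-insert doubles the 2,000,000 seed rows, so expect 4,000,000
psql -d db1 -c "SELECT count(*) FROM t_hash;"

# The per-segment pgbackrest configs written by `gpdr configure backup` should exist
ls -l /usr/local/gpdr/configs/pgbackrest-*.conf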
Configure the Recovery Cluster
You can either use an existing cluster here, or have gpdr create the standby cluster directly.
-- Configure the Recovery Cluster
cat > /usr/local/gpdr/s3_config_file.yml <<"EOF"
type: s3
path: /gpdb7
s3_region: cn
s3_endpoint: https://192.17.8.162:9003
s3_bucket: gpdr
s3_key: 12345678
s3_key_secret: 12345678
s3_uri_style: path
storage_verify_tls: n
storage_port: 9003
process-max: 4
buffer-size: 4MiB
archive-async: y
spool-path: /usr/local/gpdr/spool
archive-push-queue-max: 10GiB
archive-get-queue-max: 5GiB
archive-timeout: 300
repo1-retention-full: 1
repo1-retention-archive: 1
EOF
gpdr configure restore --config-file /usr/local/gpdr/s3_config_file.yml --use-existing-cluster
/*
# gpdr configure restore --config-file /usr/local/gpdr/s3_config_file.yml --recovery-cluster-config-file /usr/local/gpdr/recovery_cluster_config_file_gp7.yml

cat > /usr/local/gpdr/recovery_cluster_config_file_gp7.yml <<"EOF"
pguser: gpadmin
prefix: gpseg
num_segments: 3
coordinator_host: gpdb7
coordinator_port: 7000
coordinator_data_directory: /opt/greenplum/data/master
segment_hosts:
  - gpdb7
segment_port_base: 7002
data_directories:
  - /opt/greenplum/data/primary
  - /opt/greenplum/data/primary
  - /opt/greenplum/data/primary
EOF

cat > /home/gpadmin/conf/initgp_config <<"EOF"
declare -a DATA_DIRECTORY=(/opt/greenplum/data/primary /opt/greenplum/data/primary)
ARRAY_NAME="gpdb"
SEG_PREFIX=gpseg
PORT_BASE=6000
MASTER_PORT=5432
MASTER_HOSTNAME=gpdb7
MASTER_DIRECTORY=/opt/greenplum/data/master
DATABASE_NAME=gpdb
MACHINE_LIST_FILE=/home/gpadmin/conf/seg_hosts
EOF

gpinitsystem -c /home/gpadmin/conf/initgp_config
*/
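Whichever of the two paths you take, a quick check on the recovery host is to confirm the cluster was initialized correctly and that `gpdr configure restore` wrote its per-segment pgbackrest configuration; a minimal sketch:

# The recovery cluster should report all segments healthy
gpstate -s

# gpdr configure restore generates one pgbackrest config per segment
ls -l /usr/local/gpdr/configs/pgbackrest-*.conf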
Start Full + Incremental Synchronization
-- Full synchronization
gpdr backup --type full
gpdr restore -t incremental --restore-point latest

-- Enable Read Replica Mode on the recovery cluster
gpdr read-replica enable
gpstart -a

-- Continuous synchronization (at least one full backup must be retained)
gpdr create-restore-point
gpdr restore -t continuous --buffer-size 16MiB --process-max 4 --restore-point latest

-- Incremental synchronization (at least one full backup must be retained)
gpdr backup --type incr
gpdr restore -t incr --buffer-size 16MiB --process-max 4 --restore-point latest

gpdr monitor backup
gpdr info restore
gpdr info backup
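With Read Replica Mode enabled and the restore applied, a straightforward way to confirm the recovery cluster is actually serving the replicated data is to query the test table there; the second command is my own check and is expected to be rejected while the cluster runs as a read replica, not an official verification step:

# On the recovery cluster: the replicated test table should be readable
psql -d db1 -c "SELECT count(*) FROM t_hash;"

# A write attempt should be rejected in read replica mode
psql -d db1 -c "INSERT INTO t_hash VALUES (0, 'should fail');"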
Continuous Synchronization (script deployed on the primary cluster)
The official documentation does not provide a concrete script for continuous incremental synchronization, so I wrote my own shell script to drive it and scheduled it to run automatically, in this case every 5 minutes:
cat > /home/gpadmin/gpdr_continuous.sh <<"EOF"
#!/bin/bash

. /home/gpadmin/.bashrc

# Remote host IP addresses
HOST1="192.92.0.41"
HOST2="192.92.0.42"
LOGFILE="/tmp/gpdr_continuous.log"
SLEEP_TIME=600

# Get the current time
CURRENT_TIME=$(date "+%Y-%m-%d %H:%M:%S")

echo "$CURRENT_TIME: Running 'gpdr create-restore-point' on $HOST1" >> $LOGFILE
gpdr create-restore-point >> $LOGFILE 2>&1
if [ $? -eq 0 ]; then
    echo "$CURRENT_TIME: Restore point created on $HOST1" >> $LOGFILE
else
    echo "$CURRENT_TIME: Failed to create restore point on $HOST1" >> $LOGFILE
    exit 1
fi
gpdr info backup --detail >> $LOGFILE 2>&1

echo "$CURRENT_TIME: Running 'gpdr restore -t continuous --restore-point latest' on $HOST2" >> $LOGFILE
ssh $HOST2 "gpdr restore -t continuous --buffer-size 16MiB --process-max 8 --restore-point latest" >> $LOGFILE 2>&1
if [ $? -eq 0 ]; then
    echo "$CURRENT_TIME: Restore initiated on $HOST2" >> $LOGFILE
else
    echo "$CURRENT_TIME: Failed to initiate restore on $HOST2" >> $LOGFILE
    exit 1
fi
ssh $HOST2 "gpdr info restore" >> $LOGFILE 2>&1

echo "$CURRENT_TIME: Sleeping for $SLEEP_TIME seconds..." >> $LOGFILE
echo "----------------------------------------------------------------------------------------" >> $LOGFILE

# sleep $SLEEP_TIME
EOF

chmod +x /home/gpadmin/gpdr_continuous.sh
sh /home/gpadmin/gpdr_continuous.sh

*/5 * * * * sh /home/gpadmin/gpdr_continuous.sh
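Once the cron entry is installed for gpadmin on the primary, the easiest way to see whether the loop is behaving is to check the entry and follow its log:

# Confirm the cron entry exists and watch the sync log
crontab -l | grep gpdr_continuous
tail -f /tmp/gpdr_continuous.log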
Delete Archives (script deployed on the recovery cluster)
WAL logs accumulate and the S3 storage fills up quickly, so WAL logs that have already been replayed need to be deleted. This is also handled by a shell script that runs automatically on a schedule:
Expire Archives
# Note: a Python one-liner is embedded below; adjust the retention window as needed
cat > /home/gpadmin/gpdr_expire.sh <<"EOF"
#!/bin/bash

. /home/gpadmin/.bashrc

gpdr restore -t continuous --buffer-size 16MiB --process-max 4 --restore-point latest

# more /usr/local/gpdr/state/gpdr_restore.last
date_str=$(gpdr info restore | grep "Restore point" | awk '{print $3}' | sed 's/R$//' | sed 's/-//')
date_str2=$(python2 -c "from datetime import datetime, timedelta; print((datetime.strptime('$date_str', '%Y%m%d%H%M%S') - timedelta(hours=8)).strftime('%Y%m%d-%H%M%S'))")
echo $date_str
echo $date_str2

gpdr info backup --before-timestamp $date_str2 --format restore-points

input=$(gpdr info backup --before-timestamp $date_str2 --format restore-points)

# Process and format the output
echo "$input" | sed 's/[()]//g' | awk '
/restore point:/ {
    # Capture the restore point and its timestamp
    restore_point = $NF
    timestamp = $1 " " $2 " " $3" " $4
    next
}
/backup_label:/ {
    # Capture the backup label
    backup_label = $NF
    # Print the formatted result: timestamp, backup_label and restore_point, comma-separated
    print timestamp "," backup_label "," restore_point
}' > /tmp/gpdr_expire.txt

more /tmp/gpdr_expire.txt | grep -v backup | grep -v gpdr > /tmp/gpdr_expire2.txt

threshold=$(echo $date_str2 | sed 's/R$//' | sed 's/-//')

# Read the file line by line
while read -r line; do
    # Extract the date and time from each line
    timestamp1=$(echo "$line" | cut -d ',' -f 1,2)
    formatted_date=$(date -d "$(echo $timestamp1 | sed 's/,//')" +"%Y%m%d%H%M%S")

    # Keep only entries older than the threshold
    if [[ "$formatted_date" < "$threshold" ]]; then
        # Extract the backup label and restore point
        backup_label=$(echo "$line" | cut -d ',' -f 3)
        dr_restore_point=$(echo "$line" | cut -d ',' -f 4)

        # Generate the gpdr expire restore-points command
        echo "gpdr expire restore-points --backup-label $backup_label --dr-restore-point $dr_restore_point"
    fi
done < /tmp/gpdr_expire2.txt > /tmp/gpdr_expire.sh

cat /tmp/gpdr_expire.sh | grep -v ":" > /tmp/gpdr_expire2.sh

sh /tmp/gpdr_expire2.sh
EOF
chmod +x /home/gpadmin/gpdr_expire.sh
0 * * * * sh /home/gpadmin/gpdr_expire.sh > /tmp/gpdr_expire.log
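Because this script ends up expiring restore points, I recommend reviewing what a run actually generated before relying on the hourly cron job; the files below are the intermediate artifacts the script itself writes:

# Inspect the expire commands the last run generated, and its log
cat /tmp/gpdr_expire2.sh
tail -n 50 /tmp/gpdr_expire.log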
Delete Files
Deployed on the S3 storage host:
sudo yum install s3fs-fuse -y

echo "12345678:12345678" > ~/.passwd-s3fs
chmod 600 ~/.passwd-s3fs

mkdir -p /s3fs_gpdr
s3fs gpdr /s3fs_gpdr -o passwd_file=~/.passwd-s3fs -o url=http://127.0.0.1:9001 -o use_path_request_style -o allow_other

# Add the s3fs mount command above to /etc/rc.local so it mounts at boot

# To unmount:
fusermount -uz /s3fs_gpdr
pkill -9 s3fs

0 * * * * find /s3fs_gpdr/gpdb*/gpseg*/archive/gpdb-seg*/ -name "*.gz" -type f -amin +120 -exec rm -rf {} \;
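Before letting the cron job delete anything from the bucket, it is worth confirming the s3fs mount is healthy and previewing which archived WAL files the find expression would remove (the same predicate with -print instead of the delete):

# Confirm the bucket is mounted via s3fs
df -h /s3fs_gpdr

# Dry run: list compressed WAL files older than 120 minutes without deleting them
find /s3fs_gpdr/gpdb*/gpseg*/archive/gpdb-seg*/ -name "*.gz" -type f -amin +120 -print | head -n 20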
Results
Here is the output after the synchronization had been running for several days:
[gpadmin@gpdb7 ~]$ gpdr info backup
20250110:13:54:38 gpdr:gpadmin:gpdb7:2267660-[INFO]:-Getting backup information from the repository
full backup: 20241229-113045F
    Restore Points:
        20241229-113152R_gpdr_full_backup: December 29 2024, 11:31:52
        20241230-111048R: December 30 2024, 11:10:48

incremental backup: 20241229-113045F_20241230-120927I
    Restore Points:
        20241230-121049R_gpdr_incr_backup: December 30 2024, 12:10:49
        20241230-121545R: December 30 2024, 12:15:45

incremental backup: 20241229-113045F_20241230-121807I
    Restore Points:
        20241230-121813R_gpdr_incr_backup: December 30 2024, 12:18:13
        20250110-115002R: January 10 2025, 11:50:02
        20250110-115501R: January 10 2025, 11:55:01
        20250110-120002R: January 10 2025, 12:00:02
        20250110-120502R: January 10 2025, 12:05:02
        20250110-121001R: January 10 2025, 12:10:01
        20250110-121501R: January 10 2025, 12:15:01
        20250110-122002R: January 10 2025, 12:20:02
        20250110-122501R: January 10 2025, 12:25:01
        20250110-123001R: January 10 2025, 12:30:01
        20250110-123501R: January 10 2025, 12:35:01
        20250110-124002R: January 10 2025, 12:40:02
        20250110-124502R: January 10 2025, 12:45:02
        20250110-125001R: January 10 2025, 12:50:01
        20250110-125501R: January 10 2025, 12:55:01
        20250110-130001R: January 10 2025, 13:00:01
        20250110-130501R: January 10 2025, 13:05:01
        20250110-131002R: January 10 2025, 13:10:02
        20250110-131502R: January 10 2025, 13:15:02
        20250110-132001R: January 10 2025, 13:20:01
        20250110-132502R: January 10 2025, 13:25:02
        20250110-133001R: January 10 2025, 13:30:01
        20250110-133502R: January 10 2025, 13:35:02
        20250110-134001R: January 10 2025, 13:40:01
        20250110-134501R: January 10 2025, 13:45:01
        20250110-135002R: January 10 2025, 13:50:02

[gpadmin@gpdb7 ~]$ gpdr info restore
20250110:13:54:45 gpdr:gpadmin:gpdb7:2267676-[INFO]:-Getting restore information

Latest Completed Restore Info
-----------------------------
Restore type: continuous
Restore point: 20250110-135002R
Backup label: 20241229-113045F_20241230-121807I
Restore start: 2025-01-10 13:50:06
Restore stop: 2025-01-10 13:50:36

Current Restore Info
-----------------------------
 Segment ID | Lag | Latest archived WAL      | Last replayed WAL
----------------------------------------------------------------------------
 -1         | 0   | 000000020000003900000008 | 000000020000003900000008
 0          | 0   | 000000020000003C00000009 | 000000020000003C00000009
 1          | 0   | 000000020000003B0000003D | 000000020000003B0000003D
[gpadmin@gpdb7 ~]$
[gpadmin@gpdb7 ~]$ gpdr info restore
20250110:14:51:22 gpdr:gpadmin:gpdb7:2273946-[INFO]:-Getting restore information

Latest Completed Restore Info
-----------------------------
Restore type: continuous
Restore point: 20250110-145001R
Backup label: 20241229-113045F_20241230-121807I
Restore start: 2025-01-10 14:50:05
Restore stop: 2025-01-10 14:50:35

Current Restore Info
-----------------------------
 Segment ID | Lag | Latest archived WAL      | Last replayed WAL
----------------------------------------------------------------------------
 -1         | 0   | 000000020000003900000014 | 000000020000003900000014
 0          | 0   | 000000020000003C00000015 | 000000020000003C00000015
 1          | 0   | 000000020000003C00000009 | 000000020000003C00000009
[gpadmin@gpdb7 ~]$