前2篇:
全网首份资料 | GreenPlum之搭建gpdr灾备环境(一)-- 理论介绍
全网首份资料 | GreenPlum之搭建gpdr灾备环境(二)-- 实战gpdr容灾(GPDB7环境)
环境准备
最低版本:6.27.1 ,还需要自己升级一下gpdb的版本到gpdb6.28.0
1docker run -itd --name gpdb6p -h gpdb6270 -p 54298:5432 -p 29298:28080 --privileged=true lhrbest/greenplum:6.27.0_v2 /usr/sbin/init
2docker run -itd --name gpdb6r -h gpdb6270 --privileged=true lhrbest/greenplum:6.27.0_v2 /usr/sbin/init
安装gpdr软件
1-- 所有节点
2rpm -ivh greenplum-disaster-recovery-*.rpm
3sudo chown -R gpadmin:gpadmin /usr/local/gpdr
4echo 'source /usr/local/gpdr/gpdr_path.sh' >> /home/gpadmin/.bashrc
配置Primary Cluster
1-- Configure the Primary Cluster
2-- /usr/local/gpdr/templates/s3_config_file.yml
3cat > /usr/local/gpdr/s3_config_file.yml <<"EOF"
4type: s3
5path: /gpdb6
6s3_region: cn
7s3_endpoint: https://192.17.8.162:9003
8s3_bucket: gpdr
9s3_key: 12345678
10s3_key_secret: 12345678
11s3_uri_style: path
12storage_verify_tls: n
13storage_port: 9003
14process-max: 4
15buffer-size: 4MiB
16archive-async: y
17spool-path: /usr/local/gpdr/spool
18archive-push-queue-max: 10GiB
19archive-get-queue-max: 5GiB
20archive-timeout: 300
21repo1-retention-full: 1
22repo1-retention-archive: 1
23EOF
24
25# 会自动重启库
26rm -rf /usr/local/gpdr/configs/pgbackrest-*.conf
27gpdr configure backup --config-file /usr/local/gpdr/s3_config_file.yml
28# gpdr configure backup --reconfigure /usr/local/gpdr/s3_config_file.yml
29
30[gpadmin@gpdb6270 ~]$ gpconfig -s archive_mode
31Values on all segments are consistent
32GUC : archive_mode
33Master value: on
34Segment value: on
35[gpadmin@gpdb6270 ~]$ gpconfig -s archive_command
36Values on all segments are consistent
37GUC : archive_command
38Master value: pgbackrest --stanza=gpdb-seg%c --config=/usr/local/gpdr/configs/pgbackrest-seg%c.conf archive-push %p --pg1-path $(pwd)
39Segment value: pgbackrest --stanza=gpdb-seg%c --config=/usr/local/gpdr/configs/pgbackrest-seg%c.conf archive-push %p --pg1-path $(pwd)
40[gpadmin@gpdb6270 ~]$
41
42
43
44
45create database db1;
46\c db1
47CREATE TABLE t_hash AS
48SELECT id, md5(id::text)
49FROM generate_series(1, 2000000) AS id;
50
51insert into t_hash select * from t_hash;
配置备集群
这里可以使用已有的集群,也可以直接使用gpdr来新建备用集群。
1cat > /usr/local/gpdr/s3_config_file.yml <<"EOF"
2type: s3
3path: /gpdb6
4s3_region: cn
5s3_endpoint: https://192.17.8.162:9003
6s3_bucket: gpdr
7s3_key: 12345678
8s3_key_secret: 12345678
9s3_uri_style: path
10storage_verify_tls: n
11storage_port: 9003
12process-max: 4
13buffer-size: 4MiB
14archive-async: y
15spool-path: /usr/local/gpdr/spool
16archive-push-queue-max: 10GiB
17archive-get-queue-max: 5GiB
18archive-timeout: 300
19repo1-retention-full: 1
20repo1-retention-archive: 1
21EOF
22gpdr configure restore --config-file /usr/local/gpdr/s3_config_file.yml --use-existing-cluster
23/*
24# gpdr configure restore --config-file /usr/local/gpdr/s3_config_file.yml --recovery-cluster-config-file /usr/local/gpdr/recovery_cluster_config_file_gp7.yml
25
26cat > /usr/local/gpdr/recovery_cluster_config_file_gp7.yml <<"EOF"
27pguser: gpadmin
28prefix: gpseg
29num_segments: 3
30coordinator_host: gpdb7
31coordinator_port: 7000
32coordinator_data_directory: /opt/greenplum/data/master
33segment_hosts:
34 - gpdb7
35segment_port_base: 7002
36data_directories:
37 - /opt/greenplum/data/primary
38 - /opt/greenplum/data/primary
39 - /opt/greenplum/data/primary
40EOF
41
42
43cat > /home/gpadmin/conf/initgp_config <<"EOF"
44declare -a DATA_DIRECTORY=(/opt/greenplum/data/primary /opt/greenplum/data/primary)
45ARRAY_NAME="gpdb"
46SEG_PREFIX=gpseg
47PORT_BASE=6000
48MASTER_PORT=5432
49MASTER_HOSTNAME=gpdb7
50MASTER_DIRECTORY=/opt/greenplum/data/master
51DATABASE_NAME=gpdb
52MACHINE_LIST_FILE=/home/gpadmin/conf/seg_hosts
53EOF
54
55
56gpinitsystem -c /home/gpadmin/conf/initgp_config
57
58*/
开始全量+增量同步
1-- 全量同步
2gpdr backup --type full
3gpdr restore -t incremental --restore-point latest
4
5
6-- 持续同步(必须保留一次全备)
7gpdr create-restore-point
8gpdr restore -t continuous --buffer-size 16MiB --process-max 4 --restore-point latest
9
10-- 增量同步(必须保留一次全备)
11gpdr backup --type incr
12gpdr restore -t incr --buffer-size 16MiB --process-max 4 --restore-point latest
13
14gpdr monitor backup
15gpdr info restore
16gpdr info backup
持续同步(脚本部署在主库)
有关持续增量同步,官网并没有给出具体的脚本,故这里麦老师自己写了脚本来同步,也是写shell脚本自动定期执行,这里每5分钟运行1次:
1cat > /home/gpadmin/gpdr_continuous.sh <<"EOF"
2#!/bin/bash
3
4. /home/gpadmin/.bashrc
5
6# 定义远程主机 IP 地址
7HOST1="192.92.0.41"
8HOST2="192.92.0.42"
9LOGFILE="/tmp/gpdr_continuous.log"
10SLEEP_TIME=600
11
12# 获取当前时间
13CURRENT_TIME=$(date "+%Y-%m-%d %H:%M:%S")
14
15echo "$CURRENT_TIME: Running 'gpdr create-restore-point' on $HOST1" >> $LOGFILE
16gpdr create-restore-point >> $LOGFILE 2>&1
17if [ $? -eq 0 ]; then
18 echo "$CURRENT_TIME: Restore point created on $HOST1" >> $LOGFILE
19else
20 echo "$CURRENT_TIME: Failed to create restore point on $HOST1" >> $LOGFILE
21 exit 1
22fi
23gpdr info backup --detail >> $LOGFILE 2>&1
24
25
26echo "$CURRENT_TIME: Running 'gpdr restore -t continuous --restore-point latest' on $HOST2" >> $LOGFILE
27ssh $HOST2 "gpdr restore -t continuous --buffer-size 16MiB --process-max 8 --restore-point latest" >> $LOGFILE 2>&1
28if [ $? -eq 0 ]; then
29 echo "$CURRENT_TIME: Restore initiated on $HOST2" >> $LOGFILE
30else
31 echo "$CURRENT_TIME: Failed to initiate restore on $HOST2" >> $LOGFILE
32 exit 1
33fi
34ssh $HOST2 "gpdr info restore" >> $LOGFILE 2>&1
35
36echo "$CURRENT_TIME: Sleeping for $SLEEP_TIME seconds..." >> $LOGFILE
37echo "----------------------------------------------------------------------------------------" >> $LOGFILE
38
39# sleep $SLEEP_TIME
40
41
42EOF
43
44chmod +x /home/gpadmin/gpdr_continuous.sh
45sh /home/gpadmin/gpdr_continuous.sh
46
47*/5 * * * * sh /home/gpadmin/gpdr_continuous.sh
删除归档(脚本部署在备库)
wal日志会堆积,S3存储很快就会满,所以需要删除已经应用过的wal日志,也是写shell脚本自动定期执行:
过期归档
1# 需要注意有python3脚本,修改保留时效
2cat > /home/gpadmin/gpdr_expire.sh <<"EOF"
3#!/bin/bash
4
5. /home/gpadmin/.bashrc
6
7gpdr restore -t continuous --buffer-size 16MiB --process-max 4 --restore-point latest
8
9# more /usr/local/gpdr/state/gpdr_restore.last
10date_str=$(gpdr info restore | grep "Restore point" | awk '{print $3}' | sed 's/R$//' | sed 's/-//')
11date_str2=$(python2 -c "from datetime import datetime, timedelta; print((datetime.strptime('$date_str', '%Y%m%d%H%M%S') - timedelta(hours=8)).strftime('%Y%m%d-%H%M%S'))")
12echo $date_str
13echo $date_str2
14
15gpdr info backup --before-timestamp $date_str2 --format restore-points
16
17
18input=$(gpdr info backup --before-timestamp $date_str2 --format restore-points)
19
20# 处理并格式化输出
21echo "$input" | sed 's/[()]//g' | awk '
22/restore point:/ {
23 # 捕获 restore point 和时间戳
24 restore_point = $NF
25 timestamp = $1 " " $2 " " $3" " $4
26 next
27}
28/backup_label:/ {
29 # 捕获 backup label
30 backup_label = $NF
31 # 输出格式化的结果,backup_label 和 restore_point 之间加逗号
32 print timestamp "," backup_label "," restore_point
33}' > /tmp/gpdr_expire.txt
34
35more /tmp/gpdr_expire.txt | grep -v backup | grep -v gpdr > /tmp/gpdr_expire2.txt
36
37threshold=$(echo $date_str2 | sed 's/R$//' | sed 's/-//')
38
39
40# 逐行读取文件
41while read -r line; do
42 # 从每行中提取日期和时间信息
43 timestamp1=$(echo "$line" | cut -d ',' -f 1,2)
44 formatted_date=$(date -d "$(echo $timestamp1 | sed 's/,//')" +"%Y%m%d%H%M%S")
45
46 # 判断时间是否小于阈值
47 if [[ "$formatted_date" < "$threshold" ]]; then
48 # 提取备份标签和恢复点
49 backup_label=$(echo "$line" | cut -d ',' -f 3)
50 dr_restore_point=$(echo "$line" | cut -d ',' -f 4)
51
52 # 生成 gpdr expire restore-points 命令
53 echo "gpdr expire restore-points --backup-label $backup_label --dr-restore-point $dr_restore_point"
54 fi
55done < /tmp/gpdr_expire2.txt > /tmp/gpdr_expire.sh
56
57cat /tmp/gpdr_expire.sh | grep -v ":" > /tmp/gpdr_expire2.sh
58
59sh /tmp/gpdr_expire2.sh
60
61
62EOF
63chmod +x /home/gpadmin/gpdr_expire.sh
640 * * * * sh /home/gpadmin/gpdr_expire.sh > /tmp/gpdr_expire.log
删除文件
部署在S3设备:
1sudo yum install s3fs-fuse -y
2
3echo "12345678:12345678" > ~/.passwd-s3fs
4chmod 600 ~/.passwd-s3fs
5
6mkdir -p /s3fs_gpdr
7s3fs gpdr /s3fs_gpdr -o passwd_file=~/.passwd-s3fs -o url=http://127.0.0.1:9001 -o use_path_request_style -o allow_other
8
9/etc/rc.local
10
11fusermount -uz /s3fs_gpdr
12pkill -9 s3fs
13
14
15
160 * * * * find /s3fs_gpdr/gpdb*/gpseg*/archive/gpdb-seg*/ -name *.gz -type f -amin +120 -exec rm -rf {} \;
结果
我这里是同步了好几天后的结果:
1[gpadmin@gpdb6270 ~]$ gpdr info backup
220250110:13:53:25 gpdr:gpadmin:gpdb6270:622316-[INFO]:-Getting backup information from the repository
3full backup: 20241229-113039F
4 Restore Points:
5 20241229-113106R_gpdr_full_backup: December 29 2024, 11:31:06
6 20241230-123502R: December 30 2024, 12:35:02
7
8 incremental backup: 20241229-113039F_20241230-123446I
9 Restore Points:
10 20241230-123555R_gpdr_incr_backup: December 30 2024, 12:35:55
11
12 incremental backup: 20241229-113039F_20241230-145423I
13 Restore Points:
14 20241230-145430R_gpdr_incr_backup: December 30 2024, 14:54:30
15 20250110-115001R: January 10 2025, 11:50:01
16 20250110-115501R: January 10 2025, 11:55:01
17 20250110-120001R: January 10 2025, 12:00:01
18 20250110-120501R: January 10 2025, 12:05:01
19 20250110-121001R: January 10 2025, 12:10:01
20 20250110-121501R: January 10 2025, 12:15:01
21 20250110-122001R: January 10 2025, 12:20:01
22 20250110-122502R: January 10 2025, 12:25:02
23 20250110-123001R: January 10 2025, 12:30:01
24 20250110-123501R: January 10 2025, 12:35:01
25 20250110-124001R: January 10 2025, 12:40:01
26 20250110-124501R: January 10 2025, 12:45:01
27 20250110-125001R: January 10 2025, 12:50:01
28 20250110-125501R: January 10 2025, 12:55:01
29 20250110-130001R: January 10 2025, 13:00:01
30 20250110-130501R: January 10 2025, 13:05:01
31 20250110-131001R: January 10 2025, 13:10:01
32 20250110-131501R: January 10 2025, 13:15:01
33 20250110-132001R: January 10 2025, 13:20:01
34 20250110-132501R: January 10 2025, 13:25:01
35 20250110-133002R: January 10 2025, 13:30:02
36 20250110-133501R: January 10 2025, 13:35:01
37 20250110-134001R: January 10 2025, 13:40:01
38 20250110-134501R: January 10 2025, 13:45:01
39 20250110-135001R: January 10 2025, 13:50:01
40
41[gpadmin@gpdb6270 ~]$ gpdr info restore
4220250110:13:53:54 gpdr:gpadmin:gpdb6270:622357-[INFO]:-Getting restore information
43
44Latest Completed Restore Info
45-----------------------------
46Restore type: continuous
47Restore point: 20250110-135001R
48Backup label: 20241229-113039F_20241230-145423I
49Restore start: 2025-01-10 13:50:03
50Restore stop: 2025-01-10 13:50:34
51
52Current Restore Info
53-----------------------------
54 Segment ID | Lag | Latest archived WAL | Last replayed WAL
55----------------------------------------------------------------------------
56 -1 | 0 | 00000001000000370000002D | 00000001000000370000002D
57 0 | 0 | 000000010000003A00000038 | 000000010000003A00000038
58 1 | 0 | 000000010000003A0000002C | 000000010000003A0000002C
59[gpadmin@gpdb6270 ~]$
60[gpadmin@gpdb6270 ~]$ gpdr info restore
6120250110:14:51:08 gpdr:gpadmin:gpdb6270:638429-[INFO]:-Getting restore information
62
63Latest Completed Restore Info
64-----------------------------
65Restore type: continuous
66Restore point: 20250110-145001R
67Backup label: 20241229-113039F_20241230-145423I
68Restore start: 2025-01-10 14:50:04
69Restore stop: 2025-01-10 14:50:35
70
71Current Restore Info
72-----------------------------
73 Segment ID | Lag | Latest archived WAL | Last replayed WAL
74----------------------------------------------------------------------------
75 -1 | 0 | 000000010000003700000039 | 000000010000003700000039
76 0 | 0 | 000000010000003B00000004 | 000000010000003B00000004
77 1 | 0 | 000000010000003A00000038 | 000000010000003A00000038
78[gpadmin@gpdb6270 ~]$