将 https://127.0.0.1:2379 上的 etcd 数据备份到 /var/lib/backup/etcd-snapshot.db,
然后使用之前的备份文件 /data/backup/etcd-snapshot-previous.db 还原 etcd。
备份和还原时使用指定的 ca.crt、etcd-client.crt、etcd-client.key。
# 安装etcdctl
https://github.com/etcd-io/etcd/releases
# 部署一个Pod
# Pod "counter": a single busybox container that prints the current date
# every 2 seconds.
apiVersion: v1
kind: Pod
metadata:
  name: counter
spec:
  containers:
  - name: count
    image: busybox
    args:
    - /bin/sh
    - -c
    # A literal block (|) keeps the line breaks and every statement is
    # terminated explicitly, so the loop does not depend on how YAML folds
    # the lines (a folded "echo $(date) sleep 2;" would never sleep).
    - |
      while true; do
        echo $(date);
        sleep 2;
      done
controlplane ~ ➜ vi count.yml
controlplane ~ ➜ kubectl apply -f count.yml
pod/counter created
controlplane ~ ➜ kubectl logs counter
Mon Jul 29 04:11:12 UTC 2024
Mon Jul 29 04:11:14 UTC 2024
Mon Jul 29 04:11:16 UTC 2024
#!/bin/bash
# Download an etcd release tarball from GitHub and install the etcd and
# etcdctl binaries into /usr/local/bin.
#
# Environment:
#   ETCD_VERSION  release tag to install (default: v3.3.1)
#
# The tarball and the extracted directory are created in the current working
# directory and removed again once the binaries have been copied.
set -euo pipefail

ETCD_VERSION=${ETCD_VERSION:-v3.3.1}
release="etcd-${ETCD_VERSION}-linux-amd64"
tarball="${release}.tar.gz"

# -f makes curl fail on an HTTP error instead of saving the error page as the
# tarball; -L follows the redirects GitHub uses for release downloads.
curl -fL "https://github.com/coreos/etcd/releases/download/${ETCD_VERSION}/${tarball}" -o "${tarball}"
tar xzvf "${tarball}"
rm -- "${tarball}"

# Copy by path instead of cd-ing into the extracted directory: the previous
# version ran `rm -rf` on the relative directory name from inside that
# directory, so the extracted tree was never actually removed.
sudo cp -- "${release}/etcd" "${release}/etcdctl" /usr/local/bin/
rm -rf -- "${release}"

# Smoke test: print the version of the installed client.
etcdctl --version
# 安装etcdctl
# 获取etcd信息
controlplane ~ ➜ kubectl get pod -n kube-system etcd-controlplane -o=jsonpath='{.spec.containers[0].command}'
["etcd","--advertise-client-urls=https://192.7.195.8:2379","--cert-file=/etc/kubernetes/pki/etcd/server.crt","--client-cert-auth=true","--data-dir=/var/lib/etcd","--experimental-initial-corrupt-check=true","--experimental-watch-progress-notify-interval=5s","--initial-advertise-peer-urls=https://192.7.195.8:2380","--initial-cluster=controlplane=https://192.7.195.8:2380","--key-file=/etc/kubernetes/pki/etcd/server.key","--listen-client-urls=https://127.0.0.1:2379,https://192.7.195.8:2379","--listen-metrics-urls=http://127.0.0.1:2381","--listen-peer-urls=https://192.7.195.8:2380","--name=controlplane","--peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt","--peer-client-cert-auth=true","--peer-key-file=/etc/kubernetes/pki/etcd/peer.key","--peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt","--snapshot-count=10000","--trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt"]
# 使用 etcdctl 将当前 etcd 数据备份到 /data/backup/etcd-snapshot-previous.db(之后用它来还原)
# Save a snapshot of the etcd endpoint https://127.0.0.1:2379 to
# /data/backup/etcd-snapshot-previous.db, using the CA, certificate and key
# files under /etc/kubernetes/pki/etcd for the TLS connection.
ETCDCTL_API=3 etcdctl \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/server.crt \
  --key=/etc/kubernetes/pki/etcd/server.key \
  snapshot save /data/backup/etcd-snapshot-previous.db
controlplane ~ ➜ ETCDCTL_API=3 etcdctl --endpoints=https://127.0.0.1:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key snapshot save /data/backup/etcd-snapshot-previous.db
Snapshot saved at /data/backup/etcd-snapshot-previous.db
# 删除当前pod
controlplane ~ ➜ kubectl delete pod counter
pod "counter" deleted
controlplane ~ ➜ kubectl get pod
No resources found in default namespace.
# 备份当前etcd数据
# Save a snapshot of the current etcd data (endpoint https://127.0.0.1:2379)
# to /var/lib/backup/etcd-snapshot.db, using the CA, certificate and key
# files under /etc/kubernetes/pki/etcd for the TLS connection.
ETCDCTL_API=3 etcdctl \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/server.crt \
  --key=/etc/kubernetes/pki/etcd/server.key \
  snapshot save /var/lib/backup/etcd-snapshot.db
# 使用/data/backup/etcd-snapshot-previous.db还原
- 停止所有 API 服务器实例
- 为所有 etcd 实例恢复状态
- 重启所有 API 服务器实例
controlplane ~ ➜ mv /etc/kubernetes/manifests /etc/kubernetes/manifests.bak
controlplane ~ ➜ mv /var/lib/etcd /var/lib/etcd.bak
controlplane ~ ➜ kubectl get pod -A
The connection to the server controlplane:6443 was refused - did you specify the right host or port?
# Restore the earlier snapshot into a fresh /var/lib/etcd (the previous data
# directory was moved to /var/lib/etcd.bak before this step).
export ETCDCTL_API=3
etcdctl --data-dir=/var/lib/etcd snapshot restore /data/backup/etcd-snapshot-previous.db
# Move the static Pod manifests back into place so that etcd and the API
# server are started again on top of the restored data; while the directory
# is still named manifests.bak the control plane stays down.
mv /etc/kubernetes/manifests.bak /etc/kubernetes/manifests
# 重建
controlplane ~ ✖ export ETCDCTL_API=3
etcdctl --data-dir=/var/lib/etcd snapshot restore /data/backup/etcd-snapshot-previous.db
2024-07-29 05:22:53.805370 I | mvcc: restore compact to 4738
2024-07-29 05:22:53.810432 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32
# 验证:之前删除的 Pod 已从快照中恢复(kubectl get pod 可以看到),但 kubectl logs 报 NotFound,说明它并没有完全恢复
controlplane ~ ➜ kubectl get pod
NAME READY STATUS RESTARTS AGE
counter 1/1 Running 0 23m
controlplane ~ ➜ kubectl logs counter
Error from server (NotFound): the server could not find the requested resource ( pods/log counter)
# 可以删除重建此Pod