High availability for the k8s apiserver with keepalived + nginx
Two control plane nodes and two worker nodes.
1. Install nginx and keepalived on ubuntu0 and ubuntu3
Install keepalived and nginx on ubuntu0 and ubuntu3 to load-balance and reverse-proxy the apiserver.
ubuntu0 is the keepalived master node, ubuntu3 is the keepalived backup node.
root@ubuntu0:~# apt install nginx keepalived -y
Layer-4 (stream) load balancing in front of the apiserver on both master nodes:
root@ubuntu0:/etc/nginx/conf.c# cat ceshi.conf
stream {
    log_format main '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
    access_log /var/log/nginx/k8s-access.log main;

    upstream k8s-apiserver {
        server 192.168.23.99:6443 weight=5 max_fails=3 fail_timeout=30s;
        server 192.168.23.102:6443 weight=5 max_fails=3 fail_timeout=30s;
    }

    server {
        listen 16443;   # nginx shares these nodes with the masters, so it cannot listen on 6443 or it would clash with kube-apiserver
        proxy_pass k8s-apiserver;
    }
}
max_fails=3: after three consecutive failed attempts the backend is considered down. fail_timeout=30s: the window in which those failures are counted, and also how long the failed apiserver is kept out of rotation before nginx tries it again.
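Note: a stream {} block is only valid at the top level of nginx.conf, never inside the http {} block. If ceshi.conf is not being picked up, add an include for it at the top level (a sketch; the path assumes the conf.c directory used above):

# in /etc/nginx/nginx.conf, at the same level as the events {} and http {} blocks
include /etc/nginx/conf.c/*.conf;

Run nginx -t afterwards to validate the configuration before (re)starting nginx.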
Copy the nginx binary and configuration to the other machine:
root@ubuntu0:~# scp nginx-1.20.2/objs/nginx ubuntu3:/usr/sbin/
nginx 100% 6965KB 73.7MB/s 00:00
root@ubuntu0:~# scp -r /etc/nginx/ ubuntu3:/etc/
ceshi.conf 100% 397 609.7KB/s 00:00
fastcgi_params 100% 1007 197.5KB/s 00:00
fastcgi_params.default 100% 1007 1.6MB/s 00:00
koi-utf 100% 2837 4.6MB/s 00:00
fastcgi.conf 100% 1077 1.6MB/s 00:00
scgi_params.default 100% 636 1.3MB/s 00:00
nginx.conf.default 100% 2656 5.5MB/s 00:00
uwsgi_params.default 100% 664 1.1MB/s 00:00
nginx.conf 100% 2707 3.4MB/s 00:00
fastcgi.conf.default 100% 1077 1.5MB/s 00:00
mime.types 100% 5231 11.1MB/s 00:00
win-utf 100% 3610 4.2MB/s 00:00
scgi_params 100% 636 507.1KB/s 00:00
koi-win 100% 2223 2.4MB/s 00:00
mime.types.default 100% 5231 5.7MB/s 00:00
uwsgi_params 100% 664 932.6KB/s 00:00
Edit the keepalived configuration file. The master and backup configs are not identical, so keep them apart.
Master keepalived:
root@ubuntu0:/etc/keepalived# cat keepalived.conf
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server 127.0.0.1
   smtp_connect_timeout 30
   router_id NGINX_MASTER
}
vrrp_script check_nginx {
    script "/etc/keepalived/check_nginx.sh"
}
vrrp_instance VI_1 {
    state MASTER
    interface ens33              # change to the actual NIC name
    virtual_router_id 51         # VRRP router ID, unique per VRRP instance (master and backup use the same ID)
    priority 100                 # priority; the backup server is set to 90
    advert_int 1                 # VRRP advertisement (heartbeat) interval, default 1 second
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    # virtual IP
    virtual_ipaddress {
        192.168.23.199/24        # same subnet as the physical NIC
    }
    track_script {
        check_nginx
    }
}
root@ubuntu0:/etc/keepalived# scp keepalived.conf ubuntu3:/etc/keepalived/
keepalived.conf 100% 875 550.6KB/s 00:00
Backup server:
root@ubuntu3:/etc/keepalived# cat keepalived.conf
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server 127.0.0.1
   smtp_connect_timeout 30
   router_id NGINX_MASTER
}
vrrp_script check_nginx {
    script "/etc/keepalived/check_nginx.sh"
}
vrrp_instance VI_1 {
    state BACKUP
    interface ens33              # change to the actual NIC name
    virtual_router_id 51         # VRRP router ID, unique per VRRP instance (master and backup use the same ID)
    priority 90                  # priority; the backup server is set to 90
    advert_int 1                 # VRRP advertisement (heartbeat) interval, default 1 second
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    # virtual IP
    virtual_ipaddress {
        192.168.23.199/24        # same subnet as the physical NIC
    }
    track_script {
        check_nginx
    }
}
Upload the health-check script:
root@ubuntu0:/etc/keepalived# cat check_nginx.sh
#!/bin/bash
# 1. Check whether nginx is alive
counter=$(ps -ef | grep nginx | grep sbin | egrep -cv "grep|$$")
if [ $counter -eq 0 ]; then
    # 2. If it is not running, try to start nginx
    #service nginx start
    systemctl restart nginx
    sleep 2
    # 3. Wait 2 seconds, then check nginx again
    counter=$(ps -ef | grep nginx | grep sbin | egrep -cv "grep|$$")
    # 4. If nginx is still down, stop keepalived so the VIP can fail over
    if [ $counter -eq 0 ]; then
        #service keepalived stop
        systemctl stop keepalived
    fi
fi
root@ubuntu0:/etc/keepalived# chmod +x check_nginx.sh
root@ubuntu0:/etc/keepalived# scp check_nginx.sh ubuntu3:/etc/keepalived/
check_nginx.sh 100% 570 396.1KB/s 00:00
Start nginx and keepalived:
root@ubuntu0:/etc/keepalived# systemctl daemon-reload && systemctl start nginx
root@ubuntu0:/etc/keepalived# systemctl start keepalived && systemctl enable nginx keepalived
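A quick manual sanity check of the health script (not part of the original steps; run it while nginx is up):

# exit status should be 0 and nginx should stay running
bash /etc/keepalived/check_nginx.sh; echo $?
# kill nginx and run the check again: the script should restart nginx rather than stop keepalived
pkill nginx; sleep 1; bash /etc/keepalived/check_nginx.sh; pgrep -a nginx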
The VIP is now present on ubuntu0:
root@ubuntu0:/etc/keepalived# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
link/ether 00:0c:29:f7:c9:f9 brd ff:ff:ff:ff:ff:ff
altname enp2s1
inet 192.168.23.99/24 brd 192.168.23.255 scope global ens33
valid_lft forever preferred_lft forever
inet 192.168.23.199/24 scope global secondary ens33
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fef7:c9f9/64 scope link
valid_lft forever preferred_lft forever
Verify that the VIP fails over:
root@ubuntu0:/etc/keepalived# systemctl stop keepalived
Failover works; the VIP has moved to ubuntu3:
root@ubuntu3:/etc/keepalived# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
link/ether 00:0c:29:81:60:b1 brd ff:ff:ff:ff:ff:ff
altname enp2s1
inet 192.168.23.102/24 brd 192.168.23.255 scope global ens33
valid_lft forever preferred_lft forever
inet 192.168.23.199/24 scope global secondary ens33
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fe81:60b1/64 scope link
valid_lft forever preferred_lft forever
1. Install containerd
root@ubuntu0:/etc/containerd# apt-get install -y containerd
2. Create the config directory and generate a default config
mkdir -p /etc/containerd
cd /etc/containerd/
root@ubuntu0:/etc/containerd# containerd config default > /etc/containerd/config.toml
Edit the configuration file /etc/containerd/config.toml:
Change SystemdCgroup = false to SystemdCgroup = true.
Change sandbox_image = "k8s.gcr.io/pause:3.6" to
sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.8".
Find config_path = "" and change it to:
config_path = "/etc/containerd/certs.d"
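These three edits can also be scripted; a sketch with sed against the freshly generated config.toml (double-check the result, since the exact default lines vary with the containerd version):

sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
sed -i 's#sandbox_image = ".*"#sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.8"#' /etc/containerd/config.toml
sed -i 's#config_path = ""#config_path = "/etc/containerd/certs.d"#' /etc/containerd/config.toml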
Create the /etc/crictl.yaml file:
root@ubuntu0:~# cat > /etc/crictl.yaml <<EOF
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
timeout: 10
debug: false
EOF
root@ubuntu0:~# mkdir /etc/containerd/certs.d/docker.io/ -p
root@ubuntu0:~# vim /etc/containerd/certs.d/docker.io/hosts.toml
# write the following content:
[host."https://vh3bm52y.mirror.aliyuncs.com",host."https://registry.docker-cn.com"]
capabilities = ["pull","push"]
Start containerd and enable it at boot:
systemctl enable containerd --now
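Once containerd is up, the settings can be checked through the CRI socket (crictl reads the /etc/crictl.yaml created above; the field names below are those reported by containerd's CRI plugin):

crictl info | grep -iE 'sandboxImage|SystemdCgroup'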
Install containerd on ubuntu3:
root@ubuntu3:/etc/keepalived# apt-get install -y containerd
root@ubuntu3:~# mkdir -p /etc/containerd
Copy the config files over to ubuntu3:
root@ubuntu0:~# scp /etc/containerd/config.toml root@192.168.23.102:/etc/containerd/
config.toml
root@ubuntu0:~# scp /etc/crictl.yaml root@192.168.23.102:/etc/
crictl.yaml 100% 140 195.8KB/s 00:00
root@ubuntu0:~# scp /etc/containerd/certs.d/docker.io/hosts.toml root@192.168.23.102:/etc/containerd/certs.d/docker.io/
hosts.toml
root@ubuntu3:~# systemctl enable containerd --now
Install containerd on all machines.
Initialize the k8s cluster from kubeadm.yaml
Import the prepared images on every node:
root@ubuntu0:~# ctr -n=k8s.io images import oldboyedu-master-1.23.17.tar.gz
root@ubuntu3:~# ctr -n=k8s.io images import oldboyedu-master-1.23.17.tar.gz
root@ubuntu1:~# ctr -n=k8s.io images import oldboyedu-master-1.23.17.tar.gz
root@ubuntu2:~# ctr -n=k8s.io images import oldboyedu-master-1.23.17.tar.gz
Generate the configuration file:
root@ubuntu0:~# cat kubeadm.yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
#localAPIEndpoint:
#  advertiseAddress: 192.168.23.199
#  bindPort: 16443
nodeRegistration:
  #criSocket: unix:///run/containerd/containerd.sock
  criSocket: /run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  #name: node
  taints: null
---
apiServer:
#  certSANs:
#  - "192.168.23.199"   # VIP address
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers   # use the Aliyun image registry
#imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers   # alternative Aliyun image registry
kind: ClusterConfiguration
kubernetesVersion: 1.23.0
# newly added:
controlPlaneEndpoint: 192.168.23.199:16443
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16   # pod network CIDR
scheduler: {}
# appended:
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
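Because kube-proxy is set to ipvs mode above, the IPVS kernel modules should be loaded on every node before the init, otherwise kube-proxy silently falls back to iptables. A sketch using the standard ip_vs module set:

cat > /etc/modules-load.d/ipvs.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
EOF
for m in ip_vs ip_vs_rr ip_vs_wrr ip_vs_sh; do modprobe $m; done
lsmod | grep ip_vs   # confirm the modules are loaded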
If anything goes wrong, the environment can be wiped completely and reset:
sudo kubeadm reset -f
sudo systemctl stop kubelet containerd
sudo rm -rf /etc/kubernetes /var/lib/etcd
# 2. Check for (and kill) any process still holding port 6443
sudo ss -tulnp | grep 6443
# If kubelet logs the error below, check that the control plane endpoint is reachable:
Aug 12 21:52:08 ubuntu0 kubelet[556620]: E0812 21:52:08.799640 556620 controller.go:144] failed to ensure lease exists, will retry in 7s, error: Get "https://192.168.23.199:16443/apis/coordination.k8s.io/v1/namespaces/kube-node-lease/leases/ubuntu0?timeout=10s": dial tcp 192.168.23.199:16443: connect: connection refused
curl -vk https://192.168.23.199:16443
If containerd logs errors like the following (truncated journal output), it cannot pull the sandbox (pause) image registry.k8s.io/pause:3.8:
error="rpc error: code = Canceled desc = failed to get sandbox image \"registry.k8s.io/pause:3.8\": ... context canceled" host=registry.k8s.io
# pull the pause image from the Aliyun mirror
sudo ctr -n k8s.io images pull registry.aliyuncs.com/google_containers/pause:3.8
# retag it with the name containerd expects
sudo ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/pause:3.8 registry.k8s.io/pause:3.8
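The retag can be verified before retrying the init; both names should point at the same image:

ctr -n k8s.io images ls | grep pause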
root@ubuntu0:~# kubeadm init --config=kubeadm.yaml --ignore-preflight-errors=SystemVerification
kubeadm join 192.168.23.199:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:0864c8edc7e3f5f0c405a4800d3f0b84e4d5ea3340ede6c5f24f6ff4640d9b44 \
--control-plane    # use this command when joining a machine as an additional control plane node
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.23.199:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:0864c8edc7e3f5f0c405a4800d3f0b84e4d5ea3340ede6c5f24f6ff4640d9b44    # worker nodes use this one
root@ubuntu0:~# mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
root@ubuntu0:~# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu0 NotReady control-plane,master 8m11s v1.23.17
Create the directories on the remaining control plane node and copy the certificates generated during init to it:
root@ubuntu0:~# cat 1.sh
ssh $1 " cd /root && mkdir -p /etc/kubernetes/pki/etcd &&mkdir -p ~/.kube/ "
scp /etc/kubernetes/pki/ca.crt $1:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/ca.key $1:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.key $1:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.pub $1:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.crt $1:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.key $1:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/etcd/ca.crt $1:/etc/kubernetes/pki/etcd/
scp /etc/kubernetes/pki/etcd/ca.key $1:/etc/kubernetes/pki/etcd/
root@ubuntu0:~# bash 1.sh ubuntu3
ca.crt 100% 1099 1.9MB/s 00:00
ca.key 100% 1679 2.3MB/s 00:00
sa.key 100% 1675 2.9MB/s 00:00
sa.pub 100% 451 585.6KB/s 00:00
front-proxy-ca.crt 100% 1115 1.6MB/s 00:00
front-proxy-ca.key 100% 1675 2.3MB/s 00:00
ca.crt 100% 1086 1.0MB/s 00:00
ca.key 100% 1679 2.0MB/s 00:00
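A quick check that the certificates landed on ubuntu3 before joining (optional, not from the original notes):

ssh ubuntu3 "ls /etc/kubernetes/pki/ /etc/kubernetes/pki/etcd/"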
Join the second control plane node:
root@ubuntu3:~# kubeadm join 192.168.23.199:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:0864c8edc7e3f5f0c405a4800d3f0b84e4d5ea3340ede6c5f24f6ff4640d9b44 \
--control-plane
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
root@ubuntu3:~# mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
root@ubuntu3:~# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu0 NotReady control-plane,master 10h v1.23.17
ubuntu3 NotReady control-plane,master 6m19s v1.23.17
Join the worker nodes:
root@ubuntu1:~# kubeadm join 192.168.23.199:16443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:0864c8edc7e3f5f0c405a4800d3f0b84e4d5ea3340ede6c5f24f6ff4640d9b44
root@ubuntu2:~# kubeadm join 192.168.23.199:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:0864c8edc7e3f5f0c405a4800d3f0b84e4d5ea3340ede6c5f24f6ff4640d9b44
[preflight] Running pre-flight checks
root@ubuntu3:~# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu0 NotReady control-plane,master 11h v1.23.17
ubuntu1 Ready <none> 84s v1.23.17
ubuntu2 Ready <none> 18s v1.23.17
ubuntu3 NotReady control-plane,master 13m v1.23.17
Label the worker nodes:
root@ubuntu3:~# kubectl label nodes ubuntu1 node-role.kubernetes.io/work=work
node/ubuntu1 labeled
root@ubuntu3:~# kubectl label nodes ubuntu2 node-role.kubernetes.io/work=work
node/ubuntu2 labeled
root@ubuntu3:~# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu0 NotReady control-plane,master 11h v1.23.17
ubuntu1 Ready work 3m1s v1.23.17
ubuntu2 Ready work 115s v1.23.17
ubuntu3 NotReady control-plane,master 15m v1.23.17
Install the calico network plugin
1. Install ansible to make it easier to manage all the nodes
root@ubuntu0:~# apt install ansible -y
root@ubuntu0:~# mkdir -p /etc/ansible
root@ubuntu0:~# touch /etc/ansible/ansible.cfg
root@ubuntu0:~# vim /etc/ansible/ansible.cfg
root@ubuntu0:~# cat /etc/ansible/ansible.cfg
[defaults]
# basic settings
inventory = /etc/ansible/hosts
remote_user = root
host_key_checking = False
# logging
log_path = /var/log/ansible.log
# privilege escalation
become = True
become_method = sudo
become_user = root
become_ask_pass = False
[privilege_escalation]
become = True
become_method = sudo
become_user = root
become_ask_pass = False
root@ubuntu0:~# touch /etc/ansible/hosts
root@ubuntu0:~# vim /etc/ansible/hosts
root@ubuntu0:~# cat /etc/ansible/hosts
[web]
ubuntu1
ubuntu2
ubuntu3
root@ubuntu0:~# ansible all -m ping
ubuntu2 | SUCCESS => {
"ansible_facts": {
"discovered_interpreter_python": "/usr/bin/python3"
},
"changed": false,
"ping": "pong"
}
ubuntu1 | SUCCESS => {
"ansible_facts": {
"discovered_interpreter_python": "/usr/bin/python3"
},
"changed": false,
"ping": "pong"
}
ubuntu3 | SUCCESS => {
"ansible_facts": {
"discovered_interpreter_python": "/usr/bin/python3"
},
"changed": false,
"ping": "pong"
}
Copy the calico images to the other machines and import them:
root@ubuntu0:~# ansible all -m copy -a 'src=calico.tar.gz dest=/root'
root@ubuntu0:~# ansible all -m shell -a " ctr -n=k8s.io images import calico.tar.gz"
root@ubuntu0:~# ctr -n=k8s.io images import calico.tar.gz
wget https://raw.githubusercontent.com/projectcalico/calico/v3.25.1/manifests/calico.yaml
In the calico-node container's env section, below the existing CLUSTER_TYPE entry, add the IP autodetection setting:
            - name: CLUSTER_TYPE
              value: "k8s,bgp"
            # added:
            - name: IP_AUTODETECTION_METHOD
              value: "interface=ens33"
root@ubuntu0:~# kubectl apply -f calico.yaml
If there are problems, the calico-node DaemonSet can be restarted:
kubectl rollout restart daemonset -n kube-system calico-node
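The rollout can then be watched until every calico-node pod is recreated and Ready (k8s-app=calico-node is the label used in the upstream manifest):

kubectl -n kube-system rollout status daemonset/calico-node
kubectl -n kube-system get pods -l k8s-app=calico-node -o wide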
root@ubuntu0:~# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu0 Ready control-plane,master 11h v1.23.17
ubuntu1 Ready work 24m v1.23.17
ubuntu2 Ready work 23m v1.23.17
ubuntu3 Ready control-plane,master 37m v1.23.17
Pods across namespaces (kubectl get pods -A, output truncated):
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-646b6595d5-p4ghh 1/1 Running 0 93m
kube-system calico-node-54vzw 1/1 Running 0 76m
kube-system calico-node-c6qkd 1/1 Running 0 76m
kube-system calico-node-d89hz 1/1 Running 0 76m
kube-system calico-node-mvxhs 1/1 Running 0 76m
kube-system coredns-6d8c4cb4d-44b9m 1/1 Running 12 (83m ago) 36h
kube-system coredns-6d8c4cb4d-snkxf 1/1 Running 12 (83m ago) 36h
kube-system etcd-ubuntu0 1/1 Running 5 (16h ago) 36h
kube-system etcd-ubuntu3 1/1 Running 1 (16h ago) 25h
kube-system kube-apiserver-ubuntu0 1/1 Running 4 (16h ago) 36h
Test DNS resolution and networking inside the k8s cluster:
root@ubuntu0:~# kubectl run busybox --image=busybox:1.28 --restart=Never -it -- sh
If you don't see a command prompt, try pressing enter.
/ #
/ # ping www.baidu.com
PING www.baidu.com (182.61.200.108): 56 data bytes
64 bytes from 182.61.200.108: seq=1 ttl=127 time=30.889 ms
^C
--- www.baidu.com ping statistics ---
2 packets transmitted, 1 packets received, 50% packet loss
round-trip min/avg/max = 30.889/30.889/30.889 ms
The pod can reach the external network, which shows the calico network plugin is installed and working.
/ # nslookup kubernetes.default.svc.cluster.local
Server: 10.96.0.10
Address 1: 10.96.0.10 kube-dns.kube-system.svc.cluster.local
Name: kubernetes.default.svc.cluster.local
Address 1: 10.96.0.1 kubernetes.default.svc.cluster.local
This output shows that the cluster's coredns service is working.
/ # exit    # leave the pod
10.96.0.10 is the clusterIP of coredns, so coredns is configured correctly.
Internal Service names are resolved through coredns.
Configure etcd for high availability
By default each control plane node's etcd manifest only lists the local machine in --initial-cluster, so each etcd runs standalone and is a single point of failure.
root@ubuntu0:~# cd /etc/kubernetes/manifests/
root@ubuntu0:/etc/kubernetes/manifests# ls
etcd.yaml kube-apiserver.yaml kube-controller-manager.yaml kube-scheduler.yaml
root@ubuntu0:/etc/kubernetes/manifests# vim etcd.yaml
root@ubuntu0:/etc/kubernetes/manifests# sed -n '19,27p' etcd.yaml
- --data-dir=/var/lib/etcd
- --experimental-initial-corrupt-check=true
- --initial-advertise-peer-urls=https://192.168.23.99:2380
- --initial-cluster=ubuntu0=https://192.168.23.99:2380
- --key-file=/etc/kubernetes/pki/etcd/server.key
- --listen-client-urls=https://127.0.0.1:2379,https://192.168.23.99:2379
- --listen-metrics-urls=http://127.0.0.1:2381
- --listen-peer-urls=https://192.168.23.99:2380
- --name=ubuntu0
Change --initial-cluster on both master nodes to list both members, so etcd forms a real cluster:
root@ubuntu0:/etc/kubernetes/manifests# grep '\-\-initial\-cluster' etcd.yaml
# - --initial-cluster=ubuntu0=https://192.168.23.99:2380
- --initial-cluster=ubuntu3=https://192.168.23.102:2380,ubuntu0=https://192.168.23.99:2380
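The matching flag on ubuntu3, in its own /etc/kubernetes/manifests/etcd.yaml (a sketch; on a node added via kubeadm join the flag may already list both members, in which case nothing needs to change):

    - --initial-cluster=ubuntu3=https://192.168.23.102:2380,ubuntu0=https://192.168.23.99:2380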
Restart kubelet for the change to take effect:
root@ubuntu0:/etc/kubernetes/manifests# systemctl restart kubelet.service
root@ubuntu0:/etc/kubernetes/manifests# ansible all -m shell -a " systemctl restart kubelet.service"
ubuntu2 | CHANGED | rc=0 >>
ubuntu1 | CHANGED | rc=0 >>
ubuntu3 | CHANGED | rc=0 >>
Verify: both members report started:
root@ubuntu0:~# etcdctl --endpoints=https://192.168.23.99:2379,https://192.168.23.102:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key member list
b7b60034dbe61848, started, ubuntu3, https://192.168.23.102:2380, https://192.168.23.102:2379, false
cd434f622ffc2333, started, ubuntu0, https://192.168.23.99:2380, https://192.168.23.99:2379, false
Output like the following means the etcd cluster is configured correctly:
root@ubuntu0:~# etcdctl --endpoints=https://192.168.23.99:2379,https://192.168.23.102:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key endpoint health --cluster
https://192.168.23.99:2379 is healthy: successfully committed proposal: took = 5.673582ms
https://192.168.23.102:2379 is healthy: successfully committed proposal: took = 5.81056ms
Check which member is the leader; the fifth field (IS LEADER) is true for the leader:
root@ubuntu0:~# etcdctl --endpoints=https://192.168.23.99:2379,https://192.168.23.102:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key endpoint status --cluster
https://192.168.23.102:2379, b7b60034dbe61848, 3.5.6, 5.4 MB, true, false, 7, 115848, 115848,
https://192.168.23.99:2379, cd434f622ffc2333, 3.5.6, 5.4 MB, false, false, 7, 115848, 115848,
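The plain output has no headers; the fields are endpoint, ID, version, DB size, is-leader, is-learner, raft term, raft index, raft applied index, errors. Adding -w table to the same command prints them with headers:

etcdctl --endpoints=https://192.168.23.99:2379,https://192.168.23.102:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key endpoint status --cluster -w table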
To test, shut down the machine that currently holds the leader/VIP and check again. kubectl already talks to the cluster through the VIP:
root@ubuntu0:~# kubectl config view
apiVersion: v1
clusters:
- cluster:
    certificate-authority-data: DATA+OMITTED
    server: https://192.168.23.199:16443
  name: kubernetes
contexts:
- context:
    cluster: kubernetes
    user: kubernetes-admin
  name: kubernetes-admin@kubernetes
current-context: kubernetes-admin@kubernetes
kind: Config
preferences: {}
users:
- name: kubernetes-admin
  user:
    client-certificate-data: REDACTED
    client-key-data: REDACTED
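One caveat on that shutdown test: with only two etcd members quorum is 2, so powering off a whole control plane node also makes etcd, and therefore the API, unavailable; a real HA setup needs a third etcd member. The keepalived/nginx layer itself can still be verified by stopping only keepalived (or nginx) on the node holding the VIP:

# on the node that currently holds 192.168.23.199
systemctl stop keepalived            # the VIP should move to the other node
# from any machine that can reach the VIP
curl -k https://192.168.23.199:16443/version   # the apiserver should still answer through the VIP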