Kubernetes Dashboard 从v2.0.0-beta1版本开始,集成了一个 metrics-scraper 的组件,可以通过 Kubernetes 的 Metrics API 收集一些基础资源的监控信息,并在web页面展示。
本文的目标就是部署 Dashboard 2.0,并借助 metrics-scraper 和 metrics-server 在页面上展示 Pod 与节点的负载情况。
1 使用 openssl 签发证书
mkdir certs
openssl req -nodes -newkey rsa:2048 -keyout certs/dashboard.key -out certs/dashboard.csr -subj "/C=/ST=/L=/O=/OU=/CN=kubernetes-dashboard"
openssl x509 -req -sha256 -days 10000 -in certs/dashboard.csr -signkey certs/dashboard.key -out certs/dashboard.crt
2 安装 Dashboard
# 1 创建 namespace
kubectl create namespace kubernetes-dashboard
# 2 导入证书
kubectl create secret generic kubernetes-dashboard-certs --from-file=certs -n kubernetes-dashboard
# 3 下载部署配置文件;如果无法直接下载,可以通过代理访问该地址,把文件内容复制下来保存为 recommended.yaml
wget https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0-beta3/aio/deploy/recommended.yaml
# 4 注释 recommended.yaml 中的 Namespace(第 1 步已经手动创建过,重复创建会报 AlreadyExists),即把下面这几行全部注释
apiVersion: v1
kind: Namespace
metadata:
name: kubernetes-dashboard
# 5 注释 recommended.yaml 中名为 kubernetes-dashboard-certs 的 Secret(第 2 步已经用自签证书创建过),下面这几行全部注释
apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-certs
namespace: kubernetes-dashboard
type: Opaque
# 6 在 kubernetes-dashboard 容器的 args 下增加一个 --token-ttl 参数(单位为秒,604800 即 7 天),可以延长 token 登录的过期时间
args:
- --token-ttl=604800
# 7 把 recommended.yaml 中 kubernetes-dashboard 的 Service 部分替换成以下内容,改用 NodePort 方式(节点端口 30000)对外暴露
---
kind: Service
apiVersion: v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
type: NodePort
ports:
- port: 443
targetPort: 8443
nodePort: 30000
selector:
k8s-app: kubernetes-dashboard
---
# 8 创建
kubectl create -f recommended.yaml
3 创建 ServiceAccount 和 token
# 新建 token-user.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: admin-user
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: admin-user
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: ServiceAccount
name: admin-user
namespace: kube-system
# 创建用户
kubectl create -f token-user.yaml
# 查询 token
kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep admin-user | awk '{print $1}')
# 使用上面输出中的 token 字符串登录 Dashboard(按前面的 NodePort 配置,访问地址为 https://<节点IP>:30000)
4 安装 metrics-server
metrics-server 是 Kubernetes 官方的集群资源利用率信息收集器。从 Kubernetes 1.8 开始,资源指标改由 Metrics API(metrics-server)提供,Heapster 已被弃用并不再维护,所以这里安装 metrics-server
wget https://github.com/kubernetes-incubator/metrics-server/archive/v0.3.3.tar.gz
tar zxvf v0.3.3.tar.gz
# 先等等,直接安装可能会有问题(镜像拉取失败、无法解析节点主机名)。先打开 metrics-server-0.3.3/deploy/1.8+/ 目录下的
# metrics-server-deployment.yaml 文件,把它修改成如下的样子(主要是替换镜像地址并增加 args)
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: metrics-server
namespace: kube-system
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: metrics-server
namespace: kube-system
labels:
k8s-app: metrics-server
spec:
selector:
matchLabels:
k8s-app: metrics-server
template:
metadata:
name: metrics-server
labels:
k8s-app: metrics-server
spec:
serviceAccountName: metrics-server
volumes:
# mount in tmp so we can safely use from-scratch images and/or read-only containers
- name: tmp-dir
emptyDir: {}
containers:
- name: metrics-server # 从这里开始替换
# image: k8s.gcr.io/metrics-server-amd64:v0.3.3
image: registry.aliyuncs.com/google_containers/metrics-server-amd64:v0.3.3
args:
- --kubelet-insecure-tls
- --kubelet-preferred-address-types=InternalIP
imagePullPolicy: Always
volumeMounts:
- name: tmp-dir
mountPath: /tmp
执行安装
# 安装
kubectl apply -f metrics-server-0.3.3/deploy/1.8+/
# 查看 metrics server pods
[root@k8s-master certs]# kubectl -n kube-system get pods -l k8s-app=metrics-server
NAME READY STATUS RESTARTS AGE
metrics-server-7745fd5ddd-dn7p6 1/1 Running 0 5h
[root@k8s-master certs]# kubectl get svc -n kube-system metrics-server
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
metrics-server ClusterIP 10.96.218.78 <none> 443/TCP 5h
如果你这里 查看 pods 像下面这样:
[root@k8s-master metrics-server-0.3.3]# kubectl -n kube-system get pods -l k8s-app=metrics-server
NAME READY STATUS RESTARTS AGE
metrics-server-678cc5687-l2tjn 0/1 ImagePullBackOff 0 5m
# 可以 查看详细/输出/日志信息
[root@k8s-master metrics-server-0.3.3]# kubectl logs -f metrics-server-678cc5687-l2tjn -n kube-system
Error from server (BadRequest): container "metrics-server" in pod "metrics-server-678cc5687-l2tjn" is waiting to start: trying and failing to pull image
...
unable to fully scrape metrics from source kubelet_summary:k8s-node1: unable to fetch metrics from Kubelet k8s-node1 (k8s-node1): Get https://k8s-node1:10250/stats/summary/: dial tcp: lookup k8s-node1 on 10.96.0.10:53: no such host]
...
# 这是 DNS 解析异常带来的问题:metrics-server 在 Pod 内通过集群 DNS(10.96.0.10)解析节点主机名,而我们之前只在宿主机的 /etc/hosts 里配置了这些记录
172.16.0.175 k8s-master
172.16.0.100 k8s-node1
172.16.0.147 k8s-node2
# 这些记录无法被 metrics-server 所找到。可以在 metrics-server-deployment.yaml 中添加 --kubelet-preferred-address-types=InternalIP 参数,让它改用节点 IP 访问 kubelet;同时添加 --kubelet-insecure-tls 参数,跳过对 kubelet 自签证书的校验:
- name: metrics-server
# image: k8s.gcr.io/metrics-server-amd64:v0.3.3
image: registry.aliyuncs.com/google_containers/metrics-server-amd64:v0.3.3
args:
- --kubelet-insecure-tls
- --kubelet-preferred-address-types=InternalIP
# imagePullPolicy: Always
imagePullPolicy: Always
修改后重新执行 kubectl apply 安装,再查看 Pod 日志确认不再报错,即完成
5 kubectl top nodes 查看效果
[root@k8s-master certs]# kubectl top nodes
NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
ecs-ca42 117m 2% 2968Mi 38%
k8s-node1 665m 16% 4231Mi 54%
k8s-node2 691m 17% 4149Mi 53%
k8s-node3 654m 16% 5227Mi 67%
k8s-node4 641m 16% 4458Mi 57%
6 打开 dashboard 查看
讨论区

部署 Dashboard 2.0 + Metrics 监控 Pods node 负载 ...