1、下载配置文件
从GitHub下载prometheus配置文件,此处选择0.8.0版本
2、安装
kubectl create -f manifests/setup
## 等待ServiceMonitor CRD创建完成后再继续(感觉没啥用,可跳过)
until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
#注意:kube-state-metrics-deployment.yaml里面的kube-state-metrics:v2.0.0镜像位于google仓库,直接拉取会失败。这里拉取官方代码在本地打包镜像,并发布到私服仓库harbor.songcw.com,因此需要先把镜像地址替换为 harbor.songcw.com/kube/kube-state-metrics:v2.0.0
kubectl create -f manifests/
3、ingress映射
创建ingress映射文件
# Write the Ingress manifest that routes three hostnames to the Alertmanager,
# Grafana and Prometheus services in the monitoring namespace, then create it.
# The target directory is created first so the redirect cannot fail on a
# missing path.
mkdir -p /etc/kubernetes/ingress
cat > /etc/kubernetes/ingress/monitoring-ingress.yaml << EOF
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: nginx-http
  namespace: monitoring
spec:
  rules:
    - host: alertmanager.k8s.songcw.com
      http:
        paths:
          - pathType: Prefix
            path: "/"
            backend:
              service:
                name: alertmanager-main
                port:
                  number: 9093
    - host: grafana.k8s.songcw.com
      http:
        paths:
          - pathType: Prefix
            path: "/"
            backend:
              service:
                name: grafana
                port:
                  number: 3000
    - host: prometheus.k8s.songcw.com
      http:
        paths:
          - pathType: Prefix
            path: "/"
            backend:
              service:
                name: prometheus-k8s
                port:
                  number: 9090
EOF
# Create the Ingress from the exact path written above, so the command works
# regardless of the current directory.
kubectl create -f /etc/kubernetes/ingress/monitoring-ingress.yaml
此处的域名需要在本地hosts文件中添加映射,指向k8s的master的ip地址:192.168.200.223。
4、安装kube-controller-manager、kube-scheduler监控
修改k8s的master节点的/etc/kubernetes/manifests/目录下kube-controller-manager.yaml、kube-scheduler.yaml将 - --bind-address=127.0.0.1 修改为 - --bind-address=0.0.0.0,修改完成后服务会自动重启,等待重启验证通过。
在manifests目录下(这一步一定要仔细看下,新版的matchLabels发生了改变)
grep -A2 -B2 selector kubernetes-serviceMonitor*
然后在monitoring目录下创建:
# Write two headless Services (clusterIP: None) in kube-system that expose the
# https-metrics ports of kube-controller-manager (10257) and kube-scheduler
# (10259), selected by the "component" pod label, then apply them.
cat <<EOF > kube-controller-manager-scheduler.yml
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-controller-manager
  labels:
    app.kubernetes.io/name: kube-controller-manager
spec:
  selector:
    component: kube-controller-manager
  type: ClusterIP
  clusterIP: None
  ports:
    - name: https-metrics
      port: 10257
      targetPort: 10257
      protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-scheduler
  labels:
    app.kubernetes.io/name: kube-scheduler
spec:
  selector:
    component: kube-scheduler
  type: ClusterIP
  clusterIP: None
  ports:
    - name: https-metrics
      port: 10259
      targetPort: 10259
      protocol: TCP
EOF
# Apply the file written above; the name must match the heredoc target (.yml).
kubectl apply -f kube-controller-manager-scheduler.yml
## 然后使用kubectl get svc -n kube-system能看到kube-controller-manager、kube-scheduler会重启
## 登录prometheus查看targets是否能获取到这两项指标
# Write two Endpoints objects in kube-system that point the https-metrics
# ports of kube-controller-manager (10257) and kube-scheduler (10259) at
# 192.168.200.223.
# NOTE(review): the Services with the same names defined earlier in this
# document carry a selector; Kubernetes manages the Endpoints of
# selector-based Services itself, so these manual objects may be
# overwritten. Confirm they are still required.
cat <<EOF > kube-endpoint-controller-manager.yml
apiVersion: v1
kind: Endpoints
metadata:
  labels:
    k8s-app: kube-controller-manager
  name: kube-controller-manager
  namespace: kube-system
subsets:
  - addresses:
      - ip: 192.168.200.223
    ports:
      - name: https-metrics
        port: 10257
        protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
  labels:
    k8s-app: kube-scheduler
  name: kube-scheduler
  namespace: kube-system
subsets:
  - addresses:
      - ip: 192.168.200.223
    ports:
      - name: https-metrics
        port: 10259
        protocol: TCP
EOF
- 执行 kubectl apply -f kube-endpoint-controller-manager.yml
5、etcd监控
## 颁发etcd客户端访问证书
kubectl -n monitoring create secret generic etcd-certs --from-file=/etc/kubernetes/pki/etcd/healthcheck-client.crt --from-file=/etc/kubernetes/pki/etcd/healthcheck-client.key --from-file=/etc/kubernetes/pki/etcd/ca.crt
## 在prometheus中添加etcd证书
kubectl edit prometheus k8s -n monitoring
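在spec下添加如下内容,将etcd-certs这个secret挂载到prometheus容器的/etc/prometheus/secrets/etcd-certs/目录:
  secrets:
  - etcd-certs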
验证证书是否正常挂载,出现以下情况说明正常挂载
# Open a shell in the Prometheus pod. The "--" separator replaces the
# deprecated "kubectl exec POD COMMAND" form, so no deprecation warning is
# printed.
kubectl exec -it prometheus-k8s-0 -n monitoring -- /bin/sh
Defaulting container name to prometheus.
Use 'kubectl describe pod/prometheus-k8s-0 -n monitoring' to see all of the containers in this pod.
/prometheus $ ls /etc/prometheus/secrets/etcd-certs/
ca.crt healthcheck-client.crt healthcheck-client.key
暴露etcd监控端点
# Write a selector-less headless Service plus a matching, manually maintained
# Endpoints object (same name and namespace) that expose etcd on port 2379 at
# 192.168.200.223.
cat <<EOF > kube-endpoint-etcd.yml
apiVersion: v1
kind: Service
metadata:
  name: etcd-k8s
  namespace: kube-system
  labels:
    k8s-app: etcd
spec:
  type: ClusterIP
  clusterIP: None
  ports:
    - name: etcd
      port: 2379
      protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
  labels:
    k8s-app: etcd
  name: etcd-k8s
  namespace: kube-system
subsets:
  - addresses:
      - ip: 192.168.200.223
    ports:
      - name: etcd
        port: 2379
        protocol: TCP
EOF
##执行
kubectl apply -f kube-endpoint-etcd.yml
创建serviceMonitor
# Write the ServiceMonitor that scrapes the "etcd" port of Services labelled
# k8s-app=etcd in kube-system over HTTPS every 30s.
cat <<EOF > prometheus-service-monitor-etcd.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: etcd-k8s
  namespace: monitoring
  labels:
    k8s-app: etcd
spec:
  jobLabel: k8s-app
  endpoints:
    - port: etcd
      interval: 30s
      scheme: https
      tlsConfig:
        # These paths are read inside the Prometheus container, so they must
        # point at the mounted etcd-certs secret, not at the master node's
        # /etc/kubernetes/pki/etcd directory.
        caFile: /etc/prometheus/secrets/etcd-certs/ca.crt
        certFile: /etc/prometheus/secrets/etcd-certs/healthcheck-client.crt
        keyFile: /etc/prometheus/secrets/etcd-certs/healthcheck-client.key
        insecureSkipVerify: true
  selector:
    matchLabels:
      k8s-app: etcd
  namespaceSelector:
    matchNames:
      - kube-system
EOF
执行 kubectl apply -f prometheus-service-monitor-etcd.yaml创建服务监控
6、集成钉钉报警通知(此处存在部分问题)
# Write the ConfigMap that carries config.yml for the DingTalk webhook bridge.
# The body of config.yml is a literal block scalar, so it must be indented
# deeper than its key.
# NOTE(review): the nesting of "mention" under the "webhook" target is
# reconstructed from a flattened paste; confirm against the bridge's config
# schema.
cat <<EOF > ding-talk-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: dingtalk-config
  namespace: monitoring
data:
  config.yml: |-
    targets:
      webhook:
        url: https://oapi.dingtalk.com/robot/send?access_token=xxxx #修改为钉钉机器人的webhook
        mention:
          all: true
EOF
卸载
kubectl delete --ignore-not-found=true -f manifests/ -f manifests/setup