5.2 Deploy a Filebeat collector on each Node to collect the Kubernetes component logs
- With Elasticsearch and Kibana deployed, how do we collect pod logs? Following approach 1, we deploy one Filebeat collector (version 7.9.3) on every node. In addition, I have already standardized the on-disk log path of the docker/containerd runtime as described in question 2 of section 4.
```
[root@k8s-master fek]# cat filebeat.yaml
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: filebeat-config
  namespace: kube-system
  labels:
    k8s-app: filebeat
data:
  filebeat.yml: |-
    filebeat.inputs:
    - type: container
      paths:
        - /var/log/containers/*.log  # the log directory mounted into the pod that Filebeat harvests
      processors:
        - add_kubernetes_metadata:   # add Kubernetes metadata fields for later data cleansing
            host: ${NODE_NAME}
            matchers:
            - logs_path:
                logs_path: "/var/log/containers/"
    #output.kafka:  # if the log volume is high and logs arrive late in ES, Kafka can be inserted between Filebeat and Logstash
    #  hosts: ["kafka-log-01:9092", "kafka-log-02:9092", "kafka-log-03:9092"]
    #  topic: 'topic-test-log'
    #  version: 2.0.0
    output.logstash:  # Logstash is deployed for data cleansing, so Filebeat pushes the data to Logstash
      hosts: ["logstash:5044"]
      enabled: true
---
# Source: filebeat/templates/filebeat-service-account.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: filebeat
  namespace: kube-system
  labels:
    k8s-app: filebeat
---
# Source: filebeat/templates/filebeat-role.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: filebeat
  labels:
    k8s-app: filebeat
rules:
- apiGroups: [""] # "" indicates the core API group
  resources:
  - namespaces
  - pods
  verbs:
  - get
  - watch
  - list
---
# Source: filebeat/templates/filebeat-role-binding.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: filebeat
subjects:
- kind: ServiceAccount
  name: filebeat
  namespace: kube-system
roleRef:
  kind: ClusterRole
  name: filebeat
  apiGroup: rbac.authorization.k8s.io
---
# Source: filebeat/templates/filebeat-daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: filebeat
  namespace: kube-system
  labels:
    k8s-app: filebeat
spec:
  selector:
    matchLabels:
      k8s-app: filebeat
  template:
    metadata:
      labels:
        k8s-app: filebeat
    spec:
      serviceAccountName: filebeat
      terminationGracePeriodSeconds: 30
      containers:
      - name: filebeat
        image: docker.io/kubeimages/filebeat:7.9.3  # this image supports both the arm64 and amd64 architectures
        args: [
          "-c", "/etc/filebeat.yml",
          "-e", "-httpprof", "0.0.0.0:6060"
        ]
        #ports:
        #  - containerPort: 6060
        #    hostPort: 6068
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: ELASTICSEARCH_HOST
          value: elasticsearch-logging
        - name: ELASTICSEARCH_PORT
          value: "9200"
        securityContext:
          runAsUser: 0
          # If using Red Hat OpenShift uncomment this:
          #privileged: true
        resources:
          limits:
            memory: 1000Mi
            cpu: 1000m
          requests:
            memory: 100Mi
            cpu: 100m
        volumeMounts:
        - name: config  # the Filebeat configuration file
          mountPath: /etc/filebeat.yml
          readOnly: true
          subPath: filebeat.yml
        - name: data  # persist Filebeat data on the host
          mountPath: /usr/share/filebeat/data
        - name: varlibdockercontainers  # mount the host's raw log directory into the Filebeat container; if you have not changed the docker/containerd runtime to the standardized log path, change mountPath to /var/lib
          mountPath: /data/var/
          readOnly: true
        - name: varlog  # mount the host's /var/log/pods and /var/log/containers symlinks into the Filebeat container
          mountPath: /var/log/
          readOnly: true
        - name: timezone
          mountPath: /etc/localtime
      volumes:
      - name: config
        configMap:
          defaultMode: 0600
          name: filebeat-config
      - name: varlibdockercontainers
        hostPath:  # if you have not changed the docker/containerd runtime to the standardized log path, change path to /var/lib
          path: /data/var/
      - name: varlog
        hostPath:
          path: /var/log/
      # data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart
      - name: inputs
        configMap:
          defaultMode: 0600
          name: filebeat-inputs
      - name: data
        hostPath:
          path: /data/filebeat-data
          type: DirectoryOrCreate
      - name: timezone
        hostPath:
          path: /etc/localtime
      tolerations:  # tolerations so the DaemonSet can be scheduled onto every node
      - effect: NoExecute
        key: dedicated
        operator: Equal
        value: gpu
      - effect: NoSchedule
        operator: Exists
```
- After deploying, check that the pods were created successfully; you should see two pods named filebeat-xx, one on each of the two nodes.
```
[root@k8s-master elk]# kubectl apply -f filebeat.yaml
[root@k8s-master elk]# kubectl get pod -n kube-system
NAME                       READY   STATUS    RESTARTS   AGE
coredns-5bd5f9dbd9-8zdn5   1/1     Running   0          10h
elasticsearch-0            1/1     Running   1          13h
filebeat-2q5tz             1/1     Running   0          13h
filebeat-k6m27             1/1     Running   2          13h
kibana-b7d98644-tllmm      1/1     Running   0          10h
```
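If both Filebeat pods are Running, you can additionally confirm that they are actually harvesting container logs. A minimal check, using the pod name from the output above, is to look for harvester messages in the Filebeat log:

```
# Filebeat logs a "Harvester started" line for every container log file it picks up
kubectl logs -n kube-system filebeat-2q5tz | grep -i "harvester started" | tail -n 5
```

Until the Logstash from the next section is deployed, connection errors to logstash:5044 in this log are expected.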
5.3 Add Logstash to clean the collected raw logs as the business requires
- Because the logs need to serve business requirements and be reused elsewhere (for example Grafana dashboards), Logstash is added to clean them: here it converts the field types of the ingress logs and renames/converts fields of the business service logs. Adjust this to your own business needs.
```
[root@k8s-master fek]# cat logstash.yaml
---
apiVersion: v1
kind: Service
metadata:
  name: logstash
  namespace: kube-system
spec:
  ports:
  - port: 5044
    targetPort: beats
  selector:
    type: logstash
  clusterIP: None
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: logstash
  namespace: kube-system
spec:
  selector:
    matchLabels:
      type: logstash
  template:
    metadata:
      labels:
        type: logstash
        srv: srv-logstash
    spec:
      containers:
      - image: docker.io/kubeimages/logstash:7.9.3  # this image supports both the arm64 and amd64 architectures
        name: logstash
        ports:
        - containerPort: 5044
          name: beats
        command:
        - logstash
        - '-f'
        - '/etc/logstash_c/logstash.conf'
        env:
        - name: "XPACK_MONITORING_ELASTICSEARCH_HOSTS"
          value: "http://elasticsearch-logging:9200"
        volumeMounts:
        - name: config-volume
          mountPath: /etc/logstash_c/
        - name: config-yml-volume
          mountPath: /usr/share/logstash/config/
        - name: timezone
          mountPath: /etc/localtime
        resources:  # always set resource limits on Logstash so it cannot starve other workloads
          limits:
            cpu: 1000m
            memory: 2048Mi
          requests:
            cpu: 512m
            memory: 512Mi
      volumes:
      - name: config-volume
        configMap:
          name: logstash-conf
          items:
          - key: logstash.conf
            path: logstash.conf
      - name: timezone
        hostPath:
          path: /etc/localtime
      - name: config-yml-volume
        configMap:
          name: logstash-yml
          items:
          - key: logstash.yml
            path: logstash.yml
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-conf
  namespace: kube-system
  labels:
    type: logstash
data:
  logstash.conf: |-
    input {
      beats {
        port => 5044
      }
    }
    filter {
      # process ingress logs
      if [kubernetes][container][name] == "nginx-ingress-controller" {
        json {
          source => "message"
          target => "ingress_log"
        }
        if [ingress_log][requesttime] {
          mutate {
            convert => ["[ingress_log][requesttime]", "float"]
          }
        }
        if [ingress_log][upstremtime] {
          mutate {
            convert => ["[ingress_log][upstremtime]", "float"]
          }
        }
        if [ingress_log][status] {
          mutate {
            convert => ["[ingress_log][status]", "float"]
          }
        }
        if [ingress_log][httphost] and [ingress_log][uri] {
          mutate {
            add_field => {"[ingress_log][entry]" => "%{[ingress_log][httphost]}%{[ingress_log][uri]}"}
          }
          mutate {
            split => ["[ingress_log][entry]", "/"]
          }
          if [ingress_log][entry][1] {
            mutate {
              add_field => {"[ingress_log][entrypoint]" => "%{[ingress_log][entry][0]}/%{[ingress_log][entry][1]}"}
              remove_field => "[ingress_log][entry]"
            }
          } else {
            mutate {
              add_field => {"[ingress_log][entrypoint]" => "%{[ingress_log][entry][0]}/"}
              remove_field => "[ingress_log][entry]"
            }
          }
        }
      }
      # process logs of business services whose container names start with srv
      if [kubernetes][container][name] =~ /^srv*/ {
        json {
          source => "message"
          target => "tmp"
        }
        if [kubernetes][namespace] == "kube-system" {
          drop {}
        }
        if [tmp][level] {
          mutate {
            add_field => {"[applog][level]" => "%{[tmp][level]}"}
          }
          if [applog][level] == "debug" {
            drop {}
          }
        }
        if [tmp][msg] {
          mutate {
            add_field => {"[applog][msg]" => "%{[tmp][msg]}"}
          }
        }
        if [tmp][func] {
          mutate {
            add_field => {"[applog][func]" => "%{[tmp][func]}"}
          }
        }
        if [tmp][cost] {
          if "ms" in [tmp][cost] {
            mutate {
              split => ["[tmp][cost]", "m"]
              add_field => {"[applog][cost]" => "%{[tmp][cost][0]}"}
              convert => ["[applog][cost]", "float"]
            }
          } else {
            mutate {
              add_field => {"[applog][cost]" => "%{[tmp][cost]}"}
            }
          }
        }
        if [tmp][method] {
          mutate {
            add_field => {"[applog][method]" => "%{[tmp][method]}"}
          }
        }
        if [tmp][request_url] {
          mutate {
            add_field => {"[applog][request_url]" => "%{[tmp][request_url]}"}
          }
        }
        if [tmp][meta._id] {
          mutate {
            add_field => {"[applog][traceId]" => "%{[tmp][meta._id]}"}
          }
        }
        if [tmp][project] {
          mutate {
            add_field => {"[applog][project]" => "%{[tmp][project]}"}
          }
        }
        if [tmp][time] {
          mutate {
            add_field => {"[applog][time]" => "%{[tmp][time]}"}
          }
        }
        if [tmp][status] {
          mutate {
            add_field => {"[applog][status]" => "%{[tmp][status]}"}
            convert => ["[applog][status]", "float"]
          }
        }
      }
      mutate {
        rename => ["kubernetes", "k8s"]
        remove_field => "beat"
        remove_field => "tmp"
        remove_field => "[k8s][labels][app]"
      }
    }
    output {
      elasticsearch {
        hosts => ["http://elasticsearch-logging:9200"]
        codec => json
        index => "logstash-%{+YYYY.MM.dd}"  # a new index named logstash-<date> is created every day
      }
    }
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-yml
  namespace: kube-system
  labels:
    type: logstash
data:
  logstash.yml: |-
    http.host: "0.0.0.0"
    xpack.monitoring.elasticsearch.hosts: http://elasticsearch-logging:9200
```
5.4 Configure log visualization in the Kibana web UI
- First log in to Kibana and open the Stack Management module from the menu
- Open Index Management; you will see that indices for the collected logs already exist
- To keep the Elasticsearch logs from taking up more and more disk space, add an index lifecycle policy that matches your business needs: click Index Lifecycle Policies
- Set the policy name to logstash-history-ilm-policy; do not change it arbitrarily, because the template later references it (a Dev Tools sketch of such a policy follows below)
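The same policy can also be created from Dev Tools instead of the Kibana UI. A minimal sketch is shown below; keeping indices for 30 days is only an illustrative assumption, so set the retention your business actually needs:

```
PUT _ilm/policy/logstash-history-ilm-policy
{
  "policy": {
    "phases": {
      "hot": {
        "actions": {}
      },
      "delete": {
        "min_age": "30d",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}
```

Because Logstash creates a new index per day rather than rolling indices over, min_age here is counted from index creation time.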
- To be able to browse the logs in Kibana's Discover view, you need an index pattern: select Index Patterns and create one
- Since Elasticsearch is deployed as a single node, indices created from the default index template get 1 replica, so all the indices show up as yellow. The fix is as follows:
Open Dev Tools from the menu
Then call the API below, which sets the replica count of every existing index to 0
```
PUT _all/_settings
{
  "number_of_replicas": 0
}
```
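To confirm the change took effect, you can list any indices that are still yellow; an empty result means everything is green again (indices created after this point will still pick up 1 replica until the template in the next step is in place):

```
GET _cat/indices?v&health=yellow
```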
- To fix this at the root, attach the index lifecycle policy, and standardize the mapping types of the log fields, we need to override the default template
```
PUT _template/logstash
{
  "order": 1,
  "index_patterns": [
    "logstash-*"
  ],
  "settings": {
    "index": {
      "lifecycle": {
        "name": "logstash-history-ilm-policy"
      },
      "number_of_shards": "2",
      "refresh_interval": "5s",
      "number_of_replicas": "0"
    }
  },
  "mappings": {
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "applog": {
        "dynamic": true,
        "properties": {
          "cost": {
            "type": "float"
          },
          "func": {
            "type": "keyword"
          },
          "method": {
            "type": "keyword"
          }
        }
      },
      "k8s": {
        "dynamic": true,
        "properties": {
          "namespace": {
            "type": "keyword"
          },
          "container": {
            "dynamic": true,
            "properties": {
              "name": {
                "type": "keyword"
              }
            }
          },
          "labels": {
            "dynamic": true,
            "properties": {
              "srv": {
                "type": "keyword"
              }
            }
          }
        }
      },
      "geoip": {
        "dynamic": true,
        "properties": {
          "ip": {
            "type": "ip"
          },
          "latitude": {
            "type": "float"
          },
          "location": {
            "type": "geo_point"
          },
          "longitude": {
            "type": "float"
          }
        }
      }
    }
  },
  "aliases": {}
}
```
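As a sanity check you can read the template back from Dev Tools and verify that the ILM policy name and the field mappings were stored as expected:

```
GET _template/logstash
```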
- Finally, verify the indices and Discover
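Besides the Kibana UI, a quick Dev Tools check that the daily logstash-* indices (named by the Logstash output above) are being created and are green:

```
GET _cat/indices/logstash-*?v&s=index
```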
Final thoughts
Log collection is only one part of business observability. Besides the Elastic Stack there are also Loki, Splunk, managed logging services in the cloud, and more; all roads lead to Rome, and however you do it, what matters is reaching the desired result. No solution is absolutely better than the others; the best one is whichever fits your business scenario, reflects your business problems most clearly, and lets you troubleshoot them quickly.