From 41cdfd691de66804ecd1d689d5a15b955a05f9cd Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 28 Jul 2017 08:07:07 +0200 Subject: [PATCH 01/26] Exposes /metrics endpoints for Prometheus scraping This reverts commit 22a314ac161d3d203881eaf4b1a44ea8bf028a27. --- 50kafka.yml | 15 +++++++++++++++ zookeeper/50pzoo.yml | 23 +++++++++++++++++++++++ zookeeper/51zoo.yml | 23 +++++++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/50kafka.yml b/50kafka.yml index 73a64850..17d9dc2f 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -11,6 +11,8 @@ spec: labels: app: kafka annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5556" spec: terminationGracePeriodSeconds: 30 initContainers: @@ -26,11 +28,24 @@ spec: - name: config mountPath: /etc/kafka containers: + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + ports: + - containerPort: 5556 + resources: + requests: + cpu: 0m + memory: 40Mi + limits: + cpu: 10m + memory: 100Mi - name: broker image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d env: - name: KAFKA_LOG4J_OPTS value: -Dlog4j.configuration=file:/etc/kafka/log4j.properties + - name: JMX_PORT + value: "5555" ports: - containerPort: 9092 command: diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 446748a7..77054ced 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -12,6 +12,8 @@ spec: app: zookeeper storage: persistent annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5556" spec: terminationGracePeriodSeconds: 10 initContainers: @@ -24,11 +26,32 @@ spec: - name: data mountPath: /var/lib/zookeeper/data containers: + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + command: + - java + - -Xms39M + - -Xmx99M + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - example_configs/zookeeper.yaml + ports: + - containerPort: 5556 + resources: + requests: + cpu: 0m + memory: 40Mi + limits: + cpu: 10m + memory: 100Mi - name: zookeeper image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d env: - name: KAFKA_LOG4J_OPTS value: -Dlog4j.configuration=file:/etc/kafka/log4j.properties + - name: JMX_PORT + value: "5555" command: - ./bin/zookeeper-server-start.sh - /etc/kafka/zookeeper.properties diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index a2922ef2..7e25598e 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -12,6 +12,8 @@ spec: app: zookeeper storage: ephemeral annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5556" spec: terminationGracePeriodSeconds: 10 initContainers: @@ -27,11 +29,32 @@ spec: - name: data mountPath: /var/lib/zookeeper/data containers: + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + command: + - java + - -Xms39M + - -Xmx99M + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - example_configs/zookeeper.yaml + ports: + - containerPort: 5556 + resources: + requests: + cpu: 0m + memory: 40Mi + limits: + cpu: 10m + memory: 100Mi - name: zookeeper image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d env: - name: KAFKA_LOG4J_OPTS value: -Dlog4j.configuration=file:/etc/kafka/log4j.properties + - name: JMX_PORT + value: "5555" command: - ./bin/zookeeper-server-start.sh - /etc/kafka/zookeeper.properties From ffb89dd2ca4bd9a3319da5c8c581f26a5366d2f7 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 29 Jul 2017 06:07:46 +0200 Subject: [PATCH 02/26] Adds pod that can be used to estimate resource limits for jmx containers in kafka and zoo pods --- test/jmx-selftest.yml | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 test/jmx-selftest.yml diff --git a/test/jmx-selftest.yml b/test/jmx-selftest.yml new file mode 100644 index 00000000..9b2a0507 --- /dev/null +++ b/test/jmx-selftest.yml @@ -0,0 +1,41 @@ +# Sets up a pod that monitors itself, to test resource usage etc. +# kubectl exec -n test-kafka jmx-selftest-... -- /bin/sh -c 'apk add --no-cache curl && curl http://localhost:5556/metrics' +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: jmx-selftest + namespace: test-kafka +spec: + replicas: 1 + template: + metadata: + labels: + test-target: jmx-exporter + test-type: readiness + # Uncomment to test with Prometheus + #annotations: + # prometheus.io/scrape: "true" + # prometheus.io/port: "5556" + spec: + containers: + - name: monitor + image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + command: + - java + - -Dcom.sun.management.jmxremote.ssl=false + - -Dcom.sun.management.jmxremote.authenticate=false + - -Dcom.sun.management.jmxremote.port=5555 + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - example_configs/httpserver_sample_config.yml + ports: + - name: jmx + containerPort: 5555 + - name: slashmetrics + containerPort: 5556 + # Test run, again and again + readinessProbe: + httpGet: + path: /metrics + port: 5556 From 51bbedbd753161eb0112e8d2c96e862faeaa11fc Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 29 Jul 2017 08:20:00 +0200 Subject: [PATCH 03/26] CPU limit on metrics export won't actually save any cycles It'll just make the requests slower. Dreadfully slow on Minikube (>30s even when limit is increased to 100m). --- 50kafka.yml | 1 - zookeeper/50pzoo.yml | 1 - zookeeper/51zoo.yml | 1 - 3 files changed, 3 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index 17d9dc2f..84d1b2ac 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -37,7 +37,6 @@ spec: cpu: 0m memory: 40Mi limits: - cpu: 10m memory: 100Mi - name: broker image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 77054ced..02fed53e 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -43,7 +43,6 @@ spec: cpu: 0m memory: 40Mi limits: - cpu: 10m memory: 100Mi - name: zookeeper image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 7e25598e..e4cc7a64 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -46,7 +46,6 @@ spec: cpu: 0m memory: 40Mi limits: - cpu: 10m memory: 100Mi - name: zookeeper image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d From 8ec2045902349ba4e4634127744ddaeea5d8f713 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 29 Jul 2017 08:26:05 +0200 Subject: [PATCH 04/26] The test that caught the performance problem --- test/metrics.yml | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 test/metrics.yml diff --git a/test/metrics.yml b/test/metrics.yml new file mode 100644 index 00000000..19a370f0 --- /dev/null +++ b/test/metrics.yml @@ -0,0 +1,85 @@ +--- +kind: ConfigMap +metadata: + name: metrics + namespace: test-kafka +apiVersion: v1 +data: + + curl-format.txt: |- + \n + # ### curl stats ###\n + time_namelookup: %{time_namelookup}\n + time_connect: %{time_connect}\n + time_appconnect: %{time_appconnect}\n + time_pretransfer: %{time_pretransfer}\n + time_redirect: %{time_redirect}\n + time_starttransfer: %{time_starttransfer}\n + \n + time_total: %{time_total}\n + \n + http_code: %{http_code}\n + content_type: %{content_type}\n + size_download: %{size_download}\n + + setup.sh: |- + touch /tmp/testlog + tail -f /tmp/testlog + + continue.sh: |- + exit 0 + + run.sh: |- + exec >> /tmp/testlog + exec 2>&1 + + curl -w "@/test/curl-format.txt" -s --fail-early --max-time $MAX_RESPONSE_TIME http://kafka-0.broker.kafka.svc.cluster.local:5556/metrics + + exit 0 + +--- +apiVersion: apps/v1beta1 +kind: Deployment +metadata: + name: metrics + namespace: test-kafka +spec: + replicas: 1 + template: + metadata: + labels: + test-target: kafka + test-type: readiness + spec: + containers: + - name: testcase + image: solsson/curl@sha256:8c0c5d669b3dd67932da934024252af59fb9d0fa0e5118b5a737b35c5e1487bf + env: + - name: MAX_RESPONSE_TIME + value: "5" + # Test set up + command: + - /bin/bash + - -e + - /test/setup.sh + # Test run, again and again + readinessProbe: + exec: + command: + - /bin/bash + - -e + - /test/run.sh + # Test quit on nonzero exit + livenessProbe: + exec: + command: + - /bin/bash + - -e + - /test/continue.sh + volumeMounts: + - name: config + mountPath: /test + volumes: + - name: config + configMap: + name: metrics From 11feb285af02a283a6bbb4b855d9973cd6e061bc Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 29 Jul 2017 09:36:48 +0200 Subject: [PATCH 05/26] Demonstrates OOMKilled with current resource limits --- test/metrics.yml | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/test/metrics.yml b/test/metrics.yml index 19a370f0..70a15caa 100644 --- a/test/metrics.yml +++ b/test/metrics.yml @@ -9,18 +9,18 @@ data: curl-format.txt: |- \n # ### curl stats ###\n - time_namelookup: %{time_namelookup}\n - time_connect: %{time_connect}\n - time_appconnect: %{time_appconnect}\n - time_pretransfer: %{time_pretransfer}\n - time_redirect: %{time_redirect}\n - time_starttransfer: %{time_starttransfer}\n + time_namelookup %{time_namelookup}\n + time_connect %{time_connect}\n + time_appconnect %{time_appconnect}\n + time_pretransfer %{time_pretransfer}\n + time_redirect %{time_redirect}\n + time_starttransfer %{time_starttransfer}\n \n - time_total: %{time_total}\n + time_total %{time_total}\n \n - http_code: %{http_code}\n - content_type: %{content_type}\n - size_download: %{size_download}\n + http_code{url="%{url_effective}"} %{http_code}\n + content_type %{content_type}\n + size_download %{size_download}\n setup.sh: |- touch /tmp/testlog @@ -33,7 +33,20 @@ data: exec >> /tmp/testlog exec 2>&1 - curl -w "@/test/curl-format.txt" -s --fail-early --max-time $MAX_RESPONSE_TIME http://kafka-0.broker.kafka.svc.cluster.local:5556/metrics + curl -w "@/test/curl-format.txt" -s --max-time $MAX_RESPONSE_TIME \ + http://kafka-0.broker.kafka.svc.cluster.local:5556/metrics \ + | grep http_code \ + | grep 200 + + curl -w "@/test/curl-format.txt" -s --max-time $MAX_RESPONSE_TIME \ + http://zoo-0.zoo.kafka.svc.cluster.local:5556/metrics \ + | grep http_code \ + | grep 200 + + curl -w "@/test/curl-format.txt" -s --max-time $MAX_RESPONSE_TIME \ + http://pzoo-1.pzoo.kafka.svc.cluster.local:5556/metrics \ + | grep http_code \ + | grep 200 exit 0 From 02c4b3e7c898ffdf9008fa832c004d12c4eb9dee Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 29 Jul 2017 11:11:58 +0200 Subject: [PATCH 06/26] Same base image as kafka and latest exporter source --- 50kafka.yml | 2 +- test/jmx-selftest.yml | 2 +- zookeeper/50pzoo.yml | 2 +- zookeeper/51zoo.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index 84d1b2ac..d85cfa88 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -29,7 +29,7 @@ spec: mountPath: /etc/kafka containers: - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 ports: - containerPort: 5556 resources: diff --git a/test/jmx-selftest.yml b/test/jmx-selftest.yml index 9b2a0507..9aaf313f 100644 --- a/test/jmx-selftest.yml +++ b/test/jmx-selftest.yml @@ -19,7 +19,7 @@ spec: spec: containers: - name: monitor - image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 command: - java - -Dcom.sun.management.jmxremote.ssl=false diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 02fed53e..3e5d6f6f 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -27,7 +27,7 @@ spec: mountPath: /var/lib/zookeeper/data containers: - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 command: - java - -Xms39M diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index e4cc7a64..15008019 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -30,7 +30,7 @@ spec: mountPath: /var/lib/zookeeper/data containers: - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 command: - java - -Xms39M From a56322094b4c1a03b2df71d3f60110ed96085fb1 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 29 Jul 2017 11:21:25 +0200 Subject: [PATCH 07/26] zoo is fast now, <0.02s compared to >1s for the others --- test/metrics.yml | 11 +++++++++-- zookeeper/51zoo.yml | 4 +++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/test/metrics.yml b/test/metrics.yml index 70a15caa..a95ae060 100644 --- a/test/metrics.yml +++ b/test/metrics.yml @@ -1,3 +1,5 @@ +# kubectl apply -f test/metrics.yml && kubectl scale --replicas=0 deploy/metrics && kubectl scale --replicas=1 deploy/metrics +# kubectl exec -ti metrics-... -- tail -f /tmp/loglast | grep time_total --- kind: ConfigMap metadata: @@ -16,11 +18,11 @@ data: time_redirect %{time_redirect}\n time_starttransfer %{time_starttransfer}\n \n - time_total %{time_total}\n + time_total{url="%{url_effective}"} %{time_total}\n \n http_code{url="%{url_effective}"} %{http_code}\n + size_download{url="%{url_effective}"} %{size_download}\n content_type %{content_type}\n - size_download %{size_download}\n setup.sh: |- touch /tmp/testlog @@ -33,18 +35,23 @@ data: exec >> /tmp/testlog exec 2>&1 + date -u -Iseconds | tee /tmp/loglast + curl -w "@/test/curl-format.txt" -s --max-time $MAX_RESPONSE_TIME \ http://kafka-0.broker.kafka.svc.cluster.local:5556/metrics \ + | tee -a /tmp/loglast \ | grep http_code \ | grep 200 curl -w "@/test/curl-format.txt" -s --max-time $MAX_RESPONSE_TIME \ http://zoo-0.zoo.kafka.svc.cluster.local:5556/metrics \ + | tee -a /tmp/loglast \ | grep http_code \ | grep 200 curl -w "@/test/curl-format.txt" -s --max-time $MAX_RESPONSE_TIME \ http://pzoo-1.pzoo.kafka.svc.cluster.local:5556/metrics \ + | tee -a /tmp/loglast \ | grep http_code \ | grep 200 diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 15008019..c3e82ae1 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -34,7 +34,9 @@ spec: command: - java - -Xms39M - - -Xmx99M + - -Xmx80M + - -XX:MaxPermSize=16m + - -Xss1m - -jar - jmx_prometheus_httpserver.jar - "5556" From e8a61e7fa0901ed593a65f2921ada7096ae4b92e Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 29 Jul 2017 11:42:11 +0200 Subject: [PATCH 08/26] The 1s response time from kafka might be due to ... that unlike zoo pods it actually exposes interesting data --- 50kafka.yml | 10 ++++++++++ test/metrics.yml | 8 +++++--- zookeeper/50pzoo.yml | 6 ++++-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index d85cfa88..d0875554 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -30,6 +30,16 @@ spec: containers: - name: metrics image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 + command: + - java + - -Xmx80M + # TODO OpenJDK 64-Bit Server VM warning: ignoring option MaxPermSize=16m; support was removed in 8.0 + - -XX:MaxPermSize=16m + - -Xss1m + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - example_configs/kafka-prometheus-monitoring.yml ports: - containerPort: 5556 resources: diff --git a/test/metrics.yml b/test/metrics.yml index a95ae060..822421a7 100644 --- a/test/metrics.yml +++ b/test/metrics.yml @@ -1,5 +1,5 @@ # kubectl apply -f test/metrics.yml && kubectl scale --replicas=0 deploy/metrics && kubectl scale --replicas=1 deploy/metrics -# kubectl exec -ti metrics-... -- tail -f /tmp/loglast | grep time_total +# kubectl exec -ti metrics-... -- tail -f /tmp/loglast | egrep 'time_total|^jmx_scrape_duration_seconds' --- kind: ConfigMap metadata: @@ -10,7 +10,7 @@ data: curl-format.txt: |- \n - # ### curl stats ###\n + # ------ curl stats ------\n time_namelookup %{time_namelookup}\n time_connect %{time_connect}\n time_appconnect %{time_appconnect}\n @@ -23,6 +23,8 @@ data: http_code{url="%{url_effective}"} %{http_code}\n size_download{url="%{url_effective}"} %{size_download}\n content_type %{content_type}\n + # ----- curl complete -----\n + \n setup.sh: |- touch /tmp/testlog @@ -76,7 +78,7 @@ spec: image: solsson/curl@sha256:8c0c5d669b3dd67932da934024252af59fb9d0fa0e5118b5a737b35c5e1487bf env: - name: MAX_RESPONSE_TIME - value: "5" + value: "3" # Test set up command: - /bin/bash diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 3e5d6f6f..97fcf1eb 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -30,8 +30,10 @@ spec: image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 command: - java - - -Xms39M - - -Xmx99M + - -Xmx80M + # TODO OpenJDK 64-Bit Server VM warning: ignoring option MaxPermSize=16m; support was removed in 8.0 + - -XX:MaxPermSize=16m + - -Xss1m - -jar - jmx_prometheus_httpserver.jar - "5556" From 6d4ffc37213f5bc20a80d211395ac03d99a59e59 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 29 Jul 2017 11:48:14 +0200 Subject: [PATCH 09/26] interesting --- test/metrics.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/metrics.yml b/test/metrics.yml index 822421a7..931e91f7 100644 --- a/test/metrics.yml +++ b/test/metrics.yml @@ -1,5 +1,5 @@ # kubectl apply -f test/metrics.yml && kubectl scale --replicas=0 deploy/metrics && kubectl scale --replicas=1 deploy/metrics -# kubectl exec -ti metrics-... -- tail -f /tmp/loglast | egrep 'time_total|^jmx_scrape_duration_seconds' +# kubectl exec -ti metrics-... -- tail -f /tmp/loglast | egrep 'time_total|^jmx_scrape_duration_seconds|^java_lang_memory_heapmemoryusage_used|^java_lang_memory_nonheapmemoryusage_used' --- kind: ConfigMap metadata: From 72cfc77709a9675c12150f4ac8083382393ec828 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Mon, 31 Jul 2017 07:01:51 +0200 Subject: [PATCH 10/26] Adapts to Java 8+, but still guessing the numbers --- 50kafka.yml | 3 +-- test/metrics.yml | 2 +- zookeeper/50pzoo.yml | 3 +-- zookeeper/51zoo.yml | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index d0875554..53756c88 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -33,8 +33,7 @@ spec: command: - java - -Xmx80M - # TODO OpenJDK 64-Bit Server VM warning: ignoring option MaxPermSize=16m; support was removed in 8.0 - - -XX:MaxPermSize=16m + - -XX:MaxMetaspaceSize=16m - -Xss1m - -jar - jmx_prometheus_httpserver.jar diff --git a/test/metrics.yml b/test/metrics.yml index 931e91f7..7a752f72 100644 --- a/test/metrics.yml +++ b/test/metrics.yml @@ -1,5 +1,5 @@ # kubectl apply -f test/metrics.yml && kubectl scale --replicas=0 deploy/metrics && kubectl scale --replicas=1 deploy/metrics -# kubectl exec -ti metrics-... -- tail -f /tmp/loglast | egrep 'time_total|^jmx_scrape_duration_seconds|^java_lang_memory_heapmemoryusage_used|^java_lang_memory_nonheapmemoryusage_used' +# kubectl exec metrics-... -- tail -f /tmp/loglast | egrep 'time_total|^jmx_scrape_duration_seconds|^java_lang_memory_heapmemoryusage_used|^java_lang_memory_nonheapmemoryusage_used' --- kind: ConfigMap metadata: diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 97fcf1eb..122ea654 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -31,8 +31,7 @@ spec: command: - java - -Xmx80M - # TODO OpenJDK 64-Bit Server VM warning: ignoring option MaxPermSize=16m; support was removed in 8.0 - - -XX:MaxPermSize=16m + - -XX:MaxMetaspaceSize=16m - -Xss1m - -jar - jmx_prometheus_httpserver.jar diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index c3e82ae1..5a5168ca 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -35,7 +35,7 @@ spec: - java - -Xms39M - -Xmx80M - - -XX:MaxPermSize=16m + - -XX:MaxMetaspaceSize=16m - -Xss1m - -jar - jmx_prometheus_httpserver.jar From 2b822b143e621169d2501928df8a6112ffdc0256 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Mon, 31 Jul 2017 07:11:52 +0200 Subject: [PATCH 11/26] Don't touch Xss as it has <1MB defaults according to docs --- 50kafka.yml | 1 - zookeeper/50pzoo.yml | 1 - zookeeper/51zoo.yml | 1 - 3 files changed, 3 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index 53756c88..5bda673a 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -34,7 +34,6 @@ spec: - java - -Xmx80M - -XX:MaxMetaspaceSize=16m - - -Xss1m - -jar - jmx_prometheus_httpserver.jar - "5556" diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 122ea654..a369a033 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -32,7 +32,6 @@ spec: - java - -Xmx80M - -XX:MaxMetaspaceSize=16m - - -Xss1m - -jar - jmx_prometheus_httpserver.jar - "5556" diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 5a5168ca..60cf5ae1 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -36,7 +36,6 @@ spec: - -Xms39M - -Xmx80M - -XX:MaxMetaspaceSize=16m - - -Xss1m - -jar - jmx_prometheus_httpserver.jar - "5556" From 6d3b9356a70f627b66d5033d33cc12597ca9db6f Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Mon, 31 Jul 2017 07:12:21 +0200 Subject: [PATCH 12/26] Let's focus on the two numbers that seem to matter --- zookeeper/51zoo.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 60cf5ae1..507d36ce 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -33,7 +33,6 @@ spec: image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 command: - java - - -Xms39M - -Xmx80M - -XX:MaxMetaspaceSize=16m - -jar From a4b51f4c5a46e8672ff1e664540be167dd335984 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Mon, 31 Jul 2017 10:54:59 +0200 Subject: [PATCH 13/26] Endpont works again, with similar scrape times as Xmx=80m without Metaspace limit --- 50kafka.yml | 4 ++-- test/metrics.yml | 2 +- zookeeper/50pzoo.yml | 4 ++-- zookeeper/51zoo.yml | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index 5bda673a..016ed070 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -32,8 +32,8 @@ spec: image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 command: - java - - -Xmx80M - - -XX:MaxMetaspaceSize=16m + - -Xmx64M + - -XX:MaxMetaspaceSize=32m - -jar - jmx_prometheus_httpserver.jar - "5556" diff --git a/test/metrics.yml b/test/metrics.yml index 7a752f72..b28cae8c 100644 --- a/test/metrics.yml +++ b/test/metrics.yml @@ -52,7 +52,7 @@ data: | grep 200 curl -w "@/test/curl-format.txt" -s --max-time $MAX_RESPONSE_TIME \ - http://pzoo-1.pzoo.kafka.svc.cluster.local:5556/metrics \ + http://pzoo-0.pzoo.kafka.svc.cluster.local:5556/metrics \ | tee -a /tmp/loglast \ | grep http_code \ | grep 200 diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index a369a033..9743de4e 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -30,8 +30,8 @@ spec: image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 command: - java - - -Xmx80M - - -XX:MaxMetaspaceSize=16m + - -Xmx64M + - -XX:MaxMetaspaceSize=32m - -jar - jmx_prometheus_httpserver.jar - "5556" diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 507d36ce..75f940fe 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -33,8 +33,8 @@ spec: image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 command: - java - - -Xmx80M - - -XX:MaxMetaspaceSize=16m + - -Xmx64M + - -XX:MaxMetaspaceSize=32m - -jar - jmx_prometheus_httpserver.jar - "5556" From 8dbd7a1c761d8fb0f2b77bfe5065a6646cd74208 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Tue, 1 Aug 2017 07:52:05 +0200 Subject: [PATCH 14/26] Reverse order of containers to benefit from "Defaulting container name to" --- 50kafka.yml | 36 ++++++++++++++++++------------------ zookeeper/50pzoo.yml | 36 ++++++++++++++++++------------------ zookeeper/51zoo.yml | 36 ++++++++++++++++++------------------ 3 files changed, 54 insertions(+), 54 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index 016ed070..c4cf2405 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -28,24 +28,6 @@ spec: - name: config mountPath: /etc/kafka containers: - - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 - command: - - java - - -Xmx64M - - -XX:MaxMetaspaceSize=32m - - -jar - - jmx_prometheus_httpserver.jar - - "5556" - - example_configs/kafka-prometheus-monitoring.yml - ports: - - containerPort: 5556 - resources: - requests: - cpu: 0m - memory: 40Mi - limits: - memory: 100Mi - name: broker image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d env: @@ -81,6 +63,24 @@ spec: mountPath: /etc/kafka - name: data mountPath: /var/lib/kafka/data + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 + command: + - java + - -Xmx64M + - -XX:MaxMetaspaceSize=32m + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - example_configs/kafka-prometheus-monitoring.yml + ports: + - containerPort: 5556 + resources: + requests: + cpu: 0m + memory: 40Mi + limits: + memory: 100Mi volumes: - name: config configMap: diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 9743de4e..ef127032 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -26,24 +26,6 @@ spec: - name: data mountPath: /var/lib/zookeeper/data containers: - - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 - command: - - java - - -Xmx64M - - -XX:MaxMetaspaceSize=32m - - -jar - - jmx_prometheus_httpserver.jar - - "5556" - - example_configs/zookeeper.yaml - ports: - - containerPort: 5556 - resources: - requests: - cpu: 0m - memory: 40Mi - limits: - memory: 100Mi - name: zookeeper image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d env: @@ -76,6 +58,24 @@ spec: mountPath: /etc/kafka - name: data mountPath: /var/lib/zookeeper/data + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 + command: + - java + - -Xmx64M + - -XX:MaxMetaspaceSize=32m + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - example_configs/zookeeper.yaml + ports: + - containerPort: 5556 + resources: + requests: + cpu: 0m + memory: 40Mi + limits: + memory: 100Mi volumes: - name: config configMap: diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 75f940fe..8ea302b5 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -29,24 +29,6 @@ spec: - name: data mountPath: /var/lib/zookeeper/data containers: - - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 - command: - - java - - -Xmx64M - - -XX:MaxMetaspaceSize=32m - - -jar - - jmx_prometheus_httpserver.jar - - "5556" - - example_configs/zookeeper.yaml - ports: - - containerPort: 5556 - resources: - requests: - cpu: 0m - memory: 40Mi - limits: - memory: 100Mi - name: zookeeper image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d env: @@ -79,6 +61,24 @@ spec: mountPath: /etc/kafka - name: data mountPath: /var/lib/zookeeper/data + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 + command: + - java + - -Xmx64M + - -XX:MaxMetaspaceSize=32m + - -jar + - jmx_prometheus_httpserver.jar + - "5556" + - example_configs/zookeeper.yaml + ports: + - containerPort: 5556 + resources: + requests: + cpu: 0m + memory: 40Mi + limits: + memory: 100Mi volumes: - name: config configMap: From d7d504419648a7ff830fe9f066ade439c2168a5b Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sat, 5 Aug 2017 07:15:26 +0200 Subject: [PATCH 15/26] Uses prometheus/jmx_exporter parent-0.10 tag --- 50kafka.yml | 2 +- test/jmx-selftest.yml | 2 +- zookeeper/50pzoo.yml | 2 +- zookeeper/51zoo.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index c4cf2405..59ec5924 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -64,7 +64,7 @@ spec: - name: data mountPath: /var/lib/kafka/data - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 + image: solsson/kafka-prometheus-jmx-exporter@sha256:348b0f6510b08dff70ba468a16d25dc8def480fe79aca0e3c76f098d67b108a3 command: - java - -Xmx64M diff --git a/test/jmx-selftest.yml b/test/jmx-selftest.yml index 9aaf313f..9e328eab 100644 --- a/test/jmx-selftest.yml +++ b/test/jmx-selftest.yml @@ -19,7 +19,7 @@ spec: spec: containers: - name: monitor - image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 + image: solsson/kafka-prometheus-jmx-exporter@sha256:348b0f6510b08dff70ba468a16d25dc8def480fe79aca0e3c76f098d67b108a3 command: - java - -Dcom.sun.management.jmxremote.ssl=false diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index ef127032..570c72c9 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -59,7 +59,7 @@ spec: - name: data mountPath: /var/lib/zookeeper/data - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 + image: solsson/kafka-prometheus-jmx-exporter@sha256:348b0f6510b08dff70ba468a16d25dc8def480fe79aca0e3c76f098d67b108a3 command: - java - -Xmx64M diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 8ea302b5..4591aedc 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -62,7 +62,7 @@ spec: - name: data mountPath: /var/lib/zookeeper/data - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:07cdc4b446ebe8208950202b924caefadad006ea94ae92d73bef81897df4d5c7 + image: solsson/kafka-prometheus-jmx-exporter@sha256:348b0f6510b08dff70ba468a16d25dc8def480fe79aca0e3c76f098d67b108a3 command: - java - -Xmx64M From b6c85eb08ce323e2c0ba19dccaea5e227f651715 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Mon, 7 Aug 2017 07:56:20 +0200 Subject: [PATCH 16/26] Had 10 OOMKilled/hour with 100Mi so let's increase request, and with 150Mi limit I got zero restarts in 48 hours. --- 50kafka.yml | 4 ++-- zookeeper/50pzoo.yml | 4 ++-- zookeeper/51zoo.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index 59ec5924..b588816d 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -78,9 +78,9 @@ spec: resources: requests: cpu: 0m - memory: 40Mi - limits: memory: 100Mi + limits: + memory: 150Mi volumes: - name: config configMap: diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 570c72c9..70d7c0ee 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -73,9 +73,9 @@ spec: resources: requests: cpu: 0m - memory: 40Mi - limits: memory: 100Mi + limits: + memory: 150Mi volumes: - name: config configMap: diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 4591aedc..37dea415 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -76,9 +76,9 @@ spec: resources: requests: cpu: 0m - memory: 40Mi - limits: memory: 100Mi + limits: + memory: 150Mi volumes: - name: config configMap: From db52a3c28a31503fb07fd617768737a3192874e2 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Tue, 8 Aug 2017 13:41:16 +0200 Subject: [PATCH 17/26] Uses JMX config from config map, so we can experiment --- zookeeper/10zookeeper-config.yml | 19 +++++++++++++++++++ zookeeper/51zoo.yml | 5 ++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/zookeeper/10zookeeper-config.yml b/zookeeper/10zookeeper-config.yml index e796b4ba..c9b7a6f3 100644 --- a/zookeeper/10zookeeper-config.yml +++ b/zookeeper/10zookeeper-config.yml @@ -35,3 +35,22 @@ data: # Suppress connection log messages, three lines per livenessProbe execution log4j.logger.org.apache.zookeeper.server.NIOServerCnxnFactory=WARN log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN + + jmx-zookeeper-prometheus.yaml: |+ + rules: + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$2" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$3" + labels: + replicaId: "$2" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$4" + labels: + replicaId: "$2" + memberType: "$3" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$4_$5" + labels: + replicaId: "$2" + memberType: "$3" diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 37dea415..611f8acc 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -70,7 +70,7 @@ spec: - -jar - jmx_prometheus_httpserver.jar - "5556" - - example_configs/zookeeper.yaml + - /etc/kafka/jmx-zookeeper-prometheus.yaml ports: - containerPort: 5556 resources: @@ -79,6 +79,9 @@ spec: memory: 100Mi limits: memory: 150Mi + volumeMounts: + - name: config + mountPath: /etc/kafka volumes: - name: config configMap: From 37e58e93f5db54945e38f584eb2baaab70b162e3 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 6 Oct 2017 20:13:02 +0200 Subject: [PATCH 18/26] Scrape less, and improve scrape time further ... through ssl=false and whitelist. Thanks to @yacut, see #49 --- 10broker-config.yml | 19 +++++++++++++++++++ 50kafka.yml | 5 ++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/10broker-config.yml b/10broker-config.yml index 2025bbfd..933a6c54 100644 --- a/10broker-config.yml +++ b/10broker-config.yml @@ -256,3 +256,22 @@ data: # Change to DEBUG to enable audit log for the authorizer log4j.logger.kafka.authorizer.logger=WARN, authorizerAppender log4j.additivity.kafka.authorizer.logger=false + + jmx-kafka-prometheus.yml: |+ + lowercaseOutputName: true + jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi + ssl: false + whitelistObjectNames: ["kafka.server:*","java.lang:*"] + rules: + - pattern : kafka.server<>Value + - pattern : kafka.server<>OneMinuteRate + - pattern : kafka.server<>OneMinuteRate + - pattern : kafka.server<>queue-size + - pattern : kafka.server<>(Value|OneMinuteRate) + - pattern : kafka.server<>(.*) + - pattern : kafka.server<>(.*) + - pattern : kafka.server<>queue-size + - pattern : kafka.server<>OneMinuteRate + - pattern : java.lang<>SystemCpuLoad + - pattern : java.langused + - pattern : java.lang<>FreePhysicalMemorySize diff --git a/50kafka.yml b/50kafka.yml index b588816d..08621917 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -72,7 +72,7 @@ spec: - -jar - jmx_prometheus_httpserver.jar - "5556" - - example_configs/kafka-prometheus-monitoring.yml + - /etc/kafka/jmx-kafka-prometheus.yml ports: - containerPort: 5556 resources: @@ -81,6 +81,9 @@ spec: memory: 100Mi limits: memory: 150Mi + volumeMounts: + - name: config + mountPath: /etc/kafka volumes: - name: config configMap: From d4b95d288708755105032ac730b8bfbfccb6fcf1 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sun, 22 Oct 2017 20:27:08 +0200 Subject: [PATCH 19/26] For performance, again thanks to @yacut #49 --- 10broker-config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/10broker-config.yml b/10broker-config.yml index 933a6c54..78a1a8c1 100644 --- a/10broker-config.yml +++ b/10broker-config.yml @@ -264,10 +264,10 @@ data: whitelistObjectNames: ["kafka.server:*","java.lang:*"] rules: - pattern : kafka.server<>Value - - pattern : kafka.server<>OneMinuteRate + - pattern : kafka.server<>OneMinuteRate - pattern : kafka.server<>OneMinuteRate - pattern : kafka.server<>queue-size - - pattern : kafka.server<>(Value|OneMinuteRate) + - pattern : kafka.server<>(Value|OneMinuteRate) - pattern : kafka.server<>(.*) - pattern : kafka.server<>(.*) - pattern : kafka.server<>queue-size From 09949508f3754a8e4605623ab8a98e9a4c1b6863 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Tue, 8 Aug 2017 13:57:29 +0200 Subject: [PATCH 20/26] Gets you JVM metrics from zoo, lots and lots of it --- zookeeper/10zookeeper-config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/zookeeper/10zookeeper-config.yml b/zookeeper/10zookeeper-config.yml index c9b7a6f3..82ce63fd 100644 --- a/zookeeper/10zookeeper-config.yml +++ b/zookeeper/10zookeeper-config.yml @@ -54,3 +54,4 @@ data: labels: replicaId: "$2" memberType: "$3" + - pattern : java.lang From e35d0773fcd485311751d10ccfe78811203a6e3a Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Tue, 8 Aug 2017 14:16:24 +0200 Subject: [PATCH 21/26] Still not getting anything zookeeper-specific --- zookeeper/10zookeeper-config.yml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/zookeeper/10zookeeper-config.yml b/zookeeper/10zookeeper-config.yml index 82ce63fd..40abf623 100644 --- a/zookeeper/10zookeeper-config.yml +++ b/zookeeper/10zookeeper-config.yml @@ -38,20 +38,5 @@ data: jmx-zookeeper-prometheus.yaml: |+ rules: - - pattern: "org.apache.ZooKeeperService<>(\\w+)" - name: "zookeeper_$2" - - pattern: "org.apache.ZooKeeperService<>(\\w+)" - name: "zookeeper_$3" - labels: - replicaId: "$2" - - pattern: "org.apache.ZooKeeperService<>(\\w+)" - name: "zookeeper_$4" - labels: - replicaId: "$2" - memberType: "$3" - - pattern: "org.apache.ZooKeeperService<>(\\w+)" - name: "zookeeper_$4_$5" - labels: - replicaId: "$2" - memberType: "$3" + - pattern : org.apache.ZooKeeperService - pattern : java.lang From 42d1b1ae3b8a6d78437be92527fec844700d403c Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Tue, 8 Aug 2017 15:37:53 +0200 Subject: [PATCH 22/26] Adds directives from kafka's rules, now for pzoo too. But before this, how did the metrics container know which port to connect to? --- zookeeper/10zookeeper-config.yml | 2 ++ zookeeper/50pzoo.yml | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/zookeeper/10zookeeper-config.yml b/zookeeper/10zookeeper-config.yml index 40abf623..6bdffe59 100644 --- a/zookeeper/10zookeeper-config.yml +++ b/zookeeper/10zookeeper-config.yml @@ -37,6 +37,8 @@ data: log4j.logger.org.apache.zookeeper.server.NIOServerCnxn=WARN jmx-zookeeper-prometheus.yaml: |+ + lowercaseOutputName: true + jmxUrl: service:jmx:rmi:///jndi/rmi://localhost:5555/jmxrmi rules: - pattern : org.apache.ZooKeeperService - pattern : java.lang diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 70d7c0ee..6ac35ba1 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -67,7 +67,7 @@ spec: - -jar - jmx_prometheus_httpserver.jar - "5556" - - example_configs/zookeeper.yaml + - /etc/kafka/jmx-zookeeper-prometheus.yaml ports: - containerPort: 5556 resources: @@ -76,6 +76,9 @@ spec: memory: 100Mi limits: memory: 150Mi + volumeMounts: + - name: config + mountPath: /etc/kafka volumes: - name: config configMap: From 253633f05318833d134ef5484063bba76d76579d Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 3 Nov 2017 13:59:26 +0100 Subject: [PATCH 23/26] Zookeeper metrics conf contributed by @yacut #61 --- zookeeper/10zookeeper-config.yml | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/zookeeper/10zookeeper-config.yml b/zookeeper/10zookeeper-config.yml index 6bdffe59..eeba1e00 100644 --- a/zookeeper/10zookeeper-config.yml +++ b/zookeeper/10zookeeper-config.yml @@ -39,6 +39,25 @@ data: jmx-zookeeper-prometheus.yaml: |+ lowercaseOutputName: true jmxUrl: service:jmx:rmi:///jndi/rmi://localhost:5555/jmxrmi + ssl: false + whitelistObjectNames: ["org.apache.ZooKeeperService:*","java.lang:*"] rules: - - pattern : org.apache.ZooKeeperService - - pattern : java.lang + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$2" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$3" + labels: + replicaId: "$2" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$4" + labels: + replicaId: "$2" + memberType: "$3" + - pattern: "org.apache.ZooKeeperService<>(\\w+)" + name: "zookeeper_$4_$5" + labels: + replicaId: "$2" + memberType: "$3" + - pattern : java.lang<>SystemCpuLoad + - pattern : java.langused + - pattern : java.lang<>FreePhysicalMemorySize From dc1c1da725e4b30b13da3d14186bb7d2a0ba15dd Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 3 Nov 2017 14:22:13 +0100 Subject: [PATCH 24/26] Upgrade to jmx-exporter 0.1.0 --- 50kafka.yml | 2 +- zookeeper/50pzoo.yml | 2 +- zookeeper/51zoo.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/50kafka.yml b/50kafka.yml index 08621917..57933f62 100644 --- a/50kafka.yml +++ b/50kafka.yml @@ -64,7 +64,7 @@ spec: - name: data mountPath: /var/lib/kafka/data - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:348b0f6510b08dff70ba468a16d25dc8def480fe79aca0e3c76f098d67b108a3 + image: solsson/kafka-prometheus-jmx-exporter@sha256:40a6ab24ccac0ed5acb8c02dccfbb1f5924fd97f46c0450e0245686c24138b53 command: - java - -Xmx64M diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 6ac35ba1..593c7a11 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -59,7 +59,7 @@ spec: - name: data mountPath: /var/lib/zookeeper/data - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:348b0f6510b08dff70ba468a16d25dc8def480fe79aca0e3c76f098d67b108a3 + image: solsson/kafka-prometheus-jmx-exporter@sha256:40a6ab24ccac0ed5acb8c02dccfbb1f5924fd97f46c0450e0245686c24138b53 command: - java - -Xmx64M diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 611f8acc..c88659a5 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -62,7 +62,7 @@ spec: - name: data mountPath: /var/lib/zookeeper/data - name: metrics - image: solsson/kafka-prometheus-jmx-exporter@sha256:348b0f6510b08dff70ba468a16d25dc8def480fe79aca0e3c76f098d67b108a3 + image: solsson/kafka-prometheus-jmx-exporter@sha256:40a6ab24ccac0ed5acb8c02dccfbb1f5924fd97f46c0450e0245686c24138b53 command: - java - -Xmx64M From 4c35576a079c15294462df0c10b79a53923c5f55 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 3 Nov 2017 14:26:57 +0100 Subject: [PATCH 25/26] Upgrade test also to jmx-exporter 0.1.0 --- test/jmx-selftest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/jmx-selftest.yml b/test/jmx-selftest.yml index 9e328eab..2d28b14f 100644 --- a/test/jmx-selftest.yml +++ b/test/jmx-selftest.yml @@ -19,7 +19,7 @@ spec: spec: containers: - name: monitor - image: solsson/kafka-prometheus-jmx-exporter@sha256:348b0f6510b08dff70ba468a16d25dc8def480fe79aca0e3c76f098d67b108a3 + image: solsson/kafka-prometheus-jmx-exporter@sha256:40a6ab24ccac0ed5acb8c02dccfbb1f5924fd97f46c0450e0245686c24138b53 command: - java - -Dcom.sun.management.jmxremote.ssl=false From 6a26cf3b5bd64a80ee3a4b1d1fe189bb8cc977fd Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 3 Nov 2017 14:42:52 +0100 Subject: [PATCH 26/26] Adapts test instructions to debian based jre image --- test/jmx-selftest.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/jmx-selftest.yml b/test/jmx-selftest.yml index 2d28b14f..b73b8071 100644 --- a/test/jmx-selftest.yml +++ b/test/jmx-selftest.yml @@ -1,5 +1,8 @@ # Sets up a pod that monitors itself, to test resource usage etc. -# kubectl exec -n test-kafka jmx-selftest-... -- /bin/sh -c 'apk add --no-cache curl && curl http://localhost:5556/metrics' +# testpod=$(kubectl -n test-kafka get pods -l test-target=jmx-exporter -o=jsonpath={.items[*].metadata.name}) +# kubectl exec -n test-kafka $testpod -- apt-get update +# kubectl exec -n test-kafka $testpod -- apt-get install -y --no-install-recommends curl +# kubectl exec -n test-kafka $testpod -- curl http://localhost:5556/metrics apiVersion: extensions/v1beta1 kind: Deployment metadata: