diff --git a/README.md b/README.md index 4b039b315..b945c56d7 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ KubeBlocks add-ons. | pulsar | pulsar-bookies-recovery-2.11.2
pulsar-bookies-recovery-3.0.2
pulsar-bookies-recovery-4.0.6
pulsar-bookkeeper-2.11.2
pulsar-bookkeeper-3.0.2
pulsar-bookkeeper-4.0.6
pulsar-broker-2.11.2
pulsar-broker-3.0.2
pulsar-broker-4.0.6
pulsar-proxy-2.11.2
pulsar-proxy-3.0.2
pulsar-proxy-4.0.6
pulsar-zookeeper-2.11.2
pulsar-zookeeper-3.0.2 | Apache Pulsar is an open-source, distributed messaging and streaming platform built for the cloud. | cjc7373 caiq1nyu | | qdrant | qdrant-1.10.0
qdrant-1.13.4
qdrant-1.15.4
qdrant-1.16.3
qdrant-1.17.1
qdrant-1.5.0
qdrant-1.7.3
qdrant-1.8.1
qdrant-1.8.4 | High-performance, massive-scale Vector Database for the next generation of AI. | cjc7373 | | rabbitmq | rabbitmq-3.10.25
rabbitmq-3.11.28
rabbitmq-3.12.14
rabbitmq-3.13.7
rabbitmq-3.8.34
rabbitmq-3.9.29
rabbitmq-4.0.9
rabbitmq-4.1.6
rabbitmq-4.2.1 | RabbitMQ is a reliable and mature messaging and streaming broker. | xuriwuyun | -| redis | redis-5.0.12
redis-6.2.14
redis-6.2.17
redis-6.2.18
redis-6.2.19
redis-6.2.22
redis-7.0.6
redis-7.2.10
redis-7.2.11
redis-7.2.12
redis-7.2.14
redis-7.2.4
redis-7.2.7
redis-7.4.2
redis-7.4.5
redis-7.4.6
redis-7.4.7
redis-7.4.9
redis-8.0.1
redis-8.0.3
redis-8.0.4
redis-8.0.5
redis-8.2.1
redis-8.2.2
redis-8.2.3
redis-8.2.6
redis-8.4.0
redis-8.4.3
redis-8.6.3
redis-cluster-5.0.12
redis-cluster-6.2.14
redis-cluster-6.2.17
redis-cluster-6.2.18
redis-cluster-6.2.19
redis-cluster-6.2.22
redis-cluster-7.0.6
redis-cluster-7.2.10
redis-cluster-7.2.11
redis-cluster-7.2.12
redis-cluster-7.2.14
redis-cluster-7.2.4
redis-cluster-7.2.7
redis-cluster-7.4.2
redis-cluster-7.4.5
redis-cluster-7.4.6
redis-cluster-7.4.7
redis-cluster-7.4.9
redis-cluster-8.0.1
redis-cluster-8.0.3
redis-cluster-8.0.4
redis-cluster-8.0.5
redis-cluster-8.2.1
redis-cluster-8.2.2
redis-cluster-8.2.3
redis-cluster-8.2.6
redis-cluster-8.4.0
redis-cluster-8.4.3
redis-cluster-8.6.3
redis-sentinel-5.0.12
redis-sentinel-6.2.14
redis-sentinel-6.2.17
redis-sentinel-6.2.18
redis-sentinel-6.2.19
redis-sentinel-6.2.22
redis-sentinel-7.0.6
redis-sentinel-7.2.10
redis-sentinel-7.2.11
redis-sentinel-7.2.12
redis-sentinel-7.2.14
redis-sentinel-7.2.4
redis-sentinel-7.2.7
redis-sentinel-7.4.2
redis-sentinel-7.4.5
redis-sentinel-7.4.6
redis-sentinel-7.4.7
redis-sentinel-7.4.9
redis-sentinel-8.0.1
redis-sentinel-8.0.3
redis-sentinel-8.0.4
redis-sentinel-8.0.5
redis-sentinel-8.2.1
redis-sentinel-8.2.2
redis-sentinel-8.2.3
redis-sentinel-8.2.6
redis-sentinel-8.4.0
redis-sentinel-8.4.3
redis-sentinel-8.6.3
redis-twemproxy-0.5.0 | Redis is an in-memory database that persists on disk. The data model is key-value, but many different kind of values are supported: Strings, Lists, Sets, Sorted Sets, Hashes, Streams, HyperLogLogs, Bitmaps. | wangyelei | +| redis | | Redis is an in-memory database that persists on disk. The data model is key-value, but many different kind of values are supported: Strings, Lists, Sets, Sorted Sets, Hashes, Streams, HyperLogLogs, Bitmaps. | wangyelei | | risingwave | risingwave-compactor-v1.0.0
risingwave-compute-v1.0.0
risingwave-connector-v1.0.0
risingwave-frontend-v1.0.0
risingwave-meta-v1.0.0 | RisingWave is a distributed SQL streaming database that enables cost-efficient and reliable processing of streaming data. | RisingWave Labs | | rocketmq | rocketmq-broker-4.9.6
rocketmq-dashboard-2.0.1
rocketmq-exporter-0.0.3
rocketmq-namesrv-4.9.6 | RocketMQ is a distributed messaging and streaming platform originally developed by Alibaba, featuring high throughput, low latency, and strong reliability for processing millions of messages per second. | kizuna-lek | | starrocks-ce | starrocks-ce-be-3.2.2
starrocks-ce-be-3.3.0
starrocks-ce-fe-3.2.2
starrocks-ce-fe-3.3.0 | A Linux Foundation project, is the next-generation data platform designed to make data-intensive real-time analytics fast and easy. | ApeCloud yandongxiao | diff --git a/addons-cluster/redis/templates/_helpers.tpl b/addons-cluster/redis/templates/_helpers.tpl index 6dced4261..7e4e09b87 100644 --- a/addons-cluster/redis/templates/_helpers.tpl +++ b/addons-cluster/redis/templates/_helpers.tpl @@ -115,7 +115,17 @@ Define redis ComponentSpec with ComponentDefinition. - name: LOAD_BALANCER_ENABLED value: "true" {{- end }} + {{- if or (eq .Values.mode "replication-syncer") (eq .Values.mode "replication-twemproxy") }} + - name: KB_MAX_LAG + value: {{ .Values.maxLagOnSwitchover | quote }} + {{- end }} serviceVersion: {{ .Values.version }} + {{- if eq .Values.mode "replication-syncer" }} + configs: + - name: redis-replication-config + configMap: + name: {{ include "kblib.clusterName" . }}-redis-redis-replication-config + {{- end }} {{- if and .Values.customSecretName .Values.customSecretNamespace }} systemAccounts: - name: default @@ -210,6 +220,8 @@ replication mode: 2 replicas: 1 {{- else if eq .Values.mode "replication" }} replicas: {{ max .Values.replicas 2 }} +{{- else if eq .Values.mode "replication-syncer" }} +replicas: {{ max .Values.replicas 2 }} {{- else if eq .Values.mode "replication-twemproxy" }} replicas: {{ max .Values.replicas 2 }} {{- end }} @@ -313,6 +325,8 @@ metadata: apps.kubeblocks.io/mode: {{ .Values.mode }} {{- if and .Values.hostNetworkEnabled (eq .Values.mode "cluster") }} kubeblocks.io/host-network: "shard" + {{- else if and .Values.hostNetworkEnabled (or (eq .Values.mode "replication-syncer") (eq .Values.mode "replication-twemproxy")) }} + kubeblocks.io/host-network: "redis" {{- else if .Values.hostNetworkEnabled }} kubeblocks.io/host-network: "redis,redis-sentinel" {{- end }} @@ -321,4 +335,4 @@ metadata: {{- end }} spec: terminationPolicy: {{ .Values.extra.terminationPolicy }} -{{- end }} \ No newline at end of file +{{- 
end }} diff --git a/addons-cluster/redis/templates/cluster.yaml b/addons-cluster/redis/templates/cluster.yaml index 71599bb65..0cd4f7c1b 100644 --- a/addons-cluster/redis/templates/cluster.yaml +++ b/addons-cluster/redis/templates/cluster.yaml @@ -23,9 +23,10 @@ {{- if eq .Values.mode "replication" }} {{- include "redis-cluster.componentSpec" . | indent 2 }} {{- include "redis-cluster.sentinelComponentSpec" . | indent 2 }} + {{- else if eq .Values.mode "replication-syncer" }} + {{- include "redis-cluster.componentSpec" . | indent 2 }} {{- else if eq .Values.mode "replication-twemproxy" }} {{- include "redis-cluster.componentSpec" . | indent 2 }} - {{- include "redis-cluster.sentinelComponentSpec" . | indent 2 }} {{- include "redis-cluster.twemproxyComponentSpec" . | indent 2 }} {{- else }} {{- include "redis-cluster.componentSpec" . | indent 2 }} diff --git a/addons-cluster/redis/values.schema.json b/addons-cluster/redis/values.schema.json index 0172b5495..26efec508 100644 --- a/addons-cluster/redis/values.schema.json +++ b/addons-cluster/redis/values.schema.json @@ -16,6 +16,7 @@ "enum": [ "standalone", "replication", + "replication-syncer", "cluster", "replication-twemproxy" ] diff --git a/addons-cluster/redis/values.yaml b/addons-cluster/redis/values.yaml index 956c49d1b..0d0826103 100644 --- a/addons-cluster/redis/values.yaml +++ b/addons-cluster/redis/values.yaml @@ -6,19 +6,25 @@ ## version: 7.2.10 -## @param mode redis cluster topology mode, standalone, replication and cluster +## @param mode redis cluster topology mode: standalone, replication, replication-syncer, replication-twemproxy or cluster ## standalone: single redis instance ## replication: primary-secondary replication with redis sentinel, refer: https://redis.io/docs/management/sentinel -## replication-twemproxy: primary-secondary replication with sentinel and twemproxy, it is just a demo for how to use twemproxy with redis, not recommended for production +## replication-syncer: primary-secondary replication with
syncer-provided HA and Fake Sentinel endpoint, no redis-sentinel component +## replication-twemproxy: primary-secondary replication with syncer-provided HA and twemproxy, no redis-sentinel component ## cluster: official redis cluster, refer: https://redis.io/docs/reference/cluster-spec. mode: replication ## @param replicas specify replicas of replication ## if mode is standalone, replicas should be 1 ## if mode is replication and replicas is 2, it means 1 primary and 1 secondary +## if mode is replication-syncer and replicas is 2, it means 1 primary and 1 secondary managed by syncer ## if mode is cluster and replicas is 2, it means every shard has 1 primary and 1 secondary, the count of shards is specified by redisCluster.shardCount replicas: 2 +## @param maxLagOnSwitchover maximum allowed Redis replication offset lag for syncer switchover/failover +## Default matches Redis Sentinel max_allowed_data_loss semantics: 1MiB. +maxLagOnSwitchover: "1048576" + ## @param cpu ## cpu: 0.5 diff --git a/addons/redis/README.md b/addons/redis/README.md index 71386f4c9..d465a5c9b 100644 --- a/addons/redis/README.md +++ b/addons/redis/README.md @@ -1299,7 +1299,7 @@ metadata: spec: terminationPolicy: Delete clusterDef: redis - topology: replication-twemproxy # set topology to standalone + topology: replication-twemproxy # redis HA is managed by syncer; no redis-sentinel component componentSpecs: - name: redis replicas: 2 @@ -1319,23 +1319,6 @@ spec: resources: requests: storage: 20Gi - - name: redis-sentinel - replicas: 3 - resources: - limits: - cpu: "0.2" - memory: "0.2Gi" - requests: - cpu: "0.2" - memory: "0.2Gi" - volumeClaimTemplates: - - name: data - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 20Gi - name: redis-twemproxy # add one componet on provisioniing: twemproxy replicas: 3 resources: @@ -1345,13 +1328,14 @@ spec: requests: cpu: "0.2" memory: "0.2Gi" + ``` ```bash kubectl apply -f examples/redis/cluster-twemproxy.yaml ``` -A cluster 
named `redis-twemproxy` will be created with three components, one for Redis (2 replicas), one for Sentinel (3 replicas), and one for twemproxy (3 replicas). +A cluster named `redis-twemproxy` will be created with two components: Redis (2 replicas, HA managed by syncer) and twemproxy (3 replicas). ```yaml # snippet of cluster.yaml @@ -1360,10 +1344,9 @@ kind: Cluster spec: terminationPolicy: Delete clusterDef: redis - topology: replication-twemproxy # set topology to standalone + topology: replication-twemproxy # redis HA is managed by syncer componentSpecs: - name: redis - - name: redis-sentinel - name: redis-twemproxy # add one componet on provisioniing: twemproxy replicas: 3 # set the desired number of replicas for twemproxy resources: diff --git a/addons/redis/scripts-ut-spec/redis_start_spec.sh b/addons/redis/scripts-ut-spec/redis_start_spec.sh index 61e687809..ec55505b3 100644 --- a/addons/redis/scripts-ut-spec/redis_start_spec.sh +++ b/addons/redis/scripts-ut-spec/redis_start_spec.sh @@ -470,12 +470,28 @@ Describe "Redis Start Bash Script Tests" setup() { primary="" primary_port="" + unset COMPONENT_REPLICAS + unset SYNCER_SENTINEL_RETRY_TIMES + unset SYNCER_SENTINEL_RETRY_DELAY_SECOND + unset REDIS_START_INITIALIZED_FILE + rm -f ./redis-start-initialized unset SENTINEL_COMPONENT_NAME } Before "setup" - It "gets default primary node if SENTINEL_COMPONENT_NAME is not set" + un_setup() { + unset COMPONENT_REPLICAS + unset SYNCER_SENTINEL_RETRY_TIMES + unset SYNCER_SENTINEL_RETRY_DELAY_SECOND + unset REDIS_START_INITIALIZED_FILE + rm -f ./redis-start-initialized + unset SENTINEL_COMPONENT_NAME + } + After "un_setup" + + It "gets default primary node for single-replica component" Skip if "shell type and version unmatch, please check!" 
should_skip_when_shell_type_and_version_invalid + export COMPONENT_REPLICAS="1" get_default_initialize_primary_node() { # shellcheck disable=SC2034 primary="fake-primary" @@ -484,10 +500,316 @@ Describe "Redis Start Bash Script Tests" } When call init_or_get_primary_from_redis_sentinel The status should be success - The stdout should include "SENTINEL_COMPONENT_NAME env is not set, try to use default primary node" + The stdout should include "SENTINEL_COMPONENT_NAME env is not set and component has one replica, use default primary node" The variable primary should eq "fake-primary" The variable primary_port should eq "fake-primary-port" End + + It "uses syncer Fake Sentinel master info for multi-replica syncer mode when available" + Skip if "shell type and version unmatch, please check!" should_skip_when_shell_type_and_version_invalid + export COMPONENT_REPLICAS="2" + build_syncer_get_master_addr_by_name_command() { + echo "echo 'redis-1.redis-headless.default 6379'" + } + When call init_or_get_primary_from_redis_sentinel + The status should be success + The stdout should include "SENTINEL_COMPONENT_NAME env is not set, try to get primary from syncer Fake Sentinel." + The stdout should include "syncer Fake Sentinel has master info: redis-1.redis-headless.default 6379" + The variable primary should eq "redis-1.redis-headless.default" + The variable primary_port should eq "6379" + End + + It "uses conservative replica target for multi-replica syncer mode when no stable master exists" + Skip if "shell type and version unmatch, please check!" 
should_skip_when_shell_type_and_version_invalid + export COMPONENT_REPLICAS="2" + export SYNCER_SENTINEL_RETRY_TIMES="1" + export SYNCER_SENTINEL_RETRY_DELAY_SECOND="0" + export REDIS_START_INITIALIZED_FILE="./redis-start-initialized" + export CURRENT_POD_NAME="redis-1" + export REDIS_POD_NAME_LIST="redis-0,redis-1" + build_syncer_get_master_addr_by_name_command() { + echo "false" + } + syncer_dcs_leader_status() { + echo "syncer DCS leader configmap redis-leader is not found." + return 1 + } + When call init_or_get_primary_from_redis_sentinel + The status should be success + The stdout should include "syncer Fake Sentinel has no stable master info, start as conservative replica until syncer promotes or follows." + The stdout should include "use conservative replicaof target: 127.0.0.1 1" + The stderr should include "Function 'get_master_addr_by_name_from_syncer' failed after 1 retries." + The variable primary should eq "127.0.0.1" + The variable primary_port should eq "1" + End + + It "allows only the default pod to bootstrap primary when no start marker exists" + Skip if "shell type and version unmatch, please check!" should_skip_when_shell_type_and_version_invalid + export COMPONENT_REPLICAS="2" + export SYNCER_SENTINEL_RETRY_TIMES="1" + export SYNCER_SENTINEL_RETRY_DELAY_SECOND="0" + export REDIS_START_INITIALIZED_FILE="./redis-start-initialized" + export CURRENT_POD_NAME="redis-0" + export REDIS_POD_NAME_LIST="redis-0,redis-1" + get_default_initialize_primary_node() { + # shellcheck disable=SC2034 + primary="fake-bootstrap-primary" + # shellcheck disable=SC2034 + primary_port="fake-bootstrap-primary-port" + } + build_syncer_get_master_addr_by_name_command() { + echo "false" + } + syncer_dcs_leader_status() { + echo "syncer DCS leader configmap redis-leader is not found." 
+ return 1 + } + When call init_or_get_primary_from_redis_sentinel + The status should be success + The stdout should include "current pod redis-0 is default bootstrap primary and syncer DCS leader is not found." + The stdout should include "syncer has no master info and current pod is allowed to use default primary for initial bootstrap." + The stderr should include "Function 'get_master_addr_by_name_from_syncer' failed after 1 retries." + The variable primary should eq "fake-bootstrap-primary" + The variable primary_port should eq "fake-bootstrap-primary-port" + End + + It "allows default pod bootstrap when start marker exists but syncer DCS leader is confirmed not found" + Skip if "shell type and version unmatch, please check!" should_skip_when_shell_type_and_version_invalid + export COMPONENT_REPLICAS="2" + export SYNCER_SENTINEL_RETRY_TIMES="1" + export SYNCER_SENTINEL_RETRY_DELAY_SECOND="0" + export REDIS_START_INITIALIZED_FILE="./redis-start-initialized" + redis_start_initialized_file="$REDIS_START_INITIALIZED_FILE" + export CURRENT_POD_NAME="redis-0" + export REDIS_POD_NAME_LIST="redis-0,redis-1" + echo "already-ran" > "$REDIS_START_INITIALIZED_FILE" + get_default_initialize_primary_node() { + # shellcheck disable=SC2034 + primary="fake-bootstrap-primary" + # shellcheck disable=SC2034 + primary_port="fake-bootstrap-primary-port" + } + build_syncer_get_master_addr_by_name_command() { + echo "false" + } + syncer_dcs_leader_status() { + echo "syncer DCS leader configmap redis-leader is not found." + return 1 + } + When call init_or_get_primary_from_redis_sentinel + The status should be success + The stdout should include "redis start marker exists but syncer DCS leader is confirmed not found" + The stdout should include "current pod redis-0 is default bootstrap primary and syncer DCS leader is not found." + The stderr should include "Function 'get_master_addr_by_name_from_syncer' failed after 1 retries." 
+ The variable primary should eq "fake-bootstrap-primary" + The variable primary_port should eq "fake-bootstrap-primary-port" + End + + It "does not bootstrap default pod when syncer DCS leader already exists" + Skip if "shell type and version unmatch, please check!" should_skip_when_shell_type_and_version_invalid + export COMPONENT_REPLICAS="2" + export SYNCER_SENTINEL_RETRY_TIMES="1" + export SYNCER_SENTINEL_RETRY_DELAY_SECOND="0" + export REDIS_START_INITIALIZED_FILE="./redis-start-initialized" + export CURRENT_POD_NAME="redis-0" + export REDIS_POD_NAME_LIST="redis-0,redis-1" + build_syncer_get_master_addr_by_name_command() { + echo "false" + } + syncer_dcs_leader_status() { + echo "syncer DCS leader configmap redis-leader exists with leader redis-1." + return 0 + } + When call init_or_get_primary_from_redis_sentinel + The status should be success + The stdout should include "syncer DCS leader already exists, skip default bootstrap primary." + The stdout should include "use conservative replicaof target: 127.0.0.1 1" + The stderr should include "Function 'get_master_addr_by_name_from_syncer' failed after 1 retries." + The variable primary should eq "127.0.0.1" + The variable primary_port should eq "1" + End + + It "does not bootstrap default pod when syncer DCS leader status is unknown" + Skip if "shell type and version unmatch, please check!" 
should_skip_when_shell_type_and_version_invalid + export COMPONENT_REPLICAS="2" + export SYNCER_SENTINEL_RETRY_TIMES="1" + export SYNCER_SENTINEL_RETRY_DELAY_SECOND="0" + export REDIS_START_INITIALIZED_FILE="./redis-start-initialized" + export CURRENT_POD_NAME="redis-0" + export REDIS_POD_NAME_LIST="redis-0,redis-1" + build_syncer_get_master_addr_by_name_command() { + echo "false" + } + syncer_dcs_leader_status() { + echo "failed to query syncer DCS leader configmap redis-leader: timeout" + return 2 + } + When call init_or_get_primary_from_redis_sentinel + The status should be success + The stdout should include "syncer DCS leader status is unknown, skip default bootstrap primary." + The stdout should include "use conservative replicaof target: 127.0.0.1 1" + The stderr should include "Function 'get_master_addr_by_name_from_syncer' failed after 1 retries." + The variable primary should eq "127.0.0.1" + The variable primary_port should eq "1" + End + End + + Describe "syncer_dcs_leader_status()" + setup_fake_syncerctl() { + fake_bin="./fake-bin" + mkdir -p "$fake_bin" + cat > "$fake_bin/timeout" <<'EOF' +#!/bin/sh +shift +exec "$@" +EOF + chmod +x "$fake_bin/timeout" + export OLD_PATH="$PATH" + export PATH="$PWD/$fake_bin:$PATH" + export REDIS_COMPONENT_NAME="redis" + export CLUSTER_NAMESPACE="default" + } + Before "setup_fake_syncerctl" + + cleanup_fake_syncerctl() { + export PATH="$OLD_PATH" + rm -rf ./fake-bin + unset OLD_PATH + unset REDIS_COMPONENT_NAME + unset CLUSTER_NAMESPACE + } + After "cleanup_fake_syncerctl" + + It "uses syncerctl as the primary path and returns found" + cat > ./fake-bin/syncerctl <<'EOF' +#!/bin/sh +echo "syncer DCS leader configmap redis-leader exists with leader redis-0." +exit 0 +EOF + chmod +x ./fake-bin/syncerctl + When call syncer_dcs_leader_status + The status should be success + The stdout should include "syncer DCS leader configmap redis-leader exists with leader redis-0." 
+ The stdout should not include "fallback to python3" + End + + It "uses syncerctl not_found to allow default pod bootstrap without python3" + cat > ./fake-bin/syncerctl <<'EOF' +#!/bin/sh +echo "syncer DCS leader configmap redis-leader is not found." +exit 1 +EOF + chmod +x ./fake-bin/syncerctl + When call syncer_dcs_leader_status + The status should be failure + The stdout should include "syncer DCS leader configmap redis-leader is not found." + The stdout should not include "fallback to python3" + End + + It "treats syncerctl failure as unknown" + cat > ./fake-bin/syncerctl <<'EOF' +#!/bin/sh +echo "temporary api error" +exit 2 +EOF + chmod +x ./fake-bin/syncerctl + When call syncer_dcs_leader_status + The status should eq 2 + The stdout should include "temporary api error" + The stdout should include "syncerctl failed to query syncer DCS leader configmap redis-leader, status is unknown." + End + + It "returns unknown when syncerctl and python3 are both absent" + rm -f ./fake-bin/syncerctl + export PATH="$PWD/$fake_bin" + When call syncer_dcs_leader_status + The status should eq 2 + The stdout should include "syncerctl is not available, fallback to python3 for syncer DCS leader status." + The stdout should include "python3 is not available, syncer DCS leader status is unknown." 
+ End + End + + Context "syncer bootstrap with syncerctl DCS leader status" + setup() { + fake_bin="./fake-bin" + mkdir -p "$fake_bin" + cat > "$fake_bin/timeout" <<'EOF' +#!/bin/sh +shift +exec "$@" +EOF + chmod +x "$fake_bin/timeout" + export OLD_PATH="$PATH" + export PATH="$PWD/$fake_bin:$PATH" + export COMPONENT_REPLICAS="2" + export SYNCER_SENTINEL_RETRY_TIMES="1" + export SYNCER_SENTINEL_RETRY_DELAY_SECOND="0" + export REDIS_START_INITIALIZED_FILE="./redis-start-initialized" + export REDIS_COMPONENT_NAME="redis" + export CURRENT_POD_NAME="redis-0" + export REDIS_POD_NAME_LIST="redis-0,redis-1" + rm -f "$REDIS_START_INITIALIZED_FILE" + } + Before "setup" + + cleanup() { + export PATH="$OLD_PATH" + rm -rf ./fake-bin + rm -f ./redis-start-initialized + unset OLD_PATH + unset COMPONENT_REPLICAS + unset SYNCER_SENTINEL_RETRY_TIMES + unset SYNCER_SENTINEL_RETRY_DELAY_SECOND + unset REDIS_START_INITIALIZED_FILE + unset REDIS_COMPONENT_NAME + unset CURRENT_POD_NAME + unset REDIS_POD_NAME_LIST + } + After "cleanup" + + build_syncer_get_master_addr_by_name_command() { + echo "false" + } + + get_default_initialize_primary_node() { + # shellcheck disable=SC2034 + primary="fake-bootstrap-primary" + # shellcheck disable=SC2034 + primary_port="fake-bootstrap-primary-port" + } + + It "bootstraps only default pod when syncerctl reports leader not found" + Skip if "shell type and version unmatch, please check!" should_skip_when_shell_type_and_version_invalid + cat > ./fake-bin/syncerctl <<'EOF' +#!/bin/sh +echo "syncer DCS leader configmap redis-leader is not found." +exit 1 +EOF + chmod +x ./fake-bin/syncerctl + When call init_or_get_primary_from_redis_sentinel + The status should be success + The stdout should include "current pod redis-0 is default bootstrap primary and syncer DCS leader is not found." + The stderr should include "Function 'get_master_addr_by_name_from_syncer' failed after 1 retries." 
+ The variable primary should eq "fake-bootstrap-primary" + The variable primary_port should eq "fake-bootstrap-primary-port" + End + + It "uses conservative replica target when syncerctl reports unknown" + Skip if "shell type and version unmatch, please check!" should_skip_when_shell_type_and_version_invalid + cat > ./fake-bin/syncerctl <<'EOF' +#!/bin/sh +echo "temporary api error" +exit 2 +EOF + chmod +x ./fake-bin/syncerctl + When call init_or_get_primary_from_redis_sentinel + The status should be success + The stdout should include "syncer DCS leader status is unknown, skip default bootstrap primary." + The stdout should include "use conservative replicaof target: 127.0.0.1 1" + The stderr should include "Function 'get_master_addr_by_name_from_syncer' failed after 1 retries." + The variable primary should eq "127.0.0.1" + The variable primary_port should eq "1" + End End Context "when SENTINEL_POD_FQDN_LIST is not set" @@ -580,4 +902,4 @@ Describe "Redis Start Bash Script Tests" End End End -End \ No newline at end of file +End diff --git a/addons/redis/scripts/redis-role-probe.sh b/addons/redis/scripts/redis-role-probe.sh new file mode 100644 index 000000000..46d2f39e9 --- /dev/null +++ b/addons/redis/scripts/redis-role-probe.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -euo pipefail + +role="$(/tools/dbctl redis getrole | tr -d '[:space:]')" +case "${role}" in + primary|secondary) + ;; + *) + echo "${role}" + exit 0 + ;; +esac + +role_snapshot_period_seconds="${ROLE_SNAPSHOT_PERIOD_SECONDS:-15}" +now_us="$(date +%s%6N)" +period_us="$((role_snapshot_period_seconds * 1000000))" +term="$((now_us / period_us * period_us))" +pod_name="${CURRENT_POD_NAME:-}" +pod_uid="${CURRENT_POD_UID:-}" + +if [ -z "${pod_name}" ]; then + echo "${role}" + exit 0 +fi + +printf '{"term":"%s","PodRoleNamePairs":[{"podName":"%s","roleName":"%s","podUid":"%s"}]}\n' \ + "${term}" "${pod_name}" "${role}" "${pod_uid}" diff --git a/addons/redis/scripts/redis-start.sh 
b/addons/redis/scripts/redis-start.sh index 2a0db64d8..a277dadea 100755 --- a/addons/redis/scripts/redis-start.sh +++ b/addons/redis/scripts/redis-start.sh @@ -26,6 +26,7 @@ redis_template_conf="/etc/conf/redis.conf" redis_real_conf="/etc/redis/redis.conf" redis_acl_file="/data/users.acl" redis_acl_file_bak="/data/users.acl.bak" +redis_start_initialized_file="${REDIS_START_INITIALIZED_FILE:-/data/.kb_redis_start_initialized}" retry_times=3 retry_delay_second=2 service_port=${SERVICE_PORT:-6379} @@ -146,9 +147,10 @@ rebuild_redis_acl_file() { init_or_get_primary_from_redis_sentinel() { # check redis sentinel component env if ! env_exist SENTINEL_COMPONENT_NAME; then - # return default primary node if redis sentinel component name is not set - echo "SENTINEL_COMPONENT_NAME env is not set, try to use default primary node." - get_default_initialize_primary_node + # In syncer-managed replication, there is no external Sentinel component. + # Avoid lexicographic bootstrap as primary for multi-replica clusters because + # it can emit a transient primary role before syncer has aligned with DCS. + init_or_get_primary_from_syncer return fi @@ -263,6 +265,225 @@ retry_get_master_addr_by_name_from_sentinel() { fi } +init_or_get_primary_from_syncer() { + local component_replicas="${COMPONENT_REPLICAS:-1}" + if ! [[ "$component_replicas" =~ ^[0-9]+$ ]]; then + component_replicas=1 + fi + + if [ "$component_replicas" -le 1 ]; then + echo "SENTINEL_COMPONENT_NAME env is not set and component has one replica, use default primary node." + get_default_initialize_primary_node + return + fi + + echo "SENTINEL_COMPONENT_NAME env is not set, try to get primary from syncer Fake Sentinel." 
+ local syncer_retry_times="${SYNCER_SENTINEL_RETRY_TIMES:-6}" + local syncer_retry_delay_second="${SYNCER_SENTINEL_RETRY_DELAY_SECOND:-2}" + if retry_get_master_addr_by_name_from_syncer "$syncer_retry_times" "$syncer_retry_delay_second"; then + primary="${REDIS_SENTINEL_PRIMARY_INFO[0]}" + primary_port="${REDIS_SENTINEL_PRIMARY_INFO[1]}" + echo "syncer Fake Sentinel has master info: $primary $primary_port" + return + fi + + echo "syncer Fake Sentinel has no stable master info, start as conservative replica until syncer promotes or follows." + if syncer_initial_bootstrap_default_primary_allowed; then + echo "syncer has no master info and current pod is allowed to use default primary for initial bootstrap." + get_default_initialize_primary_node + return + fi + set_conservative_replicaof_target +} + +build_syncer_get_master_addr_by_name_command() { + local timeout_value="${SYNCER_SENTINEL_QUERY_TIMEOUT:-2}" + local syncer_sentinel_host="${SYNCER_SENTINEL_HOST:-127.0.0.1}" + local syncer_sentinel_port="${SYNCER_SENTINEL_PORT:-26379}" + echo "timeout $timeout_value redis-cli -h $syncer_sentinel_host -p $syncer_sentinel_port sentinel get-master-addr-by-name $REDIS_COMPONENT_NAME" +} + +get_master_addr_by_name_from_syncer() { + local master_addr_by_name_command + unset_xtrace_when_ut_mode_false + master_addr_by_name_command=$(build_syncer_get_master_addr_by_name_command) + echo "execute syncer get-master-addr-by-name command: $master_addr_by_name_command" + output=$(eval "$master_addr_by_name_command") + exit_code=$? 
+ set_xtrace_when_ut_mode_false + + if [ $exit_code -eq 0 ]; then + read -r -d '' -a REDIS_SENTINEL_PRIMARY_INFO <<< "$output" + if [ "${#REDIS_SENTINEL_PRIMARY_INFO[@]}" -eq 2 ] && [ -n "${REDIS_SENTINEL_PRIMARY_INFO[0]}" ] && [ -n "${REDIS_SENTINEL_PRIMARY_INFO[1]}" ]; then + echo "Successfully retrieved primary info from syncer Fake Sentinel" + return 0 + fi + echo "Empty primary info retrieved from syncer Fake Sentinel" + return 1 + fi + + if [ $exit_code -eq 124 ]; then + echo "Timeout occurred while retrieving primary info from syncer Fake Sentinel. Retrying..." + else + echo "Error occurred while retrieving primary info from syncer Fake Sentinel. Retrying..." + fi + return 1 +} + +retry_get_master_addr_by_name_from_syncer() { + local max_retry="$1" + local retry_delay="$2" + if call_func_with_retry "$max_retry" "$retry_delay" get_master_addr_by_name_from_syncer; then + return 0 + fi + echo "Failed to retrieve primary info from syncer Fake Sentinel after $max_retry retries." + return 1 +} + +set_conservative_replicaof_target() { + primary="${SYNCER_CONSERVATIVE_REPLICAOF_HOST:-127.0.0.1}" + primary_port="${SYNCER_CONSERVATIVE_REPLICAOF_PORT:-1}" + echo "use conservative replicaof target: $primary $primary_port" +} + +is_redis_start_initialized() { + [ -f "$redis_start_initialized_file" ] +} + +mark_redis_start_initialized() { + mkdir -p "$(dirname "$redis_start_initialized_file")" 2>/dev/null || true + date +%s > "$redis_start_initialized_file" 2>/dev/null || true +} + +syncer_initial_bootstrap_default_primary_allowed() { + local dcs_leader_status + syncer_dcs_leader_status + dcs_leader_status=$? + + if [ "$dcs_leader_status" -eq 0 ]; then + echo "syncer DCS leader already exists, skip default bootstrap primary." + return 1 + fi + + if [ "$dcs_leader_status" -ne 1 ]; then + echo "syncer DCS leader status is unknown, skip default bootstrap primary." 
+ return 1 + fi + + if is_redis_start_initialized; then + echo "redis start marker exists but syncer DCS leader is confirmed not found: $redis_start_initialized_file" + fi + + local min_lex_pod + min_lex_pod=$(min_lexicographical_order_pod "$REDIS_POD_NAME_LIST") + if equals "$CURRENT_POD_NAME" "$min_lex_pod"; then + echo "current pod $CURRENT_POD_NAME is default bootstrap primary and syncer DCS leader is not found." + return 0 + fi + + echo "current pod $CURRENT_POD_NAME is not default bootstrap primary: $min_lex_pod" + return 1 +} + +syncer_dcs_leader_status() { + local leader_configmap_name="${SYNCER_DCS_LEADER_CONFIGMAP_NAME:-${REDIS_COMPONENT_NAME}-leader}" + local query_timeout="${SYNCER_DCS_QUERY_TIMEOUT:-2}" + if command -v syncerctl >/dev/null 2>&1; then + syncerctl_dcs_leader_status "$leader_configmap_name" "$query_timeout" + return $? + fi + + echo "syncerctl is not available, fallback to python3 for syncer DCS leader status." + if ! command -v python3 >/dev/null 2>&1; then + echo "python3 is not available, syncer DCS leader status is unknown." 
+ return 2 + fi + + timeout "$query_timeout" python3 - "$leader_configmap_name" <<'PY' +import json +import os +import ssl +import sys +import urllib.error +import urllib.request + +name = sys.argv[1] +host = os.environ.get("KUBERNETES_SERVICE_HOST") +port = os.environ.get("KUBERNETES_SERVICE_PORT", "443") +token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" +ca_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" +namespace_path = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" +namespace = os.environ.get("CLUSTER_NAMESPACE") +try: + if not namespace: + with open(namespace_path, encoding="utf-8") as f: + namespace = f.read().strip() + with open(token_path, encoding="utf-8") as f: + token = f.read().strip() + if not host or not namespace or not token: + raise RuntimeError("missing kubernetes service account context") + + url = f"https://{host}:{port}/api/v1/namespaces/{namespace}/configmaps/{name}" + context = ssl.create_default_context(cafile=ca_path) + request = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"}) + with urllib.request.urlopen(request, context=context, timeout=2) as response: + obj = json.loads(response.read().decode("utf-8")) + leader = obj.get("metadata", {}).get("annotations", {}).get("leader", "") + if leader: + print(f"syncer DCS leader configmap {name} exists with leader {leader}.") + sys.exit(0) + print(f"syncer DCS leader configmap {name} exists but leader is empty.") + sys.exit(1) +except urllib.error.HTTPError as e: + if e.code == 404: + print(f"syncer DCS leader configmap {name} is not found.") + sys.exit(1) + print(f"failed to query syncer DCS leader configmap {name}: HTTP {e.code}") + sys.exit(2) +except Exception as e: + print(f"failed to query syncer DCS leader configmap {name}: {e}") + sys.exit(2) +PY + local exit_code=$? + if [ "$exit_code" -eq 124 ]; then + echo "query syncer DCS leader configmap $leader_configmap_name timed out, status is unknown." 
+ return 2 + fi + if [ "$exit_code" -eq 0 ] || [ "$exit_code" -eq 1 ]; then + return "$exit_code" + fi + return 2 +} + +syncerctl_dcs_leader_status() { + local leader_configmap_name="$1" + local query_timeout="$2" + local namespace="${CLUSTER_NAMESPACE:-${KB_NAMESPACE:-${POD_NAMESPACE:-}}}" + local output + local exit_code + local args=(dcs-leader-status --configmap "$leader_configmap_name") + if [ -n "$namespace" ]; then + args+=(--namespace "$namespace") + fi + + if output=$(timeout "$query_timeout" syncerctl "${args[@]}" 2>&1); then + exit_code=0 + else + exit_code=$? + fi + echo "$output" + + if [ "$exit_code" -eq 124 ]; then + echo "query syncer DCS leader configmap $leader_configmap_name by syncerctl timed out, status is unknown." + return 2 + fi + if [ "$exit_code" -eq 0 ] || [ "$exit_code" -eq 1 ]; then + return "$exit_code" + fi + echo "syncerctl failed to query syncer DCS leader configmap $leader_configmap_name, status is unknown." + return 2 +} + get_default_initialize_primary_node() { # TODO: if has advertise svc and port, we should use it as default primary node info instead of the fqdn min_lex_pod=$(min_lexicographical_order_pod "$REDIS_POD_NAME_LIST") @@ -388,6 +609,7 @@ build_redis_conf() { build_replicaof_config rebuild_redis_acl_file build_redis_default_accounts + mark_redis_start_initialized } # This is magic for shellspec ut framework. 
diff --git a/addons/redis/scripts/redis-twemproxy-start.sh b/addons/redis/scripts/redis-twemproxy-start.sh
new file mode 100644
index 000000000..0139109cc
--- /dev/null
+++ b/addons/redis/scripts/redis-twemproxy-start.sh
@@ -0,0 +1,163 @@
+#!/bin/sh
+# Entrypoint of the twemproxy container: runs nutcracker and, when a Fake
+# Sentinel endpoint is configured, polls it for the master address and
+# restarts nutcracker whenever that address changes.
+set -eu
+
+NUTCRACKER_CONF="${NUTCRACKER_CONF:-/etc/proxy/nutcracker.conf}"
+NUTCRACKER_PID_FILE="${NUTCRACKER_PID_FILE:-/tmp/nutcracker.pid}"
+NUTCRACKER_ARGS="${NUTCRACKER_ARGS:-}"
+SENTINEL_POLL_INTERVAL_SECONDS="${TWEMPROXY_SENTINEL_POLL_INTERVAL_SECONDS:-1}"
+LAST_MASTER_ADDR=""
+NUTCRACKER_PID=""
+
+# Echo the arguments prefixed with the current UTC timestamp.
+log_with_time() {
+  echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) $*"
+}
+
+# Print "<host>:<port>" for $1. A host that contains a dot is replaced by the
+# first address `getent hosts` returns for it, if any.
+normalize_master_addr() {
+  raw_addr="$1"
+  addr_host="${raw_addr%:*}"
+  addr_port="${raw_addr##*:}"
+  case "$addr_host" in
+    *.*)
+      looked_up="$(getent hosts "$addr_host" 2>/dev/null | awk 'NR == 1 {print $1}' || true)"
+      [ -z "$looked_up" ] || addr_host="$looked_up"
+      ;;
+  esac
+  printf '%s:%s' "$addr_host" "$addr_port"
+}
+
+# Print the first comma-separated item of $1, without its leading "<prefix>:"
+# when the item contains a colon.
+first_value() {
+  head_item="${1:-}"
+  head_item="${head_item%%,*}"
+  # "${var#*:}" leaves the value untouched when it contains no colon.
+  printf '%s' "${head_item#*:}"
+}
+
+# Print the first REDIS_SERVICE_NAMES entry with a trailing "-redis" removed;
+# print nothing when that entry is empty.
+default_master_name() {
+  first_service="$(first_value "${REDIS_SERVICE_NAMES:-}")"
+  [ -z "$first_service" ] || printf '%s' "${first_service%-redis}"
+}
+
+# Send "SENTINEL get-master-addr-by-name <name>" as a RESP array to host $1,
+# port $2 through nc and print "<host>:<port>" built from the first two bulk
+# strings of the reply. $3 is the master name. Prints nothing otherwise.
+query_sentinel_master() {
+  target_host="$1"
+  target_port="$2"
+  queried_name="$3"
+
+  {
+    printf '*3\r\n'
+    printf '$8\r\nSENTINEL\r\n'
+    printf '$23\r\nget-master-addr-by-name\r\n'
+    printf '$%s\r\n%s\r\n' "${#queried_name}" "$queried_name"
+  } | nc -w 2 "$target_host" "$target_port" 2>/dev/null | awk '
+    /^\$/ {
+      if (getline value) {
+        gsub(/\r/, "", value)
+        fields++
+        if (fields == 1) {
+          host = value
+        } else if (fields == 2) {
+          print host ":" value
+          exit
+        }
+      }
+    }
+  '
+}
+
+# Start nutcracker in the background and remember its pid in NUTCRACKER_PID.
+start_nutcracker() {
+  log_with_time "starting nutcracker"
+  # shellcheck disable=SC2086
+  nutcracker -c "$NUTCRACKER_CONF" -v 4 -m 16384 -p "$NUTCRACKER_PID_FILE" $NUTCRACKER_ARGS &
+  NUTCRACKER_PID="$!"
+}
+
+# Terminate the tracked nutcracker process, if it is still alive, and wait for
+# it; NUTCRACKER_PID is cleared in every case.
+stop_nutcracker() {
+  if [ -z "${NUTCRACKER_PID:-}" ] || ! kill -0 "$NUTCRACKER_PID" 2>/dev/null; then
+    NUTCRACKER_PID=""
+    return 0
+  fi
+  log_with_time "stopping nutcracker pid=$NUTCRACKER_PID"
+  kill "$NUTCRACKER_PID" 2>/dev/null || true
+  wait "$NUTCRACKER_PID" 2>/dev/null || true
+  NUTCRACKER_PID=""
+}
+
+# Stop nutcracker, then start it again.
+restart_nutcracker() {
+  stop_nutcracker
+  start_nutcracker
+}
+
+# TERM/INT handler: stop nutcracker and exit with status 0.
+shutdown() {
+  stop_nutcracker
+  exit 0
+}
+
+trap shutdown TERM INT
+
+sentinel_host="$(first_value "${REDIS_SENTINEL_SERVICE_HOSTS:-}")"
+sentinel_port="$(first_value "${REDIS_SENTINEL_SERVICE_PORTS:-}")"
+sentinel_master_name="${SENTINEL_MASTER_NAME:-$(default_master_name)}"
+
+# Without a complete Sentinel endpoint there is nothing to watch: replace this
+# shell with nutcracker itself.
+if [ -z "$sentinel_host" ] || [ -z "$sentinel_port" ] || [ -z "$sentinel_master_name" ]; then
+  echo "Fake Sentinel service is not configured; starting nutcracker without master watcher"
+  # shellcheck disable=SC2086
+  exec nutcracker -c "$NUTCRACKER_CONF" -v 4 -m 16384 $NUTCRACKER_ARGS
+fi
+
+# The watcher talks to Sentinel through nc, so fail early when it is missing.
+command -v nc >/dev/null 2>&1 || {
+  echo "nc is required for twemproxy master watcher but was not found"
+  exit 1
+}
+
+echo "twemproxy master watcher enabled: sentinel=${sentinel_host}:${sentinel_port}, master=${sentinel_master_name}, interval=${SENTINEL_POLL_INTERVAL_SECONDS}s"
+LAST_MASTER_ADDR="$(query_sentinel_master "$sentinel_host" "$sentinel_port" "$sentinel_master_name" || true)"
+if [ -n "$LAST_MASTER_ADDR" ]; then
+  LAST_MASTER_ADDR="$(normalize_master_addr "$LAST_MASTER_ADDR")"
+fi
+log_with_time "initial Fake Sentinel master=${LAST_MASTER_ADDR:-unknown}"
+
+start_nutcracker
+
+while true; do
+  # Bring nutcracker back when the tracked process is gone.
+  if [ -n "${NUTCRACKER_PID:-}" ] && ! kill -0 "$NUTCRACKER_PID" 2>/dev/null; then
+    log_with_time "nutcracker exited; restarting"
+    start_nutcracker
+  fi
+
+  current_master_addr="$(query_sentinel_master "$sentinel_host" "$sentinel_port" "$sentinel_master_name" || true)"
+  # An empty answer (no usable reply) leaves the last known master untouched.
+  if [ -n "$current_master_addr" ]; then
+    current_master_addr="$(normalize_master_addr "$current_master_addr")"
+    previous_master_addr="$LAST_MASTER_ADDR"
+    LAST_MASTER_ADDR="$current_master_addr"
+    # Restart only on a change from one known master to a different one.
+    if [ -n "$previous_master_addr" ] && [ "$previous_master_addr" != "$current_master_addr" ]; then
+      log_with_time "Fake Sentinel master changed: ${previous_master_addr} -> ${current_master_addr}; restarting nutcracker"
+      restart_nutcracker
+      log_with_time "nutcracker restarted after master change"
+    fi
+  fi
+
+  sleep "$SENTINEL_POLL_INTERVAL_SECONDS"
+done
diff --git a/addons/redis/scripts/redis5-start.sh b/addons/redis/scripts/redis5-start.sh
index 491b833a7..7bc722359 100755
--- a/addons/redis/scripts/redis5-start.sh
+++ b/addons/redis/scripts/redis5-start.sh
@@ -24,8 +24,7 @@ primary=""
 primary_port="6379"
 redis_template_conf="/etc/conf/redis.conf"
 redis_real_conf="/etc/redis/redis.conf"
-redis_acl_file="/data/users.acl"
-redis_acl_file_bak="/data/users.acl.bak"
+redis_start_initialized_file="${REDIS_START_INITIALIZED_FILE:-/data/.kb_redis_start_initialized}"
 retry_times=3
 retry_delay_second=2
 
@@ -107,9 +106,10 @@ build_replicaof_config() {
 init_or_get_primary_from_redis_sentinel() {
   # check redis sentinel component env
   if ! env_exist SENTINEL_COMPONENT_NAME; then
-    # return default primary node if redis sentinel component name is not set
-    echo "SENTINEL_COMPONENT_NAME env is not set, try to use default primary node."
-    get_default_initialize_primary_node
+    # In syncer-managed replication, there is no external Sentinel component.
+    # Avoid lexicographic bootstrap as primary for multi-replica clusters because
+    # it can emit a transient primary role before syncer has aligned with DCS.
+    init_or_get_primary_from_syncer
     return
   fi
 
@@ -228,6 +228,282 @@ retry_get_master_addr_by_name_from_sentinel() {
   fi
 }
 
+# Resolve the primary when no Sentinel component exists (syncer-managed replication).
+# Single-replica components delegate to get_default_initialize_primary_node; otherwise
+# primary/primary_port come from the syncer Fake Sentinel, from the default bootstrap
+# node (only when the DCS leader is confirmed absent), or from the conservative target.
+init_or_get_primary_from_syncer() {
+  local component_replicas="${COMPONENT_REPLICAS:-1}"
+  # A non-numeric replica count is treated as a single replica.
+  if ! [[ "$component_replicas" =~ ^[0-9]+$ ]]; then
+    component_replicas=1
+  fi
+
+  if [ "$component_replicas" -le 1 ]; then
+    echo "SENTINEL_COMPONENT_NAME env is not set and component has one replica, use default primary node."
+    get_default_initialize_primary_node
+    return
+  fi
+
+  echo "SENTINEL_COMPONENT_NAME env is not set, try to get primary from syncer Fake Sentinel."
+  local syncer_retry_times="${SYNCER_SENTINEL_RETRY_TIMES:-6}"
+  local syncer_retry_delay_second="${SYNCER_SENTINEL_RETRY_DELAY_SECOND:-2}"
+  if retry_get_master_addr_by_name_from_syncer "$syncer_retry_times" "$syncer_retry_delay_second"; then
+    primary="${REDIS_SENTINEL_PRIMARY_INFO[0]}"
+    primary_port="${REDIS_SENTINEL_PRIMARY_INFO[1]}"
+    echo "syncer Fake Sentinel has master info: $primary $primary_port"
+    return
+  fi
+
+  # This line is logged before the bootstrap check, so it is also printed when the
+  # pod goes on to use the default bootstrap primary below.
+  echo "syncer Fake Sentinel has no stable master info, start as conservative replica until syncer promotes or follows."
+  if syncer_initial_bootstrap_default_primary_allowed; then
+    echo "syncer has no master info and current pod is allowed to use default primary for initial bootstrap."
+    get_default_initialize_primary_node
+    return
+  fi
+  set_conservative_replicaof_target
+}
+
+# Print the redis-cli command line that asks the syncer Fake Sentinel
+# (SYNCER_SENTINEL_HOST:SYNCER_SENTINEL_PORT, default 127.0.0.1:26379) for the
+# master of REDIS_COMPONENT_NAME, wrapped in `timeout`.
+build_syncer_get_master_addr_by_name_command() {
+  local timeout_value="${SYNCER_SENTINEL_QUERY_TIMEOUT:-2}"
+  local syncer_sentinel_host="${SYNCER_SENTINEL_HOST:-127.0.0.1}"
+  local syncer_sentinel_port="${SYNCER_SENTINEL_PORT:-26379}"
+  echo "timeout $timeout_value redis-cli -h $syncer_sentinel_host -p $syncer_sentinel_port sentinel get-master-addr-by-name $REDIS_COMPONENT_NAME"
+}
+
+# Query the syncer Fake Sentinel once.
+# Returns 0 and fills the global array REDIS_SENTINEL_PRIMARY_INFO with exactly
+# two non-empty fields (host, port); returns 1 on any other reply, timeout or error.
+get_master_addr_by_name_from_syncer() {
+  local master_addr_by_name_command
+  # Scratch variables stay local: bash scoping is dynamic, so assigning to a
+  # global here could overwrite a caller's own `output`/`exit_code`.
+  local output
+  local exit_code
+  unset_xtrace_when_ut_mode_false
+  master_addr_by_name_command=$(build_syncer_get_master_addr_by_name_command)
+  echo "execute syncer get-master-addr-by-name command: $master_addr_by_name_command"
+  # The command string embeds SYNCER_SENTINEL_* and REDIS_COMPONENT_NAME and is
+  # eval'ed; NOTE(review): assumes these env values are trusted -- confirm.
+  output=$(eval "$master_addr_by_name_command")
+  exit_code=$?
+  set_xtrace_when_ut_mode_false
+
+  if [ "$exit_code" -eq 0 ]; then
+    read -r -d '' -a REDIS_SENTINEL_PRIMARY_INFO <<< "$output"
+    if [ "${#REDIS_SENTINEL_PRIMARY_INFO[@]}" -eq 2 ] && [ -n "${REDIS_SENTINEL_PRIMARY_INFO[0]}" ] && [ -n "${REDIS_SENTINEL_PRIMARY_INFO[1]}" ]; then
+      echo "Successfully retrieved primary info from syncer Fake Sentinel"
+      return 0
+    fi
+    echo "Empty primary info retrieved from syncer Fake Sentinel"
+    return 1
+  fi
+
+  # 124 is the exit status `timeout` uses when the time limit is hit.
+  if [ "$exit_code" -eq 124 ]; then
+    echo "Timeout occurred while retrieving primary info from syncer Fake Sentinel. Retrying..."
+  else
+    echo "Error occurred while retrieving primary info from syncer Fake Sentinel. Retrying..."
+  fi
+  return 1
+}
+
+# Run get_master_addr_by_name_from_syncer through call_func_with_retry.
+# $1: maximum retries, $2: delay between retries. Returns 0 on success, 1 otherwise.
+retry_get_master_addr_by_name_from_syncer() {
+  local max_retry="$1"
+  local retry_delay="$2"
+  if call_func_with_retry "$max_retry" "$retry_delay" get_master_addr_by_name_from_syncer; then
+    return 0
+  fi
+  echo "Failed to retrieve primary info from syncer Fake Sentinel after $max_retry retries."
+  return 1
+}
+
+# Point primary/primary_port at the conservative replicaof target
+# (SYNCER_CONSERVATIVE_REPLICAOF_HOST/PORT, default 127.0.0.1:1).
+set_conservative_replicaof_target() {
+  primary="${SYNCER_CONSERVATIVE_REPLICAOF_HOST:-127.0.0.1}"
+  primary_port="${SYNCER_CONSERVATIVE_REPLICAOF_PORT:-1}"
+  echo "use conservative replicaof target: $primary $primary_port"
+}
+
+# Succeed when the start marker file exists.
+is_redis_start_initialized() {
+  [ -f "$redis_start_initialized_file" ]
+}
+
+# Best-effort: write the current epoch seconds to the start marker file.
+# Failures to create the directory or the file are deliberately ignored.
+mark_redis_start_initialized() {
+  mkdir -p "$(dirname "$redis_start_initialized_file")" 2>/dev/null || true
+  date +%s > "$redis_start_initialized_file" 2>/dev/null || true
+}
+
+# Decide whether this pod may bootstrap as the default primary.
+# Returns 0 only when the syncer DCS leader is confirmed absent (status 1) and
+# CURRENT_POD_NAME is the lexicographically smallest pod; returns 1 otherwise,
+# including when the leader status is unknown.
+syncer_initial_bootstrap_default_primary_allowed() {
+  local dcs_leader_status
+  syncer_dcs_leader_status
+  dcs_leader_status=$?
+
+  if [ "$dcs_leader_status" -eq 0 ]; then
+    echo "syncer DCS leader already exists, skip default bootstrap primary."
+    return 1
+  fi
+
+  if [ "$dcs_leader_status" -ne 1 ]; then
+    echo "syncer DCS leader status is unknown, skip default bootstrap primary."
+    return 1
+  fi
+
+  # The marker is informational only here; it does not change the decision.
+  if is_redis_start_initialized; then
+    echo "redis start marker exists but syncer DCS leader is confirmed not found: $redis_start_initialized_file"
+  fi
+
+  local min_lex_pod
+  min_lex_pod=$(min_lexicographical_order_pod "$REDIS_POD_NAME_LIST")
+  if equals "$CURRENT_POD_NAME" "$min_lex_pod"; then
+    echo "current pod $CURRENT_POD_NAME is default bootstrap primary and syncer DCS leader is not found."
+    return 0
+  fi
+
+  echo "current pod $CURRENT_POD_NAME is not default bootstrap primary: $min_lex_pod"
+  return 1
+}
+
+# Report the syncer DCS leader status: 0 = a leader is recorded, 1 = confirmed
+# absent, 2 = unknown. Uses syncerctl when it is on PATH; otherwise python3 reads
+# the leader configmap from the Kubernetes API (missing configmap or empty
+# "leader" annotation -> 1, any other failure -> 2).
+syncer_dcs_leader_status() {
+  local leader_configmap_name="${SYNCER_DCS_LEADER_CONFIGMAP_NAME:-${REDIS_COMPONENT_NAME}-leader}"
+  local query_timeout="${SYNCER_DCS_QUERY_TIMEOUT:-2}"
+  if command -v syncerctl >/dev/null 2>&1; then
+    syncerctl_dcs_leader_status "$leader_configmap_name" "$query_timeout"
+    return $?
+  fi
+
+  echo "syncerctl is not available, fallback to python3 for syncer DCS leader status."
+  if ! command -v python3 >/dev/null 2>&1; then
+    echo "python3 is not available, syncer DCS leader status is unknown."
+    return 2
+  fi
+
+  # The query timeout is also passed to the script so the HTTP socket timeout
+  # follows SYNCER_DCS_QUERY_TIMEOUT instead of a fixed 2 seconds.
+  timeout "$query_timeout" python3 - "$leader_configmap_name" "$query_timeout" <<'PY'
+import json
+import math
+import os
+import ssl
+import sys
+import urllib.error
+import urllib.request
+
+name = sys.argv[1]
+# Socket timeout in seconds; anything that is not a finite positive number
+# (for example a duration with a unit suffix) falls back to 2 seconds.
+try:
+    request_timeout = float(sys.argv[2])
+except (IndexError, ValueError):
+    request_timeout = 2.0
+if not math.isfinite(request_timeout) or request_timeout <= 0:
+    request_timeout = 2.0
+host = os.environ.get("KUBERNETES_SERVICE_HOST")
+port = os.environ.get("KUBERNETES_SERVICE_PORT", "443")
+token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+ca_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+namespace_path = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
+namespace = os.environ.get("CLUSTER_NAMESPACE")
+try:
+    if not namespace:
+        with open(namespace_path, encoding="utf-8") as f:
+            namespace = f.read().strip()
+    with open(token_path, encoding="utf-8") as f:
+        token = f.read().strip()
+    if not host or not namespace or not token:
+        raise RuntimeError("missing kubernetes service account context")
+
+    url = f"https://{host}:{port}/api/v1/namespaces/{namespace}/configmaps/{name}"
+    context = ssl.create_default_context(cafile=ca_path)
+    request = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
+    with urllib.request.urlopen(request, context=context, timeout=request_timeout) as response:
+        obj = json.loads(response.read().decode("utf-8"))
+    leader = obj.get("metadata", {}).get("annotations", {}).get("leader", "")
+    if leader:
+        print(f"syncer DCS leader configmap {name} exists with leader {leader}.")
+        sys.exit(0)
+    print(f"syncer DCS leader configmap {name} exists but leader is empty.")
+    sys.exit(1)
+except urllib.error.HTTPError as e:
+    if e.code == 404:
+        print(f"syncer DCS leader configmap {name} is not found.")
+        sys.exit(1)
+    print(f"failed to query syncer DCS leader configmap {name}: HTTP {e.code}")
+    sys.exit(2)
+except Exception as e:
+    print(f"failed to query syncer DCS leader configmap {name}: {e}")
+    sys.exit(2)
+PY
+  local exit_code=$?
+  if [ "$exit_code" -eq 124 ]; then
+    echo "query syncer DCS leader configmap $leader_configmap_name timed out, status is unknown."
+    return 2
+  fi
+  if [ "$exit_code" -eq 0 ] || [ "$exit_code" -eq 1 ]; then
+    return "$exit_code"
+  fi
+  # Exit code 2 is the script's own "unknown" result and comes with its own
+  # message; any other code would otherwise be mapped to 2 without a trace.
+  if [ "$exit_code" -ne 2 ]; then
+    echo "python3 exited with code $exit_code while querying syncer DCS leader configmap $leader_configmap_name, status is unknown."
+  fi
+  return 2
+}
+
+# Ask syncerctl for the DCS leader status.
+# $1: leader configmap name, $2: value passed to `timeout`.
+# Prints syncerctl's combined output; exit codes 0 and 1 are passed through,
+# a timeout (124) or any other code is reported as 2 (unknown).
+syncerctl_dcs_leader_status() {
+  local leader_configmap_name="$1"
+  local query_timeout="$2"
+  local namespace="${CLUSTER_NAMESPACE:-${KB_NAMESPACE:-${POD_NAMESPACE:-}}}"
+  local output
+  local exit_code
+  local args=(dcs-leader-status --configmap "$leader_configmap_name")
+  if [ -n "$namespace" ]; then
+    args+=(--namespace "$namespace")
+  fi
+
+  # Branch on the command itself so its exit status is captured in either case.
+  if output=$(timeout "$query_timeout" syncerctl "${args[@]}" 2>&1); then
+    exit_code=0
+  else
+    exit_code=$?
+  fi
+  echo "$output"
+
+  if [ "$exit_code" -eq 124 ]; then
+    echo "query syncer DCS leader configmap $leader_configmap_name by syncerctl timed out, status is unknown."
+    return 2
+  fi
+  if [ "$exit_code" -eq 0 ] || [ "$exit_code" -eq 1 ]; then
+    return "$exit_code"
+  fi
+  echo "syncerctl failed to query syncer DCS leader configmap $leader_configmap_name, status is unknown."
+  return 2
+}
+
 get_default_initialize_primary_node() {
   # TODO: if has advertise svc and port, we should use it as default primary node info instead of the fqdn
   min_lex_pod=$(min_lexicographical_order_pod "$REDIS_POD_NAME_LIST")
@@ -324,8 +600,9 @@ build_redis_conf() {
   build_announce_ip_and_port
   build_redis_service_port
   build_replicaof_config
-  rebuild_redis_acl_file
+  # Redis 5 has no ACL support; account config is handled by requirepass/masterauth.
   build_redis_default_accounts
+  mark_redis_start_initialized
 }
 
 # This is magic for shellspec ut framework.
diff --git a/addons/redis/templates/_helpers.tpl b/addons/redis/templates/_helpers.tpl
index 860669c43..ac9635510 100644
--- a/addons/redis/templates/_helpers.tpl
+++ b/addons/redis/templates/_helpers.tpl
@@ -55,6 +55,13 @@ Define redis component definition regular expression name prefix
 ^redis-\d+
 {{- end -}}
 
+{{/*
+Define redis syncer component definition regular expression name prefix
+*/}}
+{{- define "redisSyncer.cmpdRegexpPattern" -}}
+^redis-syncer-\d+
+{{- end -}}
+
 {{/*
 Define redis 7.X component definition regular expression name prefix
 */}}
@@ -122,6 +129,13 @@ Define redis component script template name
 redis-scripts-template-{{ .Chart.Version }}
 {{- end -}}
 
+{{/*
+Define redis sentinel component script template name
+*/}}
+{{- define "redisSentinel.scriptsTemplate" -}}
+redis-sentinel-scripts-template-{{ .Chart.Version }}
+{{- end -}}
+
 {{/*
 Define redis cluster component script template name
 */}}
@@ -129,6 +143,13 @@ Define redis cluster component script template name
 redis-cluster-scripts-template-{{ .Chart.Version }}
 {{- end -}}
 
+{{/*
+Define redis metrics config name
+*/}}
+{{- define "redis.metricsConfiguration" -}}
+redis-metrics-config
+{{- end -}}
+
 {{- define "redis7.image" -}}
 {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository }}:{{ .Values.image.tag.major7.minor72 }}
 {{- end }}
@@ -167,10 +188,36 @@ Generate scripts configmap
 */}}
 {{- define "redis.extend.scripts" -}}
+{{- $sentinelScripts := include "redisSentinel.scriptNames" $ | splitList "," }}
 {{- range $path, $_ := $.Files.Glob "scripts/**" }}
+{{- $name := $path | base }}
+{{- if not (has $name $sentinelScripts) }}
 {{ $path | base }}: |-
 {{- $.Files.Get $path | nindent 2 }}
 {{- end }}
 {{- end }}
+{{- end }}
+
+{{/*
+Generate scripts configmap for the independent redis-sentinel component.
+*/}}
+{{- define "redis-sentinel.extend.scripts" -}}
+{{- $sentinelScripts := include "redisSentinel.scriptNames" $ | splitList "," }}
+{{- range $path, $_ := $.Files.Glob "scripts/**" }}
+{{- $name := $path | base }}
+{{- if has $name $sentinelScripts }}
+{{ $path | base }}: |-
+{{- $.Files.Get $path | nindent 2 }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Comma-separated file names of the scripts that belong to the independent
+redis-sentinel component; single source for both script configmaps.
+*/}}
+{{- define "redisSentinel.scriptNames" -}}
+redis-sentinel-account-provision.sh,redis-sentinel-member-join.sh,redis-sentinel-member-leave.sh,redis-sentinel-ping.sh,redis-sentinel-post-start.sh,redis-sentinel-start-v2.sh,redis5-sentinel-start-v2.sh,redis6-sentinel-post-start.sh
+{{- end -}}
 
 {{/*
 Generate scripts configmap
@@ -186,23 +233,6 @@ redis-account.sh: |-
 {{- end }}
 {{- end }}
 
-{{- define "redis.config.reconfigureAction" -}}
-reconfigure:
-  exec:
-    container: redis
-    targetPodSelector: All
-    command:
-      - /bin/sh
-      - -c
-      - |
-        set -eu
-
-        env | cut -d= -f1 | grep -E '^[a-z0-9_.-][a-z0-9_.-]*$' | sort -u | while IFS= read -r param; do
-          [ -n "${param}" ] || continue
-          /scripts/reload-parameter.sh "${param}" "$(printenv "${param}")"
-        done
-{{- end -}}
-
 {{- define "apeDts.reshard.image" -}}
 {{ .Values.image.apeDts.registry | default ( .Values.image.registry | default "docker.io" ) }}/{{ .Values.image.apeDts.repository}}:{{ .Values.image.apeDts.reshardTag }}
 {{- end }}
diff --git a/addons/redis/templates/clusterdefinition.yaml b/addons/redis/templates/clusterdefinition.yaml
index 05db3d219..1ec8f328c 100644
--- a/addons/redis/templates/clusterdefinition.yaml
+++ b/addons/redis/templates/clusterdefinition.yaml
@@ -29,28 +29,34 @@ spec:
         update:
           - redis-sentinel
           - redis
+    - name: replication-syncer
+      components:
+        - name: redis
+          compDef: {{ include "redisSyncer.cmpdRegexpPattern" . }}
+      orders:
+        provision:
+          - redis
+        terminate:
+          - redis
+        update:
+          - redis
     - name: replication-twemproxy
       components:
         - name: redis
-          compDef: {{ include "redis.cmpdRegexpPattern" . }}
-        - name: redis-sentinel
-          compDef: {{ include "redisSentinel.cmpdRegexpPattern" . }}
+          compDef: {{ include "redisSyncer.cmpdRegexpPattern" .
}} - name: redis-twemproxy compDef: {{ include "redisTwemproxy.cmpdRegexpPattern" . }} orders: provision: - - redis-sentinel - redis - redis-twemproxy terminate: - redis-twemproxy - - redis-sentinel - redis update: - - redis-sentinel - redis-twemproxy - redis - name: cluster shardings: - name: shard - shardingDef: redis-cluster \ No newline at end of file + shardingDef: redis-cluster diff --git a/addons/redis/templates/cmpd-redis-sentinel.yaml b/addons/redis/templates/cmpd-redis-sentinel.yaml index ac1f3616f..7c0a71dd0 100644 --- a/addons/redis/templates/cmpd-redis-sentinel.yaml +++ b/addons/redis/templates/cmpd-redis-sentinel.yaml @@ -60,7 +60,7 @@ spec: {{- end }} scripts: - name: redis-scripts - template: {{ include "redis.scriptsTemplate" $ }} + template: {{ include "redisSentinel.scriptsTemplate" $ }} namespace: {{ $.Release.Namespace }} volumeName: scripts defaultMode: 0555 @@ -289,4 +289,4 @@ spec: - sh - -c - /scripts/redis-sentinel-ping.sh -{{- end }} \ No newline at end of file +{{- end }} diff --git a/addons/redis/templates/cmpd-redis-syncer.yaml b/addons/redis/templates/cmpd-redis-syncer.yaml new file mode 100644 index 000000000..ce51bbf9a --- /dev/null +++ b/addons/redis/templates/cmpd-redis-syncer.yaml @@ -0,0 +1,618 @@ +{{- range .Values.redisVersions }} +{{- $redisStartScripts := "redis-start.sh" }} +{{- if eq .major "5" }} + {{- $redisStartScripts = "redis5-start.sh" }} +{{- end }} +--- +apiVersion: apps.kubeblocks.io/v1 +kind: ComponentDefinition +metadata: + name: {{ printf "%s-%s" .syncerComponentDef $.Chart.Version }} + labels: + {{- include "redis.labels" $ | nindent 4 }} + annotations: + {{- include "redis.annotations" $ | nindent 4 }} +spec: + provider: kubeblocks + description: A Redis v{{ .major }}.0 syncer component definition for Kubernetes + serviceKind: redis + serviceVersion: {{ .serviceVersion }} + podManagementPolicy: OrderedReady + podUpgradePolicy: ReCreate + minReadySeconds: 10 + tls: + volumeName: tls + mountPath: {{ 
$.Values.tlsMountPath }} + caFile: ca.crt + certFile: tls.crt + keyFile: tls.key + services: + - name: redis + serviceName: redis + spec: + ports: + - name: redis + port: 6379 + targetPort: redis + roleSelector: primary + - name: redis-syncer-sentinel + serviceName: redis-syncer-sentinel + spec: + ports: + - name: redis-sentinel + port: 26379 + targetPort: redis-sentinel + - name: redis-advertised + serviceName: redis-advertised + spec: + ## the type can override in cluster componentSpec services, type can be NodePort, LoadBalancer(not support yet) + type: NodePort + ports: + - name: redis-advertised + port: 6379 + targetPort: redis + podService: true + disableAutoProvision: true + - name: redis-lb-advertised + serviceName: redis-lb-advertised + spec: + ## the type can override in cluster componentSpec services, type can loadBalancer + type: LoadBalancer + # allocateLoadBalancerNodePorts: false + externalTrafficPolicy: Cluster + ports: + - name: redis-advertised + port: 6379 + targetPort: redis + podService: true + disableAutoProvision: true + updateStrategy: BestEffortParallel + volumes: + - name: data + needSnapshot: true + roles: + - name: primary + updatePriority: 2 + participatesInQuorum: false + isExclusive: true + - name: secondary + updatePriority: 1 + participatesInQuorum: false + logConfigs: + {{- range $name,$pattern := $.Values.logConfigs }} + - name: {{ $name }} + filePathPattern: {{ $pattern }} + {{- end }} + exporter: + containerName: metrics + scrapePath: /metrics + scrapePort: http-metrics + configs: + - name: redis-replication-config + template: {{ printf "redis%s-config-template-%s" .major $.Chart.Version }} + namespace: {{ $.Release.Namespace }} + volumeName: redis-config + externalManaged: true + scripts: + - name: redis-scripts + template: {{ include "redis.scriptsTemplate" $ }} + namespace: {{ $.Release.Namespace }} + volumeName: scripts + defaultMode: 0555 + policyRules: + - apiGroups: + - "" + resources: + - configmaps + verbs: + - create + 
- get + - list + - patch + - update + - delete + - apiGroups: + - "" + resources: + - events + verbs: + - create + - apiGroups: + - "" + resources: + - pods + verbs: + - list + - apiGroups: + - apps.kubeblocks.io + resources: + - clusters + - components + verbs: + - get + systemAccounts: + - name: default + initAccount: true + passwordGenerationPolicy: + length: 10 + numDigits: 5 + numSymbols: 0 + letterCase: MixedCases + hostNetwork: + containerPorts: + - container: redis + ports: + - redis + {{- if $.Values.enableMetrics }} + - container: metrics + ports: + - http-metrics + - server-metrics + {{- end }} + vars: + {{- if eq .major "5" }} + - name: IS_REDIS5 + value: "true" + {{- else if eq .major "8" }} + - name: IS_REDIS8 + value: "true" + {{- end }} + ## the name of current cluster instance + - name: CLUSTER_NAME + valueFrom: + clusterVarRef: + clusterName: Required + - name: CLUSTER_NAMESPACE + valueFrom: + clusterVarRef: + namespace: Required + ## the default username of redis connection + - name: REDIS_DEFAULT_USER + valueFrom: + credentialVarRef: + name: default + username: Required + ## the default password of redis connection + - name: REDIS_DEFAULT_PASSWORD + valueFrom: + credentialVarRef: + name: default + password: Required + ## the username of redis primary-secondary replication + - name: REDIS_REPL_USER + value: "kbreplicator" + ## the password of redis primary-secondary replication shared the same password with default password + - name: REDIS_REPL_PASSWORD + valueFrom: + credentialVarRef: + name: default + password: Required + ## the username of redis for redis sentinel connection + - name: REDIS_SENTINEL_USER + value: "kbreplicator-sentinel" + ## the password of redis for redis sentinel connection shared the same password with default password + - name: REDIS_SENTINEL_PASSWORD + valueFrom: + credentialVarRef: + name: default + password: Required + - name: COMPONENT_REPLICAS + valueFrom: + componentVarRef: + optional: false + replicas: Required + ## 
the redis advertised service port list for each pod, the value format is "pod1Svc:nodeport1,pod2Svc:nodeport2,..." + - name: REDIS_ADVERTISED_PORT + valueFrom: + serviceVarRef: + name: redis-advertised + optional: true + port: + name: redis-advertised + option: Required + - name: REDIS_LB_ADVERTISED_PORT + valueFrom: + serviceVarRef: + name: redis-lb-advertised + optional: true + port: + name: redis-advertised + option: Required + ## the redis advertised service name list for each pod, the value format is "pod1Svc,pod2Svc,..." + - name: REDIS_LB_ADVERTISED_HOST + valueFrom: + serviceVarRef: + name: redis-lb-advertised + optional: true + loadBalancer: Required + host: Required + ## the redis pod name list for each pod, the value format is "pod1,pod2,..." + - name: REDIS_POD_NAME_LIST + valueFrom: + componentVarRef: + optional: false + podNames: Required + ## the redis pod fqdn list for each pod, the value format is "pod1FQDN,pod2FQDN,..." + - name: REDIS_POD_FQDN_LIST + valueFrom: + componentVarRef: + optional: false + podFQDNs: Required + ## the component name of redis, it's the fullname of redis component + - name: REDIS_COMPONENT_NAME + valueFrom: + componentVarRef: + optional: false + componentName: Required + ## the redis server host network port when using host network mode, the port will be allocated automatically by KubeBlocks + - name: REDIS_HOST_NETWORK_PORT + valueFrom: + hostNetworkVarRef: + optional: true + container: + name: redis + port: + name: redis + option: Required + - name: SERVICE_PORT + value: "6379" + expression: {{ `{{if index . 
"REDIS_HOST_NETWORK_PORT"}}{{.REDIS_HOST_NETWORK_PORT}}{{else}}{{.SERVICE_PORT}}{{end}}` | toYaml }} + {{- if $.Values.enableMetrics }} + ## the redis server metrics container host network port when using host network mode, the port will be allocated automatically by KubeBlocks, if not set, the default value is 9121 + - name: REDIS_METRICS_HOST_NETWORK_PORT + valueFrom: + hostNetworkVarRef: + optional: true + container: + name: metrics + port: + name: http-metrics + option: Required + - name: REDIS_METRICS_HTTP_PORT + value: "9121" + expression: {{ `{{if index . "REDIS_METRICS_HOST_NETWORK_PORT"}}{{.REDIS_METRICS_HOST_NETWORK_PORT}}{{else}}{{.REDIS_METRICS_HTTP_PORT}}{{end}}` | toYaml }} + ## the redis server metrics container host network server port when using host network mode, the port will be allocated automatically by KubeBlocks, if not set, the default value is 8888 + - name: REDIS_METRICS_HOST_NETWORK_SERVER_PORT + valueFrom: + hostNetworkVarRef: + optional: true + container: + name: metrics + port: + name: server-metrics + option: Required + - name: REDIS_METRICS_SERVER_PORT + value: "8888" + expression: {{ `{{if index . "REDIS_METRICS_HOST_NETWORK_SERVER_PORT"}}{{.REDIS_METRICS_HOST_NETWORK_SERVER_PORT}}{{else}}{{.REDIS_METRICS_SERVER_PORT}}{{end}}` | toYaml }} + - name: REDIS_METRICS_ADDR + value: "redis://localhost:$(SERVICE_PORT)" + expression: {{ `{{if eq (index . "TLS_ENABLED") "true"}}rediss://localhost:{{.SERVICE_PORT}}{{else}}redis://localhost:{{.SERVICE_PORT}}{{end}}` | toYaml }} + {{- end }} + - name: REDIS_CLI_TLS_CMD + value: "" + expression: {{ `{{if eq (index . 
"TLS_ENABLED") "true"}}--tls --insecure{{else}}{{end}}` | toYaml }} + ## the component name of redis sentinel when redis sentinel is enabled, it's the fullname of redis-sentinel component + - name: SENTINEL_COMPONENT_NAME + valueFrom: + componentVarRef: + compDef: {{ printf "%s-%s" .sentinelComponentDef $.Chart.Version }} + optional: true + componentName: Required + ## the username of redis sentinel for redis connection, it is optional + - name: SENTINEL_USER + valueFrom: + credentialVarRef: + ## reference the redis-sentinel component definition name + compDef: {{ printf "%s-%s" .sentinelComponentDef $.Chart.Version }} + name: default + optional: true + username: Required + - name: SENTINEL_HEADLESS_SERVICE_NAME + valueFrom: + serviceVarRef: + compDef: {{ .sentinelComponentDef }} + name: headless + optional: true + host: Optional + ## the password of redis sentinel for redis connection, it is optional + - name: SENTINEL_PASSWORD + valueFrom: + credentialVarRef: + ## reference the redis-sentinel component definition name + compDef: {{ printf "%s-%s" .sentinelComponentDef $.Chart.Version }} + name: default + optional: true + password: Required + - name: SENTINEL_POD_NAME_LIST + valueFrom: + componentVarRef: + compDef: {{ printf "%s-%s" .sentinelComponentDef $.Chart.Version }} + optional: true + podNames: Required + - name: SENTINEL_POD_FQDN_LIST + valueFrom: + componentVarRef: + compDef: {{ printf "%s-%s" .sentinelComponentDef $.Chart.Version }} + optional: true + podFQDNs: Required + ## the redis sentinel server port, if redis sentinel is in host network mode, the port will be allocated automatically by KubeBlocks, if not, the default port is 26379 + - name: SENTINEL_SERVICE_PORT + valueFrom: + hostNetworkVarRef: + compDef: {{ printf "%s-%s" .sentinelComponentDef $.Chart.Version }} + optional: true + container: + name: redis-sentinel + port: + name: redis-sentinel + option: Required + expression: {{ `{{if index . 
"SENTINEL_SERVICE_PORT"}}{{.SENTINEL_SERVICE_PORT}}{{else}}26379{{end}}` | toYaml }} + - name: PHY_MEMORY + valueFrom: + resourceVarRef: + memoryLimit: Required + - name: TLS_ENABLED + valueFrom: + tlsVarRef: + enabled: Optional + optional: true + - name: TLS_MOUNT_PATH + value: {{ $.Values.tlsMountPath }} + lifecycleActions: + roleProbe: + periodSeconds: 1 + timeoutSeconds: 5 + exec: + env: + - name: CURRENT_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: CURRENT_POD_UID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: KB_HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: KB_POD_FQDN + value: "$(CURRENT_POD_NAME).$(REDIS_COMPONENT_NAME)-headless.$(CLUSTER_NAMESPACE).svc.{{ $.Values.clusterDomain }}" + - name: KB_CLUSTER_COMP_NAME + value: $(REDIS_COMPONENT_NAME) + - name: KB_SERVICE_PORT + value: "$(SERVICE_PORT)" + container: redis + command: + - /scripts/redis-role-probe.sh + postProvision: + exec: + container: redis + command: + - /bin/bash + - -c + - /scripts/redis-register-to-sentinel.sh > /tmp/post-provision.log 2>&1 + targetPodSelector: Role + matchingKey: primary + ## all lifecycle actions share the same env + env: + - name: CURRENT_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: CURRENT_POD_UID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: CURRENT_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: CURRENT_POD_HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + preCondition: RuntimeReady + accountProvision: + exec: + container: redis + command: + - sh + - -c + - /scripts/redis-account-provision.sh + switchover: + timeoutSeconds: 900 + exec: + container: redis + command: + - /bin/bash + - -c + - /tools/syncerctl switchover --primary "$KB_SWITCHOVER_CURRENT_NAME" ${KB_SWITCHOVER_CANDIDATE_NAME:+--candidate 
"$KB_SWITCHOVER_CANDIDATE_NAME"} > /tmp/switchover.log 2>&1 + memberJoin: + exec: + container: redis + command: + - /bin/bash + - -c + - /scripts/sync-acl.sh + targetPodSelector: Any + runtime: + initContainers: + - name: init-syncer + command: + - cp + - -r + - /bin/syncer + - /bin/syncerctl + - /tools/ + imagePullPolicy: {{ default "IfNotPresent" $.Values.image.pullPolicy }} + volumeMounts: + - mountPath: /tools + name: tools + - name: init-dbctl + command: + - cp + - -r + - /bin/dbctl + - /tools/ + imagePullPolicy: {{ default "IfNotPresent" $.Values.dbctlImage.pullPolicy }} + volumeMounts: + - mountPath: /tools + name: tools + containers: + - name: redis + imagePullPolicy: {{ default "IfNotPresent" $.Values.image.pullPolicy }} + command: + - syncer + - --port + - "3601" + - -- + - /scripts/{{ $redisStartScripts }} + ports: + - name: redis + containerPort: 6379 + - name: redis-sentinel + containerPort: 26379 + - name: ha + containerPort: 3601 + volumeMounts: + - name: data + mountPath: {{ $.Values.dataMountPath }} + - name: redis-config + mountPath: /etc/conf + - name: scripts + mountPath: /scripts + - name: redis-conf + mountPath: /etc/redis + - mountPath: /tools + name: tools + env: + - name: PATH + value: /tools/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + - name: KB_CLUSTER_NAME + value: "$(CLUSTER_NAME)" + - name: KB_COMP_NAME + value: redis + - name: KB_CLUSTER_COMP_NAME + value: "$(REDIS_COMPONENT_NAME)" + - name: KB_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: KB_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: KB_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: COMPONENT_NAME + value: redis + - name: CLUSTER_COMPONENT_NAME + value: "$(REDIS_COMPONENT_NAME)" + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 
+ fieldPath: metadata.name + - name: POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: KB_SERVICE_CHARACTER_TYPE + value: redis + - name: KB_ENGINE_TYPE + value: redis + - name: KB_SERVICE_PORT + value: "$(SERVICE_PORT)" + - name: KB_SERVICE_PASSWORD + value: "$(REDIS_DEFAULT_PASSWORD)" + - name: SENTINEL_ENABLED + value: "true" + - name: SENTINEL_PORT + value: "26379" + - name: SENTINEL_MASTER_NAME + value: "$(REDIS_COMPONENT_NAME)" + - name: CURRENT_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: CURRENT_POD_UID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: CURRENT_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: CURRENT_POD_HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: POD_FQDN + value: "$(CURRENT_POD_NAME).$(REDIS_COMPONENT_NAME)-headless.$(CLUSTER_NAMESPACE).svc.{{ $.Values.clusterDomain }}" + readinessProbe: + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 5 + exec: + command: + - sh + - -c + - /scripts/redis-ping.sh + lifecycle: + preStop: + exec: + command: + - /bin/bash + - -c + - /scripts/redis-pre-stop.sh + {{- if $.Values.enableMetrics }} + - name: metrics + imagePullPolicy: {{ $.Values.metrics.image.pullPolicy | quote }} + securityContext: + runAsNonRoot: true + runAsUser: 1001 + env: + - name: REDIS_ADDR + value: "$(REDIS_METRICS_ADDR)" + - name: REDIS_EXPORTER_WEB_LISTEN_ADDRESS + value: "0.0.0.0:$(REDIS_METRICS_HTTP_PORT)" + - name: REDIS_USER + value: $(REDIS_DEFAULT_USER) + - name: REDIS_PASSWORD + value: $(REDIS_DEFAULT_PASSWORD) + - name: REDIS_EXPORTER_IS_CLUSTER + value: "false" + - name: REDIS_EXPORTER_SKIP_TLS_VERIFICATION + value: "true" + ports: + - name: http-metrics + containerPort: {{ $.Values.metrics.service.port }} + - name: server-metrics + containerPort: {{ $.Values.metrics.service.serverPort }} 
+ {{- end }} +{{- end }} diff --git a/addons/redis/templates/cmpd-redis-twemproxy.yaml b/addons/redis/templates/cmpd-redis-twemproxy.yaml index 69a3423a8..555b98d76 100644 --- a/addons/redis/templates/cmpd-redis-twemproxy.yaml +++ b/addons/redis/templates/cmpd-redis-twemproxy.yaml @@ -36,7 +36,7 @@ spec: - name: REDIS_SERVICE_NAMES valueFrom: serviceVarRef: - compDef: {{ include "redis.cmpdRegexpPattern" . }} + compDef: {{ include "redisSyncer.cmpdRegexpPattern" . }} name: redis optional: false host: Required @@ -46,7 +46,7 @@ spec: - name: REDIS_SERVICE_PORTS valueFrom: serviceVarRef: - compDef: {{ include "redis.cmpdRegexpPattern" . }} + compDef: {{ include "redisSyncer.cmpdRegexpPattern" . }} name: redis optional: false port: @@ -54,11 +54,31 @@ spec: option: Required multipleClusterObjectOption: strategy: combined + - name: REDIS_SENTINEL_SERVICE_HOSTS + valueFrom: + serviceVarRef: + compDef: {{ include "redisSyncer.cmpdRegexpPattern" . }} + name: redis-syncer-sentinel + optional: true + host: Required + multipleClusterObjectOption: + strategy: combined + - name: REDIS_SENTINEL_SERVICE_PORTS + valueFrom: + serviceVarRef: + compDef: {{ include "redisSyncer.cmpdRegexpPattern" . }} + name: redis-syncer-sentinel + optional: true + port: + name: redis-sentinel + option: Required + multipleClusterObjectOption: + strategy: combined ## the default username of all redis components, it will generate a set of variables with the component name suffix like "REDIS_DEFAULT_USER_REDIS0", "REDIS_DEFAULT_USER_REDIS1" - name: REDIS_DEFAULT_USER valueFrom: credentialVarRef: - compDef: {{ include "redis.cmpdRegexpPattern" . }} + compDef: {{ include "redisSyncer.cmpdRegexpPattern" . }} name: default username: Required multipleClusterObjectOption: @@ -67,7 +87,7 @@ spec: - name: REDIS_DEFAULT_PASSWORD valueFrom: credentialVarRef: - compDef: {{ include "redis.cmpdRegexpPattern" . }} + compDef: {{ include "redisSyncer.cmpdRegexpPattern" . 
}} name: default password: Required multipleClusterObjectOption: @@ -91,9 +111,7 @@ spec: - name: redis-twemproxy imagePullPolicy: {{ default .Values.redisTwemproxyImage.pullPolicy "IfNotPresent" }} command: - - sh - - -c - - nutcracker -c /etc/proxy/nutcracker.conf -v 4 -m 16384 + - /scripts/redis-twemproxy-start.sh env: - name: CURRENT_POD_NAME valueFrom: @@ -122,4 +140,3 @@ spec: name: proxy-conf - mountPath: /scripts name: scripts - diff --git a/addons/redis/templates/cmpd-redis.yaml b/addons/redis/templates/cmpd-redis.yaml index a4d3c3f54..b9e6ae195 100644 --- a/addons/redis/templates/cmpd-redis.yaml +++ b/addons/redis/templates/cmpd-redis.yaml @@ -86,7 +86,6 @@ spec: namespace: {{ $.Release.Namespace }} volumeName: redis-config externalManaged: true - {{- include "redis.config.reconfigureAction" $ | nindent 6 }} scripts: - name: redis-scripts template: {{ include "redis.scriptsTemplate" $ }} @@ -384,6 +383,7 @@ spec: - -c - /scripts/redis-account-provision.sh switchover: + timeoutSeconds: 300 exec: container: redis command: diff --git a/addons/redis/templates/cmpv-redis.yaml b/addons/redis/templates/cmpv-redis.yaml index d388fd9c3..fc75d28ed 100644 --- a/addons/redis/templates/cmpv-redis.yaml +++ b/addons/redis/templates/cmpv-redis.yaml @@ -11,6 +11,7 @@ spec: {{- range .Values.redisVersions }} - compDefs: - {{ .componentDef }} + - {{ .syncerComponentDef }} releases: {{- range .mirrorVersions }} - {{ .version }} @@ -27,9 +28,10 @@ spec: redis: {{ $redisRepository }}:{{ .imageTag }} metrics: {{ include "metrics.repository" $ }}:v1.80.1 init-dbctl: {{ $.Values.dbctlImage.registry | default ( $.Values.image.registry | default "docker.io" ) }}/{{ $.Values.dbctlImage.repository }}:{{ $.Values.dbctlImage.tag }} + init-syncer: {{ $.Values.image.registry | default "docker.io" }}/{{ $.Values.image.syncer.repository }}:{{ $.Values.image.syncer.tag }} postProvision: {{ $redisRepository }}:{{ .imageTag }} accountProvision: {{ $redisRepository }}:{{ .imageTag }} 
switchover: {{ $redisRepository }}:{{ .imageTag }} memberJoin: {{ $redisRepository }}:{{ .imageTag }} {{- end }} - {{- end }} \ No newline at end of file + {{- end }} diff --git a/addons/redis/templates/pcr-redis.yaml b/addons/redis/templates/pcr-redis.yaml new file mode 100644 index 000000000..b0ebc3b4b --- /dev/null +++ b/addons/redis/templates/pcr-redis.yaml @@ -0,0 +1,45 @@ +{{- range .Values.redisVersions }} +--- +apiVersion: parameters.kubeblocks.io/v1alpha1 +kind: ParamConfigRenderer +metadata: + name: {{ printf "redis%s-pcr-%s" .major $.Chart.Version }} + labels: + {{- include "redis.labels" $ | nindent 4 }} + annotations: + {{- include "redis.annotations" $ | nindent 4 }} +spec: + componentDef: {{ printf "%s-%s" .componentDef $.Chart.Version }} + parametersDefs: + - {{ printf "redis%s-config-pd" .major }} + + configs: + - name: redis.conf + fileFormatConfig: + format: redis + reRenderResourceTypes: + - vscale +{{- end }} +{{- range .Values.redisVersions }} +--- +apiVersion: parameters.kubeblocks.io/v1alpha1 +kind: ParamConfigRenderer +metadata: + name: {{ printf "redis-syncer%s-pcr-%s" .major $.Chart.Version }} + labels: + {{- include "redis.labels" $ | nindent 4 }} + annotations: + {{- include "redis.annotations" $ | nindent 4 }} +spec: + componentDef: {{ printf "%s-%s" .syncerComponentDef $.Chart.Version }} + parametersDefs: + - {{ printf "redis%s-config-pd" .major }} + + configs: + - name: redis.conf + templateName: redis-replication-config + fileFormatConfig: + format: redis + reRenderResourceTypes: + - vscale +{{- end }} diff --git a/addons/redis/templates/redis-sentinel-scripts-template.yaml b/addons/redis/templates/redis-sentinel-scripts-template.yaml new file mode 100644 index 000000000..57142df74 --- /dev/null +++ b/addons/redis/templates/redis-sentinel-scripts-template.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "redisSentinel.scriptsTemplate" . }} + labels: + {{- include "redis.labels" . 
| nindent 4 }} + annotations: + {{- include "redis.annotations" . | nindent 4 }} +data: + common.sh: |- + #!/bin/bash + {{- include "kblib.commons.call_func_with_retry" $ | nindent 4 }} + {{- include "kblib.commons.extract_obj_ordinal" $ | nindent 4 }} + {{- include "kblib.compvars.get_target_pod_fqdn_from_pod_fqdn_vars" $ | nindent 4 }} + {{- include "kblib.pods.min_lexicographical_order_pod" $ | nindent 4 }} + {{- include "kblib.ututils.set_xtrace_when_ut_mode_false" $ | nindent 4 }} + {{- include "kblib.ututils.unset_xtrace_when_ut_mode_false" $ | nindent 4 }} + {{- include "kblib.ututils.sleep_when_ut_mode_false" $ | nindent 4 }} + {{- include "kblib.strings.contains" $ | nindent 4 }} + {{- include "kblib.strings.is_empty" $ | nindent 4 }} + {{- include "kblib.strings.equals" $ | nindent 4 }} + {{- include "kblib.strings.split" $ | nindent 4 }} + {{- include "kblib.envs.env_exist" $ | nindent 4 }} + {{- include "kblib.envs.env_exists" $ | nindent 4 }} + {{- with include "redis-sentinel.extend.scripts" . }} + {{- . 
| nindent 2 }} + {{- end }} diff --git a/addons/redis/values.yaml b/addons/redis/values.yaml index 415208d24..cab95e1c8 100644 --- a/addons/redis/values.yaml +++ b/addons/redis/values.yaml @@ -24,6 +24,7 @@ defaultServiceVersion: redisVersions: - major: "5" componentDef: "redis-5" + syncerComponentDef: "redis-syncer-5" clusterComponentDef: "redis-cluster-5" serviceVersion: "5.0.12" sentinelComponentDef: "redis-sentinel-5" @@ -34,6 +35,7 @@ redisVersions: imageTag: "5.0.12" - major: "6" componentDef: "redis-6" + syncerComponentDef: "redis-syncer-6" serviceVersion: "6.2.17" clusterComponentDef: "redis-cluster-6" sentinelComponentDef: "redis-sentinel-6" @@ -43,16 +45,15 @@ redisVersions: - version: "6.2.14" imageTag: "6.2.6-v10" - version: "6.2.17" - imageTag: "6.2.6-v18" - - version: "6.2.18" imageTag: "6.2.6-v19" - - version: "6.2.19" + - version: "6.2.18" imageTag: "6.2.6-v20" - version: "6.2.22" imageTag: "6.2.22" repository: "redis" - major: "7" componentDef: "redis-7" + syncerComponentDef: "redis-syncer-7" serviceVersion: "7.2.11" clusterComponentDef: "redis-cluster-7" sentinelComponentDef: "redis-sentinel-7" @@ -87,6 +88,7 @@ redisVersions: repository: "redis" - major: "8" componentDef: "redis-8" + syncerComponentDef: "redis-syncer-8" serviceVersion: "8.2.2" clusterComponentDef: "redis-cluster-8" sentinelComponentDef: "redis-sentinel-8" @@ -134,6 +136,9 @@ image: repository: apecloud/ape-dts tag: 2.0.26-alpha.16 reshardTag: 2.0.26-alpha.16 + syncer: + repository: apecloud/syncer + tag: 0.1.0-alpha.1 ceImage: registry: @@ -190,4 +195,4 @@ clusterDomain: "cluster.local" enableMetrics: true -tlsMountPath: /etc/pki/tls \ No newline at end of file +tlsMountPath: /etc/pki/tls diff --git a/examples/redis/README.md b/examples/redis/README.md index 8d5ce34a1..c32f63218 100644 --- a/examples/redis/README.md +++ b/examples/redis/README.md @@ -678,7 +678,7 @@ To create a redis with a proxy (Twemproxy) in front of it: kubectl apply -f examples/redis/cluster-twemproxy.yaml 
``` -A cluster named `redis-twemproxy` will be created with three components, one for Redis (2 replicas), one for Sentinel (3 replicas), and one for twemproxy (3 replicas). +A cluster named `redis-twemproxy` will be created with two components: Redis (2 replicas, HA managed by syncer) and twemproxy (3 replicas). ```yaml # snippet of cluster.yaml @@ -687,10 +687,9 @@ kind: Cluster spec: terminationPolicy: Delete clusterDef: redis - topology: replication-twemproxy # set topology to standalone + topology: replication-twemproxy # redis HA is managed by syncer componentSpecs: - name: redis - - name: redis-sentinel - name: redis-twemproxy # add one componet on provisioniing: twemproxy replicas: 3 # set the desired number of replicas for twemproxy resources: diff --git a/examples/redis/cluster-syncer.yaml b/examples/redis/cluster-syncer.yaml new file mode 100644 index 000000000..f41b4c12e --- /dev/null +++ b/examples/redis/cluster-syncer.yaml @@ -0,0 +1,30 @@ +apiVersion: apps.kubeblocks.io/v1 +kind: Cluster +metadata: + name: redis-syncer + namespace: demo +spec: + terminationPolicy: Delete + clusterDef: redis + topology: replication-syncer + componentSpecs: + - name: redis + serviceVersion: "7.2.4" + disableExporter: false + replicas: 2 + resources: + limits: + cpu: "0.5" + memory: 0.5Gi + requests: + cpu: "0.5" + memory: 0.5Gi + volumeClaimTemplates: + - name: data + spec: + storageClassName: "" + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi diff --git a/examples/redis/cluster-twemproxy.yaml b/examples/redis/cluster-twemproxy.yaml index b0f474c76..6f7fce123 100644 --- a/examples/redis/cluster-twemproxy.yaml +++ b/examples/redis/cluster-twemproxy.yaml @@ -6,7 +6,7 @@ metadata: spec: terminationPolicy: Delete clusterDef: redis - topology: replication-twemproxy # set topology to standalone + topology: replication-twemproxy # redis HA is managed by syncer; no redis-sentinel component componentSpecs: - name: redis replicas: 2 @@ -26,23 +26,6 @@ 
spec: resources: requests: storage: 20Gi - - name: redis-sentinel - replicas: 3 - resources: - limits: - cpu: "0.2" - memory: "0.2Gi" - requests: - cpu: "0.2" - memory: "0.2Gi" - volumeClaimTemplates: - - name: data - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 20Gi - name: redis-twemproxy # add one componet on provisioniing: twemproxy replicas: 3 resources: @@ -51,4 +34,4 @@ spec: memory: "0.2Gi" requests: cpu: "0.2" - memory: "0.2Gi" \ No newline at end of file + memory: "0.2Gi" diff --git a/examples/redis/test/client_compatibility.sh b/examples/redis/test/client_compatibility.sh new file mode 100755 index 000000000..6bb8667aa --- /dev/null +++ b/examples/redis/test/client_compatibility.sh @@ -0,0 +1,443 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'EOF' +Run Redis client compatibility checks against Redis twemproxy and syncer Fake Sentinel. + +Usage: + client_compatibility.sh --namespace --cluster [options] + +Options: + --namespace Kubernetes namespace. + --cluster KubeBlocks Cluster name. + --mode + Test scope. Default: all. + --twemproxy-host Twemproxy service host. + Default: -redis-twemproxy-twemproxy. + --twemproxy-port Twemproxy service port. Default: 22121. + --sentinel-host syncer Fake Sentinel service host. + Default: -redis-redis-syncer-sentinel. + --sentinel-port syncer Fake Sentinel service port. Default: 26379. + --master-name Sentinel master name. + Default: -redis. + --password-secret Redis default account secret. + Default: -redis-account-default. + --password-key Password key in secret. Default: password. + --timeout Per job timeout. Default: 180. + --keep-jobs Do not delete completed test jobs. + +What it covers: + - Twemproxy direct access: redis-cli, redis-py, ioredis, go-redis. + - Fake Sentinel discovery + master write: redis-cli, redis-py Sentinel, + ioredis Sentinel, go-redis FailoverClient. 
+ +Examples: + ./examples/redis/test/client_compatibility.sh \ + --namespace redis-twemproxy-syncer --cluster k2so-rt + + ./examples/redis/test/client_compatibility.sh \ + --namespace redis-ti-r2 --cluster r2ti --mode sentinel +EOF +} + +NAMESPACE="" +CLUSTER="" +MODE="all" +TWEMPROXY_HOST="" +TWEMPROXY_PORT="22121" +SENTINEL_HOST="" +SENTINEL_PORT="26379" +MASTER_NAME="" +PASSWORD_SECRET="" +PASSWORD_KEY="password" +TIMEOUT_SECONDS="180" +KEEP_JOBS="false" + +while [ "$#" -gt 0 ]; do + case "$1" in + --namespace) + NAMESPACE="$2"; shift 2 ;; + --cluster) + CLUSTER="$2"; shift 2 ;; + --mode) + MODE="$2"; shift 2 ;; + --twemproxy-host) + TWEMPROXY_HOST="$2"; shift 2 ;; + --twemproxy-port) + TWEMPROXY_PORT="$2"; shift 2 ;; + --sentinel-host) + SENTINEL_HOST="$2"; shift 2 ;; + --sentinel-port) + SENTINEL_PORT="$2"; shift 2 ;; + --master-name) + MASTER_NAME="$2"; shift 2 ;; + --password-secret) + PASSWORD_SECRET="$2"; shift 2 ;; + --password-key) + PASSWORD_KEY="$2"; shift 2 ;; + --timeout) + TIMEOUT_SECONDS="$2"; shift 2 ;; + --keep-jobs) + KEEP_JOBS="true"; shift ;; + -h|--help) + usage; exit 0 ;; + *) + echo "unknown argument: $1" >&2 + usage >&2 + exit 2 ;; + esac +done + +if [ -z "$NAMESPACE" ] || [ -z "$CLUSTER" ]; then + usage >&2 + exit 2 +fi + +case "$MODE" in + all|twemproxy|sentinel) ;; + *) + echo "invalid --mode: $MODE" >&2 + exit 2 ;; +esac + +TWEMPROXY_HOST="${TWEMPROXY_HOST:-${CLUSTER}-redis-twemproxy-twemproxy}" +SENTINEL_HOST="${SENTINEL_HOST:-${CLUSTER}-redis-redis-syncer-sentinel}" +PASSWORD_SECRET="${PASSWORD_SECRET:-${CLUSTER}-redis-account-default}" +MASTER_NAME="${MASTER_NAME:-${CLUSTER}-redis}" +RUN_ID="$(date +%Y%m%d%H%M%S)-$RANDOM" +JOB_PREFIX="redis-client-compat-$RUN_ID" +WORK_DIR="$(mktemp -d)" +FAILED_CLIENTS="" + +cleanup() { + rm -rf "$WORK_DIR" + if [ "$KEEP_JOBS" != "true" ]; then + kubectl -n "$NAMESPACE" delete job -l "redis-client-compat-run=$RUN_ID" --ignore-not-found >/dev/null 2>&1 || true + fi +} +trap cleanup EXIT + 
+wait_job() { + local job="$1" + local log_file="${WORK_DIR}/${job}.log" + + echo "==> waiting for $job" + if ! kubectl -n "$NAMESPACE" wait --for=condition=complete "job/$job" --timeout="${TIMEOUT_SECONDS}s"; then + echo "job $job did not complete; collecting logs and status" >&2 + kubectl -n "$NAMESPACE" get job "$job" -o wide >&2 || true + kubectl -n "$NAMESPACE" describe job "$job" >&2 || true + kubectl -n "$NAMESPACE" logs "job/$job" --all-containers=true --tail=-1 >&2 || true + return 1 + fi + + kubectl -n "$NAMESPACE" logs "job/$job" --all-containers=true --tail=-1 | tee "$log_file" +} + +apply_job() { + local job="$1" + local image="$2" + local script="$3" + + kubectl -n "$NAMESPACE" apply -f - < "$file" <<'EOF' +key="kb:compat:redis-cli:$(date +%s)" +if [ "${TEST_TWEMPROXY:-true}" = "true" ]; then + redis-cli -h "$TWEMPROXY_HOST" -p "$TWEMPROXY_PORT" -a "$REDIS_PASSWORD" --no-auth-warning SET "$key:tw" "twemproxy-ok" + value="$(redis-cli -h "$TWEMPROXY_HOST" -p "$TWEMPROXY_PORT" -a "$REDIS_PASSWORD" --no-auth-warning GET "$key:tw")" + test "$value" = "twemproxy-ok" + echo "redis-cli twemproxy OK" +fi + +if [ "${TEST_SENTINEL:-true}" = "true" ]; then + master="$(redis-cli -h "$SENTINEL_HOST" -p "$SENTINEL_PORT" --raw SENTINEL get-master-addr-by-name "$MASTER_NAME" | tr '\n' ' ')" + set -- $master + host="$1" + port="$2" + test -n "$host" + test -n "$port" + redis-cli -h "$host" -p "$port" -a "$REDIS_PASSWORD" --no-auth-warning SET "$key:sentinel" "sentinel-ok" + value="$(redis-cli -h "$host" -p "$port" -a "$REDIS_PASSWORD" --no-auth-warning GET "$key:sentinel")" + test "$value" = "sentinel-ok" + echo "redis-cli fake sentinel OK: $host:$port" +fi +EOF +} + +create_python_script() { + local file="$1" + cat > "$file" <<'EOF' +pip install --quiet 'redis>=5,<6' +python - <<'PY' +import os +import time +import redis +from redis.sentinel import Sentinel + +key = f"kb:compat:redis-py:{int(time.time())}" + +if os.environ.get("TEST_TWEMPROXY", "true") == "true": + r 
= redis.Redis( + host=os.environ["TWEMPROXY_HOST"], + port=int(os.environ["TWEMPROXY_PORT"]), + password=os.environ["REDIS_PASSWORD"], + socket_connect_timeout=10, + socket_timeout=10, + decode_responses=True, + ) + r.set(key + ":tw", "twemproxy-ok") + assert r.get(key + ":tw") == "twemproxy-ok" + print("redis-py twemproxy OK") + +if os.environ.get("TEST_SENTINEL", "true") == "true": + sentinel = Sentinel( + [(os.environ["SENTINEL_HOST"], int(os.environ["SENTINEL_PORT"]))], + socket_timeout=10, + decode_responses=True, + ) + master = sentinel.master_for( + os.environ.get("MASTER_NAME", "redis"), + password=os.environ["REDIS_PASSWORD"], + socket_timeout=10, + decode_responses=True, + ) + master.set(key + ":sentinel", "sentinel-ok") + assert master.get(key + ":sentinel") == "sentinel-ok" + print("redis-py fake sentinel OK") +PY +EOF +} + +create_node_script() { + local file="$1" + cat > "$file" <<'EOF' +cd /tmp +npm init -y >/dev/null +npm install ioredis@5 +node - <<'NODE' +const Redis = require("ioredis"); +const key = `kb:compat:ioredis:${Date.now()}`; + +async function main() { + if (process.env.TEST_TWEMPROXY !== "false") { + const direct = new Redis({ + host: process.env.TWEMPROXY_HOST, + port: Number(process.env.TWEMPROXY_PORT), + password: process.env.REDIS_PASSWORD, + lazyConnect: true, + connectTimeout: 10000, + maxRetriesPerRequest: 1, + }); + await direct.connect(); + await direct.set(`${key}:tw`, "twemproxy-ok"); + const value = await direct.get(`${key}:tw`); + if (value !== "twemproxy-ok") throw new Error(`bad twemproxy value: ${value}`); + await direct.quit(); + console.log("ioredis twemproxy OK"); + } + + if (process.env.TEST_SENTINEL !== "false") { + const sentinel = new Redis({ + sentinels: [{ host: process.env.SENTINEL_HOST, port: Number(process.env.SENTINEL_PORT) }], + name: process.env.MASTER_NAME || "redis", + password: process.env.REDIS_PASSWORD, + sentinelRetryStrategy: (times) => (times > 1 ? 
null : 100), + connectTimeout: 10000, + maxRetriesPerRequest: 1, + }); + sentinel.on("error", (err) => console.error("ioredis error", err)); + await sentinel.set(`${key}:sentinel`, "sentinel-ok"); + const value = await sentinel.get(`${key}:sentinel`); + if (value !== "sentinel-ok") throw new Error(`bad sentinel value: ${value}`); + await sentinel.quit(); + console.log("ioredis fake sentinel OK"); + } +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); +NODE +EOF +} + +create_go_script() { + local file="$1" + cat > "$file" <<'EOF' +mkdir -p /tmp/redis-client-compat +cd /tmp/redis-client-compat +cat > go.mod <<'GOMOD' +module redis-client-compat + +go 1.24 + +require github.com/redis/go-redis/v9 v9.12.1 +GOMOD + +cat > main.go <<'GO' +package main + +import ( + "context" + "fmt" + "os" + "strconv" + "time" + + "github.com/redis/go-redis/v9" +) + +func mustPort(name string) int { + v, err := strconv.Atoi(os.Getenv(name)) + if err != nil { + panic(err) + } + return v +} + +func main() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + key := fmt.Sprintf("kb:compat:go-redis:%d", time.Now().UnixNano()) + + if os.Getenv("TEST_TWEMPROXY") != "false" { + direct := redis.NewClient(&redis.Options{ + Addr: fmt.Sprintf("%s:%d", os.Getenv("TWEMPROXY_HOST"), mustPort("TWEMPROXY_PORT")), + Password: os.Getenv("REDIS_PASSWORD"), + }) + if err := direct.Set(ctx, key+":tw", "twemproxy-ok", 0).Err(); err != nil { + panic(err) + } + value, err := direct.Get(ctx, key+":tw").Result() + if err != nil || value != "twemproxy-ok" { + panic(fmt.Sprintf("bad twemproxy value=%q err=%v", value, err)) + } + _ = direct.Close() + fmt.Println("go-redis twemproxy OK") + } + + if os.Getenv("TEST_SENTINEL") != "false" { + failover := redis.NewFailoverClient(&redis.FailoverOptions{ + MasterName: os.Getenv("MASTER_NAME"), + SentinelAddrs: []string{fmt.Sprintf("%s:%d", os.Getenv("SENTINEL_HOST"), mustPort("SENTINEL_PORT"))}, + Password: 
os.Getenv("REDIS_PASSWORD"), + }) + if err := failover.Set(ctx, key+":sentinel", "sentinel-ok", 0).Err(); err != nil { + panic(err) + } + value, err := failover.Get(ctx, key+":sentinel").Result() + if err != nil || value != "sentinel-ok" { + panic(fmt.Sprintf("bad sentinel value=%q err=%v", value, err)) + } + _ = failover.Close() + fmt.Println("go-redis fake sentinel OK") + } +} +GO + +go mod tidy +go run . +EOF +} + +run_client() { + local name="$1" + local image="$2" + local script="$3" + local job="${JOB_PREFIX}-${name}" + + apply_job "$job" "$image" "$script" + if wait_job "$job"; then + echo "PASS $name" + else + echo "FAIL $name" >&2 + FAILED_CLIENTS="${FAILED_CLIENTS} ${name}" + fi +} + +if [ "$MODE" = "twemproxy" ]; then + export TEST_TWEMPROXY="true" + export TEST_SENTINEL="false" +elif [ "$MODE" = "sentinel" ]; then + export TEST_TWEMPROXY="false" + export TEST_SENTINEL="true" +else + export TEST_TWEMPROXY="true" + export TEST_SENTINEL="true" +fi + +echo "namespace=$NAMESPACE cluster=$CLUSTER mode=$MODE" +echo "twemproxy=${TWEMPROXY_HOST}:${TWEMPROXY_PORT}" +echo "fake-sentinel=${SENTINEL_HOST}:${SENTINEL_PORT} master-name=${MASTER_NAME}" + +redis_cli_script="${WORK_DIR}/redis-cli.sh" +python_script="${WORK_DIR}/redis-py.sh" +node_script="${WORK_DIR}/ioredis.sh" +go_script="${WORK_DIR}/go-redis.sh" + +create_redis_cli_script "$redis_cli_script" +create_python_script "$python_script" +create_node_script "$node_script" +create_go_script "$go_script" + +run_client "redis-cli" "redis:7-alpine" "$redis_cli_script" +run_client "redis-py" "python:3.12-alpine" "$python_script" +run_client "ioredis" "node:20-alpine" "$node_script" +run_client "go-redis" "golang:1.24-alpine" "$go_script" + +if [ -n "$FAILED_CLIENTS" ]; then + echo "failed Redis client compatibility checks:${FAILED_CLIENTS}" >&2 + exit 1 +fi + +echo "all selected Redis client compatibility checks passed"