diff --git a/cns/cniconflist/generator.go b/cns/cniconflist/generator.go index 1e154dd80b..9ff5563333 100644 --- a/cns/cniconflist/generator.go +++ b/cns/cniconflist/generator.go @@ -71,6 +71,10 @@ type SWIFTGenerator struct { Writer io.WriteCloser } +type AzureCNIChainedCiliumGenerator struct { + Writer io.WriteCloser +} + func (v *V4OverlayGenerator) Close() error { if err := v.Writer.Close(); err != nil { return errors.Wrap(err, "error closing generator") @@ -110,3 +114,11 @@ func (v *SWIFTGenerator) Close() error { return nil } + +func (v *AzureCNIChainedCiliumGenerator) Close() error { + if err := v.Writer.Close(); err != nil { + return errors.Wrap(err, "error closing generator") + } + + return nil +} diff --git a/cns/cniconflist/generator_linux.go b/cns/cniconflist/generator_linux.go index 1c65cdbff6..01886313a8 100644 --- a/cns/cniconflist/generator_linux.go +++ b/cns/cniconflist/generator_linux.go @@ -161,3 +161,33 @@ func (v *SWIFTGenerator) Generate() error { return nil } + +func (v *AzureCNIChainedCiliumGenerator) Generate() error { + conflist := cniConflist{ + CNIVersion: azurecniVersion, + Name: azureName, + Plugins: []any{ + cni.NetworkConfig{ + Type: azureType, + Mode: cninet.OpModeTransparent, + IPsToRouteViaHost: []string{nodeLocalDNSIP}, + ExecutionMode: string(util.V4Swift), + IPAM: cni.IPAM{ + Type: network.AzureCNS, + }, + }, + cni.NetworkConfig{ + Name: ciliumcniName, + Type: ciliumcniType, + }, + }, + } + + enc := json.NewEncoder(v.Writer) + enc.SetIndent("", "\t") + if err := enc.Encode(conflist); err != nil { + return errors.Wrap(err, "error encoding conflist to json") + } + + return nil +} diff --git a/cns/cniconflist/generator_linux_test.go b/cns/cniconflist/generator_linux_test.go index 982cabceed..d713ce9a08 100644 --- a/cns/cniconflist/generator_linux_test.go +++ b/cns/cniconflist/generator_linux_test.go @@ -92,6 +92,21 @@ func TestGenerateSWIFTConflist(t *testing.T) { assert.Equal(t, removeNewLines(fixtureBytes), 
removeNewLines(buffer.Bytes())) } +func TestGenerateAzurecniCiliumConflist(t *testing.T) { + fixture := "testdata/fixtures/azure-chained-cilium.conflist" + + buffer := new(bytes.Buffer) + g := cniconflist.AzureCNIChainedCiliumGenerator{Writer: &bufferWriteCloser{buffer}} + err := g.Generate() + assert.NoError(t, err) + + fixtureBytes, err := os.ReadFile(fixture) + assert.NoError(t, err) + + // remove newlines and carriage returns in case these UTs are running on Windows + assert.Equal(t, removeNewLines(fixtureBytes), removeNewLines(buffer.Bytes())) +} + // removeNewLines will remove the newlines and carriage returns from the byte slice func removeNewLines(b []byte) []byte { var bb []byte //nolint:prealloc // can't prealloc since we don't know how many bytes will get removed diff --git a/cns/cniconflist/generator_windows.go b/cns/cniconflist/generator_windows.go index 31551f4e59..d0ef208578 100644 --- a/cns/cniconflist/generator_windows.go +++ b/cns/cniconflist/generator_windows.go @@ -25,3 +25,7 @@ func (v *CiliumGenerator) Generate() error { func (v *SWIFTGenerator) Generate() error { return errNotImplemented } + +func (v *AzureCNIChainedCiliumGenerator) Generate() error { + return errNotImplemented +} diff --git a/cns/cniconflist/testdata/fixtures/azure-chained-cilium.conflist b/cns/cniconflist/testdata/fixtures/azure-chained-cilium.conflist new file mode 100644 index 0000000000..7fc6fca211 --- /dev/null +++ b/cns/cniconflist/testdata/fixtures/azure-chained-cilium.conflist @@ -0,0 +1,34 @@ +{ + "cniVersion": "0.3.0", + "name": "azure", + "plugins": [ + { + "type": "azure-vnet", + "mode": "transparent", + "ipsToRouteViaHost": [ + "169.254.20.10" + ], + "executionMode": "v4swift", + "ipam": { + "type": "azure-cns" + }, + "dns": {}, + "runtimeConfig": { + "dns": {} + }, + "windowsSettings": {} + }, + { + "name": "cilium", + "type": "cilium-cni", + "ipam": { + "type": "" + }, + "dns": {}, + "runtimeConfig": { + "dns": {} + }, + "windowsSettings": {} + } + ] +} diff 
--git a/cns/service/main.go b/cns/service/main.go index 67f7872f44..24263320d3 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -121,11 +121,12 @@ const ( type cniConflistScenario string const ( - scenarioV4Overlay cniConflistScenario = "v4overlay" - scenarioDualStackOverlay cniConflistScenario = "dualStackOverlay" - scenarioOverlay cniConflistScenario = "overlay" - scenarioCilium cniConflistScenario = "cilium" - scenarioSWIFT cniConflistScenario = "swift" + scenarioV4Overlay cniConflistScenario = "v4overlay" + scenarioDualStackOverlay cniConflistScenario = "dualStackOverlay" + scenarioOverlay cniConflistScenario = "overlay" + scenarioCilium cniConflistScenario = "cilium" + scenarioSWIFT cniConflistScenario = "swift" + scenarioAzurecniChainedCilium cniConflistScenario = "azurecni-chained-cilium" ) var ( @@ -623,6 +624,8 @@ func main() { conflistGenerator = &cniconflist.CiliumGenerator{Writer: writer} case scenarioSWIFT: conflistGenerator = &cniconflist.SWIFTGenerator{Writer: writer} + case scenarioAzurecniChainedCilium: + conflistGenerator = &cniconflist.AzureCNIChainedCiliumGenerator{Writer: writer} default: logger.Errorf("unable to generate cni conflist for unknown scenario: %s", scenario) os.Exit(1) diff --git a/docs/feature/swift-v2/setup-guide.md b/docs/feature/swift-v2/setup-guide.md new file mode 100644 index 0000000000..131501bac3 --- /dev/null +++ b/docs/feature/swift-v2/setup-guide.md @@ -0,0 +1,90 @@ +# Swiftv2 Cilium Setup Guide + +## Steps +### Clone repo + checkout branch for *.yamls +``` +git clone https://github.com/Azure/azure-container-networking.git +git checkout jpayne3506/conflist-generation < TODO Change before merge > +``` + +### Apply cilium config +``` +export DIR=1.17 +export CILIUM_VERSION_TAG=v1.17.7-250927 +export CILIUM_IMAGE_REGISTRY=mcr.microsoft.com/containernetworking +kubectl apply -f test/integration/manifests/cilium/v${DIR}/cilium-config/cilium-chained-config.yaml +``` + +- Remove 
`kube-proxy-replacement-healthz-bind-address: "0.0.0.0:10256"` from configmap if kube-proxy is currently running on the nodes + +### Apply cilium Agent + Operator +``` +kubectl apply -f test/integration/manifests/cilium/v${DIR}/cilium-operator/files +kubectl apply -f test/integration/manifests/cilium/v${DIR}/cilium-agent/files +``` + +### Apply/Edit CNS configmap +``` +kubectl apply -f test/integration/manifests/cnsconfig/azcnichainedciliumconfigmap.yaml +``` +#### Must have configmap values +``` +"ProgramSNATIPTables": false +"CNIConflistScenario": "azurecni-chained-cilium" +"CNIConflistFilepath": "/etc/cni/net.d/05-azure-chained-cilium.conflist" +``` + +### Update CNS image +Leverage a cns build from branch or use `acnpublic.azurecr.io/azure-cns:v1.7.5-2-g94c36c070` < TODO Change before merge > +- This will install our chained conflist through the use of `"CNIConflistScenario": "azurecni-chained-cilium"` and it will be installed on the node here `"CNIConflistFilepath": "/etc/cni/net.d/05-azure-chained-cilium.conflist"` + +> NOTE: if your current conflist file name starts with `05` then change our previous filename to one with higher priority to ensure that it is consumed on restart. I.e. `03-azure-chained-cilium.conflist` + +### If kube-proxy was present +#### Remove kube-proxy +> NOTE: Reapply `kube-proxy-replacement-healthz-bind-address: "0.0.0.0:10256"` to cilium configmap if previously removed + +This can be done either by editing the node-selectors or deleting the ds. Both work... 
+ +#### Restart Cilium +kubectl rollout restart ds -n kube-system cilium + + +### Quick Summary +- Apply/Edit Cilium Config with + - `cni-chaining-mode: generic-veth` + - remove `kube-proxy-replacement-healthz-bind-address` + - You do not need to remove if node does not have kube-proxy enabled + - If applied before agent is in ready state then no need to restart agent +- Apply Agent + Operator +- Apply/Edit CNS config with + - "ProgramSNATIPTables": false + - "CNIConflistScenario": "azurecni-chained-cilium" + - "CNIConflistFilepath": "/etc/cni/net.d/05-azure-chained-cilium.conflist" +- Update CNS image with build from branch or < TODO IMAGE NAME > + - This will install chained conflist + +#### If kube-proxy was present +- Reapply `kube-proxy-replacement-healthz-bind-address: "0.0.0.0:10256"` to cilium configmap +- Remove Kube-proxy +- Restart Cilium + + +## Quick Validation testing +- Create pods from deploy + - test/integration/manifests/swiftv2/mt-deploy.yaml + - Creates `container-*` pods on default namespace +- Create Cilium Network Policies + - test/integration/manifests/cilium/netpol/default-allow.yaml + - Will only allow cilium managed endpoints to transmit traffic through default namespace +- Check Cilium Management with + - `kubectl get cep -A` + - `kubectl get cnp -A` +- Check connectivity + - exec -it -- sh + - ip a + - look for delegatedNIC IP + - ping + - confirm CNP working by attempting to ping coredns pods + - should fail if both are being maintained by cilium + - confirm with `kubectl get cep -A` diff --git a/test/integration/manifests/cilium/netpol/default-allow.yaml b/test/integration/manifests/cilium/netpol/default-allow.yaml new file mode 100644 index 0000000000..e30f937210 --- /dev/null +++ b/test/integration/manifests/cilium/netpol/default-allow.yaml @@ -0,0 +1,15 @@ +## Only allows traffic within the default namespace +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-default +spec: + endpointSelector: {} + 
ingress: + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: default + egress: + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: default diff --git a/test/integration/manifests/cilium/v1.17/cilium-config/cilium-chained-config.yaml b/test/integration/manifests/cilium/v1.17/cilium-config/cilium-chained-config.yaml new file mode 100644 index 0000000000..dfef6291a2 --- /dev/null +++ b/test/integration/manifests/cilium/v1.17/cilium-config/cilium-chained-config.yaml @@ -0,0 +1,140 @@ +apiVersion: v1 #Not verified, placeholder +data: + agent-not-ready-taint-key: node.cilium.io/agent-not-ready + arping-refresh-period: 30s + auto-direct-node-routes: "false" + bpf-lb-external-clusterip: "false" + bpf-lb-map-max: "65536" + bpf-lb-mode: snat + bpf-map-dynamic-size-ratio: "0.0025" + bpf-policy-map-max: "16384" + bpf-root: /sys/fs/bpf + cgroup-root: /run/cilium/cgroupv2 + cilium-endpoint-gc-interval: 5m0s + cni-chaining-mode: generic-veth + cluster-id: "0" + cluster-name: default + debug: "false" + disable-cnp-status-updates: "true" + disable-endpoint-crd: "false" + enable-auto-protect-node-port-range: "true" + enable-bgp-control-plane: "false" + enable-bpf-clock-probe: "true" + enable-endpoint-health-checking: "false" + enable-endpoint-routes: "true" + enable-health-check-nodeport: "true" + enable-health-checking: "true" + enable-host-legacy-routing: "true" + enable-hubble: "false" + enable-ipv4: "true" + enable-ipv4-masquerade: "false" + enable-ipv6: "false" + enable-ipv6-masquerade: "false" + enable-k8s-terminating-endpoint: "true" + enable-l2-neigh-discovery: "true" + enable-l7-proxy: "false" + enable-local-node-route: "false" + enable-local-redirect-policy: "true" # set to true for lrp test + enable-metrics: "true" + enable-policy: default + enable-session-affinity: "true" + enable-svc-source-range-check: "true" + enable-vtep: "false" + enable-well-known-identities: "false" + enable-xt-socket-fallback: "true" + identity-allocation-mode: 
crd + install-iptables-rules: "true" + install-no-conntrack-iptables-rules: "false" + ipam: delegated-plugin + kube-proxy-replacement: "true" + kube-proxy-replacement-healthz-bind-address: "0.0.0.0:10256" ## Remove if kube-proxy is enabled + local-router-ipv4: 169.254.23.0 + metrics: +cilium_bpf_map_pressure + monitor-aggregation: medium + monitor-aggregation-flags: all + monitor-aggregation-interval: 5s + node-port-bind-protection: "true" + nodes-gc-interval: 5m0s + operator-api-serve-addr: 127.0.0.1:9234 + operator-prometheus-serve-addr: :9963 + preallocate-bpf-maps: "false" + procfs: /host/proc + prometheus-serve-addr: :9962 + remove-cilium-node-taints: "true" + set-cilium-is-up-condition: "true" + sidecar-istio-proxy-image: cilium/istio_proxy + synchronize-k8s-nodes: "true" + tofqdns-dns-reject-response-code: refused + tofqdns-enable-dns-compression: "true" + tofqdns-endpoint-max-ip-per-hostname: "1000" + tofqdns-idle-connection-grace-period: 0s + tofqdns-max-deferred-connection-deletes: "10000" + tofqdns-min-ttl: "0" + tofqdns-proxy-response-max-delay: 100ms + routing-mode: native + unmanaged-pod-watcher-interval: "15" + vtep-cidr: "" + vtep-endpoint: "" + vtep-mac: "" + vtep-mask: "" + enable-sctp: "false" + external-envoy-proxy: "false" + k8s-client-qps: "10" + k8s-client-burst: "20" + mesh-auth-enabled: "true" + mesh-auth-queue-size: "1024" + mesh-auth-rotated-identities-queue-size: "1024" + mesh-auth-gc-interval: "5m0s" + proxy-connect-timeout: "2" + proxy-max-requests-per-connection: "0" + proxy-max-connection-duration-seconds: "0" + set-cilium-node-taints: "true" + unmanaged-pod-watcher-interval: "15" +## new values added for 1.16 below + enable-ipv4-big-tcp: "false" + enable-ipv6-big-tcp: "false" + enable-masquerade-to-route-source: "false" + enable-health-check-loadbalancer-ip: "false" + bpf-lb-acceleration: "disabled" + enable-k8s-networkpolicy: "true" + cni-exclusive: "false" # Cilium takes ownership of /etc/cni/net.d, pods cannot be scheduled with 
any other cni if cilium is down + cni-log-file: "/var/run/cilium/cilium-cni.log" + ipam-cilium-node-update-rate: "15s" + egress-gateway-reconciliation-trigger-interval: "1s" + nat-map-stats-entries: "32" + nat-map-stats-interval: "30s" + bpf-events-drop-enabled: "true" # exposes drop events to cilium monitor/hubble + bpf-events-policy-verdict-enabled: "true" # exposes policy verdict events to cilium monitor/hubble + bpf-events-trace-enabled: "true" # exposes trace events to cilium monitor/hubble + enable-tcx: "false" # attach endpoint programs with tcx if supported by kernel + datapath-mode: "veth" + direct-routing-skip-unreachable: "false" + enable-runtime-device-detection: "false" + bpf-lb-sock: "false" + bpf-lb-sock-terminate-pod-connections: "false" + nodeport-addresses: "" + k8s-require-ipv4-pod-cidr: "false" + k8s-require-ipv6-pod-cidr: "false" + enable-node-selector-labels: "false" +## new values for 1.17 + ces-slice-mode: "fcfs" + enable-cilium-endpoint-slice: "true" + bpf-lb-source-range-all-types: "false" + bpf-algorithm-annotation: "false" + bpf-lb-mode-annotation: "false" + enable-experimental-lb: "false" + enable-endpoint-lockdown-on-policy-overflow: "false" + health-check-icmp-failure-threshold: "3" + enable-internal-traffic-policy: "true" + enable-lb-ipam: "true" + enable-non-default-deny-policies: "true" + enable-source-ip-verification: "true" +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: cilium + meta.helm.sh/release-namespace: kube-system + labels: + app.kubernetes.io/managed-by: Helm + name: cilium-config + namespace: kube-system diff --git a/test/integration/manifests/cnsconfig/azcnichainedciliumconfigmap.yaml b/test/integration/manifests/cnsconfig/azcnichainedciliumconfigmap.yaml new file mode 100644 index 0000000000..f1cb5e3f78 --- /dev/null +++ b/test/integration/manifests/cnsconfig/azcnichainedciliumconfigmap.yaml @@ -0,0 +1,46 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: cns-config + namespace: 
kube-system +data: + cns_config.json: | + { + "CNIConflistFilepath": "/etc/cni/net.d/05-azure-chained-cilium.conflist", + "CNIConflistScenario": "azurecni-chained-cilium", + "ChannelMode": "CRD", + "EnableAsyncPodDelete": true, + "EnableCNIConflistGeneration": true, + "EnableIPAMv2": true, + "EnableK8sDevicePlugin": true, + "EnableLoggerV2": true, + "EnableStateMigration": true, + "EnableSubnetScarcity": false, + "InitializeFromCNI": false, + "Logger": { + "file": { + "filepath": "/var/log/azure-cns/azure-cns.log", + "level": "info", + "maxBackups": 5, + "maxSize": 5 + } + }, + "ManageEndpointState": true, + "ManagedSettings": { + "InfrastructureNetworkID": "", + "NodeID": "", + "NodeSyncIntervalInSeconds": 30, + "PrivateEndpoint": "" + }, + "MetricsBindAddress": ":10092", + "ProgramSNATIPTables": false, + "TelemetrySettings": { + "DebugMode": false, + "DisableAll": false, + "HeartBeatIntervalInMins": 30, + "RefreshIntervalInSecs": 15, + "SnapshotIntervalInMins": 60, + "TelemetryBatchIntervalInSecs": 15, + "TelemetryBatchSizeBytes": 16384 + } + } diff --git a/test/integration/manifests/swiftv2/mt-deploy.yaml b/test/integration/manifests/swiftv2/mt-deploy.yaml new file mode 100644 index 0000000000..9027ca7f85 --- /dev/null +++ b/test/integration/manifests/swiftv2/mt-deploy.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: container + namespace: default +spec: + selector: + matchLabels: + app: container + replicas: 1 + template: # create pods using pod definition in this template + metadata: + # unlike pod-nginx.yaml, the name is not included in the meta data as a unique name is + # generated from the deployment name + labels: + app: container + kubernetes.azure.com/pod-network-instance: pni + spec: + containers: + - name: container + image: mcr.microsoft.com/azurelinux/busybox:1.36 + command: + - sh + - -c + - sleep 3650d + imagePullPolicy: Always + securityContext: + privileged: true + nodeSelector: + kubernetes.io/os: linux + 
tolerations: + - key: "cri-resource-consume" + operator: "Equal" + value: "true" + effect: "NoSchedule" + - key: "cri-resource-consume" + operator: "Equal" + value: "true" + effect: "NoExecute" + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname # KV: Key is hostname, value is each unique nodename + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: container