diff --git a/Vagrantfile b/Vagrantfile index 99eace0e39..53e2f6e4b0 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -1,6 +1,6 @@ Vagrant.configure("2") do |config| config.vm.box = "ubuntu/jammy64" - config.vm.hostname = "lab05" + config.vm.hostname = "lab09" # Disable project folder sharing inside the VM. # This avoids common Windows path issues (spaces, Cyrillic characters) @@ -14,11 +14,14 @@ Vagrant.configure("2") do |config| config.vm.network "forwarded_port", guest: 3000, host: 3000, host_ip: "0.0.0.0", id: "grafana", auto_correct: true config.vm.network "forwarded_port", guest: 3100, host: 3100, host_ip: "0.0.0.0", id: "loki", auto_correct: true config.vm.network "forwarded_port", guest: 9080, host: 9080, host_ip: "0.0.0.0", id: "promtail", auto_correct: true + config.vm.network "forwarded_port", guest: 9090, host: 9090, host_ip: "0.0.0.0", id: "prometheus", auto_correct: true + config.vm.network "forwarded_port", guest: 30080, host: 30080, host_ip: "0.0.0.0", id: "k8s-app1", auto_correct: true + config.vm.network "forwarded_port", guest: 30081, host: 30081, host_ip: "0.0.0.0", id: "k8s-app2", auto_correct: true config.ssh.insert_key = true config.vm.provider "virtualbox" do |vb| - vb.name = "lab07-monitoring" + vb.name = "lab09" vb.memory = 3072 vb.cpus = 2 end diff --git a/ansible/roles/monitoring/defaults/main.yml b/ansible/roles/monitoring/defaults/main.yml index 76fb4745d6..2524c6e8fe 100644 --- a/ansible/roles/monitoring/defaults/main.yml +++ b/ansible/roles/monitoring/defaults/main.yml @@ -5,23 +5,31 @@ monitoring_compose_project_name: devops-monitoring monitoring_loki_version: "3.0.0" monitoring_promtail_version: "3.0.0" monitoring_grafana_version: "12.3.1" +monitoring_prometheus_version: "v3.9.0" monitoring_loki_port: 3100 monitoring_promtail_port: 9080 monitoring_grafana_port: 3000 +monitoring_prometheus_port: 9090 monitoring_app_port: 8000 monitoring_app_internal_port: 8000 monitoring_loki_retention_period: "168h" 
+monitoring_prometheus_retention_days: 15 +monitoring_prometheus_retention_size: "10GB" +monitoring_prometheus_scrape_interval: "15s" + monitoring_grafana_admin_user: admin -monitoring_grafana_admin_password: ChangeMe_Lab07! -monitoring_grafana_datasource_uid: loki -monitoring_grafana_datasource_name: Loki +monitoring_grafana_admin_password: ChangeMe_Lab08! +monitoring_loki_datasource_uid: loki +monitoring_loki_datasource_name: Loki +monitoring_prometheus_datasource_uid: prometheus +monitoring_prometheus_datasource_name: Prometheus monitoring_app_service_name: app-python monitoring_app_container_name: devops-python monitoring_app_label: devops-python -monitoring_app_image: devops-info-service:lab07 +monitoring_app_image: devops-info-service:lab08 monitoring_app_source_dir: "{{ playbook_dir }}/../../app_python" monitoring_app_source_files: - .dockerignore @@ -33,16 +41,36 @@ monitoring_app_env: PORT: "{{ monitoring_app_internal_port | string }}" DEBUG: "false" +monitoring_prometheus_targets: + - job: prometheus + targets: + - localhost:9090 + - job: app + targets: + - "{{ monitoring_app_service_name }}:{{ monitoring_app_internal_port }}" + path: /metrics + - job: loki + targets: + - loki:3100 + path: /metrics + - job: grafana + targets: + - grafana:3000 + path: /metrics + monitoring_resource_limits: loki: limits: { cpus: '1.0', memory: 1G } reservations: { cpus: '0.25', memory: 256M } promtail: - limits: { cpus: '0.75', memory: 512M } + limits: { cpus: '0.5', memory: 512M } reservations: { cpus: '0.10', memory: 128M } grafana: + limits: { cpus: '0.5', memory: 512M } + reservations: { cpus: '0.10', memory: 128M } + prometheus: limits: { cpus: '1.0', memory: 1G } reservations: { cpus: '0.25', memory: 256M } app_python: - limits: { cpus: '0.75', memory: 512M } + limits: { cpus: '0.5', memory: 256M } reservations: { cpus: '0.10', memory: 128M } diff --git a/ansible/roles/monitoring/files/lab08-metrics.json b/ansible/roles/monitoring/files/lab08-metrics.json new file 
mode 100644 index 0000000000..b05c0fa86a --- /dev/null +++ b/ansible/roles/monitoring/files/lab08-metrics.json @@ -0,0 +1,454 @@ +{ + "id": null, + "uid": "lab08-prometheus-metrics", + "title": "Lab08 - Prometheus Metrics Overview", + "tags": [ + "lab08", + "prometheus", + "metrics", + "observability" + ], + "timezone": "browser", + "schemaVersion": 39, + "version": 1, + "refresh": "10s", + "time": { + "from": "now-30m", + "to": "now" + }, + "panels": [ + { + "id": 1, + "type": "timeseries", + "title": "Request Rate by Endpoint", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "targets": [ + { + "refId": "A", + "expr": "sum by (endpoint) (rate(http_requests_total{endpoint!=\"/metrics\"}[5m]))", + "legendFormat": "{{endpoint}}", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 2, + "type": "timeseries", + "title": "Error Rate (5xx)", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m]))", + "legendFormat": "5xx errors/sec", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + } + }, + { + "id": 3, + "type": "timeseries", + "title": "Request Duration p95 by Endpoint", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "targets": [ + { + "refId": "A", + "expr": 
"histogram_quantile(0.95, sum by (le, endpoint) (rate(http_request_duration_seconds_bucket{endpoint!=\"/metrics\"}[5m])))", + "legendFormat": "{{endpoint}} p95", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 4, + "type": "stat", + "title": "Active Requests", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "refId": "A", + "expr": "sum(http_requests_in_progress)", + "legendFormat": "in-flight", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "textMode": "auto" + } + }, + { + "id": 5, + "type": "stat", + "title": "Application Uptime", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 8 + }, + "targets": [ + { + "refId": "A", + "expr": "max(devops_info_uptime_seconds)", + "legendFormat": "uptime", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "textMode": "auto" + } + }, + { + "id": 6, + 
"type": "heatmap", + "title": "Request Duration Heatmap", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 16 + }, + "targets": [ + { + "refId": "A", + "expr": "sum by (le) (rate(http_request_duration_seconds_bucket{endpoint!=\"/metrics\"}[5m]))", + "legendFormat": "{{le}}", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps" + }, + "overrides": [] + }, + "options": { + "calculate": false, + "legend": { + "show": false + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "unit": "s" + } + } + }, + { + "id": 7, + "type": "piechart", + "title": "Status Code Distribution", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 16 + }, + "targets": [ + { + "refId": "A", + "expr": "sum by (status_code) (rate(http_requests_total[5m]))", + "legendFormat": "{{status_code}}", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "options": { + "legend": { + "displayMode": "list", + "placement": "right" + }, + "pieType": "pie", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + } + } + }, + { + "id": 8, + "type": "stat", + "title": "App Target Uptime", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 25 + }, + "targets": [ + { + "refId": "A", + "expr": "up{job=\"app\"}", + "legendFormat": "app", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "mappings": [ + { + "type": "value", + "options": { + "0": { + "text": "DOWN" + }, + "1": { + "text": "UP" + } + } + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + 
"overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "textMode": "auto" + } + }, + { + "id": 9, + "type": "timeseries", + "title": "System Info Collection p95", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 7, + "w": 16, + "x": 8, + "y": 25 + }, + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.95, sum by (le) (rate(devops_info_system_collection_seconds_bucket[5m])))", + "legendFormat": "system info p95", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + } + } + ], + "templating": { + "list": [] + }, + "annotations": { + "list": [] + } +} diff --git a/ansible/roles/monitoring/tasks/main.yml b/ansible/roles/monitoring/tasks/main.yml index a30f1d9812..a0d3f2b674 100644 --- a/ansible/roles/monitoring/tasks/main.yml +++ b/ansible/roles/monitoring/tasks/main.yml @@ -12,6 +12,7 @@ - "{{ monitoring_project_dir }}" - "{{ monitoring_project_dir }}/loki" - "{{ monitoring_project_dir }}/promtail" + - "{{ monitoring_project_dir }}/prometheus" - "{{ monitoring_project_dir }}/grafana" - "{{ monitoring_project_dir }}/grafana/provisioning" - "{{ monitoring_project_dir }}/grafana/provisioning/datasources" @@ -36,7 +37,7 @@ group: root mode: "0600" - - name: Template monitoring stack files + - name: Template monitoring stack configuration files ansible.builtin.template: src: "{{ item.src }}" dest: "{{ monitoring_project_dir }}/{{ item.dest }}" @@ -47,10 +48,19 @@ - { src: 'docker-compose.yml.j2', dest: 'docker-compose.yml' } - { src: 'loki-config.yml.j2', dest: 'loki/config.yml' } - { src: 'promtail-config.yml.j2', dest: 'promtail/config.yml' } + - 
{ src: 'prometheus-config.yml.j2', dest: 'prometheus/prometheus.yml' } - { src: 'grafana-datasource.yml.j2', dest: 'grafana/provisioning/datasources/loki.yml' } - { src: 'grafana-dashboard-provider.yml.j2', dest: 'grafana/provisioning/dashboards/dashboard-provider.yml' } - { src: 'grafana-dashboard.json.j2', dest: 'grafana/dashboards/lab07-logging.json' } + - name: Copy Lab08 Grafana metrics dashboard + ansible.builtin.copy: + src: lab08-metrics.json + dest: "{{ monitoring_project_dir }}/grafana/dashboards/lab08-metrics.json" + owner: root + group: root + mode: "0644" + - name: Deploy monitoring stack with Docker Compose v2 community.docker.docker_compose_v2: project_src: "{{ monitoring_project_dir }}" @@ -70,6 +80,16 @@ delay: 3 until: loki_ready.status == 200 + - name: Wait for Prometheus to become healthy + ansible.builtin.uri: + url: "http://127.0.0.1:{{ monitoring_prometheus_port }}/-/healthy" + method: GET + status_code: 200 + register: prometheus_ready + retries: 20 + delay: 3 + until: prometheus_ready.status == 200 + - name: Wait for Grafana API health endpoint ansible.builtin.uri: url: "http://127.0.0.1:{{ monitoring_grafana_port }}/api/health" @@ -93,14 +113,32 @@ delay: 3 until: monitoring_app_health.status == 200 - - name: Verify Loki data source was provisioned + - name: Wait for monitored application metrics endpoint ansible.builtin.uri: - url: "http://127.0.0.1:{{ monitoring_grafana_port }}/api/datasources/uid/{{ monitoring_grafana_datasource_uid }}" + url: "http://127.0.0.1:{{ monitoring_app_port }}/metrics" + method: GET + status_code: 200 + return_content: true + register: monitoring_app_metrics + retries: 20 + delay: 3 + until: + - monitoring_app_metrics.status == 200 + - "'http_requests_total' in monitoring_app_metrics.content" + + - name: Verify provisioned Grafana data sources + ansible.builtin.uri: + url: "http://127.0.0.1:{{ monitoring_grafana_port }}/api/datasources/uid/{{ item.uid }}" method: GET user: "{{ monitoring_grafana_admin_user }}" 
password: "{{ monitoring_grafana_admin_password }}" force_basic_auth: true status_code: 200 + loop: + - { name: '{{ monitoring_loki_datasource_name }}', uid: '{{ monitoring_loki_datasource_uid }}' } + - { name: '{{ monitoring_prometheus_datasource_name }}', uid: '{{ monitoring_prometheus_datasource_uid }}' } + loop_control: + label: "{{ item.name }}" rescue: - name: Monitoring deployment failure hint diff --git a/ansible/roles/monitoring/templates/docker-compose.yml.j2 b/ansible/roles/monitoring/templates/docker-compose.yml.j2 index 52f249fbf9..972a969a60 100644 --- a/ansible/roles/monitoring/templates/docker-compose.yml.j2 +++ b/ansible/roles/monitoring/templates/docker-compose.yml.j2 @@ -19,9 +19,9 @@ services: restart: unless-stopped healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3100/ready"] - interval: 15s + interval: 10s timeout: 5s - retries: 10 + retries: 5 start_period: 20s deploy: resources: @@ -55,9 +55,9 @@ services: condition: service_healthy healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9080/ready"] - interval: 15s + interval: 10s timeout: 5s - retries: 10 + retries: 5 start_period: 20s deploy: resources: @@ -77,7 +77,9 @@ services: GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER:-{{ monitoring_grafana_admin_user }}}" GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD}" GF_AUTH_ANONYMOUS_ENABLED: "false" + GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer GF_SECURITY_ALLOW_EMBEDDING: "false" + GF_METRICS_ENABLED: "true" ports: - "{{ monitoring_grafana_port }}:3000" volumes: @@ -95,9 +97,9 @@ services: condition: service_healthy healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3000/api/health"] - interval: 15s + interval: 10s timeout: 5s - retries: 10 + retries: 5 start_period: 30s deploy: resources: @@ -108,6 +110,48 @@ services: cpus: "{{ monitoring_resource_limits.grafana.reservations.cpus }}" memory: {{ 
monitoring_resource_limits.grafana.reservations.memory }} + prometheus: + image: prom/prometheus:{{ monitoring_prometheus_version }} + container_name: devops-prometheus + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --storage.tsdb.retention.time={{ monitoring_prometheus_retention_days }}d + - --storage.tsdb.retention.size={{ monitoring_prometheus_retention_size }} + - --web.enable-lifecycle + ports: + - "{{ monitoring_prometheus_port }}:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - logging + labels: + logging: "promtail" + app: "devops-prometheus" + restart: unless-stopped + depends_on: + loki: + condition: service_healthy + grafana: + condition: service_healthy + {{ monitoring_app_service_name }}: + condition: service_healthy + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9090/-/healthy"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: "{{ monitoring_resource_limits.prometheus.limits.cpus }}" + memory: {{ monitoring_resource_limits.prometheus.limits.memory }} + reservations: + cpus: "{{ monitoring_resource_limits.prometheus.reservations.cpus }}" + memory: {{ monitoring_resource_limits.prometheus.reservations.memory }} + {{ monitoring_app_service_name }}: build: context: ./app-python @@ -128,9 +172,9 @@ services: restart: unless-stopped healthcheck: test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:{{ monitoring_app_internal_port }}/health')"] - interval: 15s + interval: 10s timeout: 5s - retries: 10 + retries: 5 start_period: 20s deploy: resources: @@ -149,3 +193,4 @@ volumes: loki-data: grafana-data: promtail-data: + prometheus-data: diff --git a/ansible/roles/monitoring/templates/env.j2 b/ansible/roles/monitoring/templates/env.j2 index 6afa6c1271..c922f823ea 100644 --- 
a/ansible/roles/monitoring/templates/env.j2 +++ b/ansible/roles/monitoring/templates/env.j2 @@ -3,4 +3,5 @@ GRAFANA_ADMIN_PASSWORD={{ monitoring_grafana_admin_password }} GRAFANA_PORT={{ monitoring_grafana_port }} LOKI_PORT={{ monitoring_loki_port }} PROMTAIL_PORT={{ monitoring_promtail_port }} +PROMETHEUS_PORT={{ monitoring_prometheus_port }} APP_PORT={{ monitoring_app_port }} diff --git a/ansible/roles/monitoring/templates/grafana-datasource.yml.j2 b/ansible/roles/monitoring/templates/grafana-datasource.yml.j2 index f607bfadda..a19cd5ec5c 100644 --- a/ansible/roles/monitoring/templates/grafana-datasource.yml.j2 +++ b/ansible/roles/monitoring/templates/grafana-datasource.yml.j2 @@ -1,14 +1,16 @@ apiVersion: 1 deleteDatasources: - - name: {{ monitoring_grafana_datasource_name }} + - name: {{ monitoring_loki_datasource_name }} + orgId: 1 + - name: {{ monitoring_prometheus_datasource_name }} orgId: 1 prune: true datasources: - - name: {{ monitoring_grafana_datasource_name }} - uid: {{ monitoring_grafana_datasource_uid }} + - name: {{ monitoring_loki_datasource_name }} + uid: {{ monitoring_loki_datasource_uid }} type: loki access: proxy url: http://loki:3100 @@ -17,3 +19,15 @@ datasources: jsonData: maxLines: 1000 timeout: 60 + + - name: {{ monitoring_prometheus_datasource_name }} + uid: {{ monitoring_prometheus_datasource_uid }} + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: false + editable: false + jsonData: + httpMethod: POST + prometheusType: Prometheus + timeInterval: {{ monitoring_prometheus_scrape_interval }} diff --git a/ansible/roles/monitoring/templates/prometheus-config.yml.j2 b/ansible/roles/monitoring/templates/prometheus-config.yml.j2 new file mode 100644 index 0000000000..f3ffd4378e --- /dev/null +++ b/ansible/roles/monitoring/templates/prometheus-config.yml.j2 @@ -0,0 +1,16 @@ +global: + scrape_interval: {{ monitoring_prometheus_scrape_interval }} + evaluation_interval: {{ monitoring_prometheus_scrape_interval }} + 
+scrape_configs: +{% for target in monitoring_prometheus_targets %} + - job_name: {{ target.job | quote }} +{% if target.path is defined %} + metrics_path: {{ target.path | quote }} +{% endif %} + static_configs: + - targets: +{% for endpoint in target.targets %} + - {{ endpoint | quote }} +{% endfor %} +{% endfor %} diff --git a/app_python/Dockerfile b/app_python/Dockerfile index 2ca732ad78..7548f6608b 100644 --- a/app_python/Dockerfile +++ b/app_python/Dockerfile @@ -1,4 +1,3 @@ - # Production-oriented image for a small Flask app. # Pin a specific Python version for reproducible builds. FROM python:3.13.1-slim @@ -9,9 +8,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ WORKDIR /app -# Create a dedicated non-root user (mandatory best practice). -RUN addgroup --system app \ - && adduser --system --ingroup app --no-create-home app +# Create a dedicated non-root user with numeric UID/GID. +RUN addgroup --system --gid 10001 app \ + && adduser --system --uid 10001 --ingroup app --no-create-home app # Install dependencies first to leverage Docker layer caching. COPY requirements.txt ./ @@ -20,8 +19,8 @@ RUN pip install --no-cache-dir -r requirements.txt # Copy only the application code needed at runtime. COPY app.py ./ -# Drop privileges -USER app +# Drop privileges using a numeric user for Kubernetes runAsNonRoot validation. +USER 10001:10001 # Document the port (Flask defaults to 5000 in this repo) EXPOSE 5000 diff --git a/app_python/README.md b/app_python/README.md index 48cffa6caa..d418c0fd0e 100644 --- a/app_python/README.md +++ b/app_python/README.md @@ -1,159 +1,168 @@ -# DevOps Info Service - -[![python-ci](https://github.com/dorley174/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg)](https://github.com/dorley174/DevOps-Core-Course/actions/workflows/python-ci.yml) - -## Overview -DevOps Info Service is a production-ready starter web service for the DevOps course. -It reports service metadata, runtime details, and basic system information. 
- -The service exposes two endpoints: -- `GET /` — service + system + runtime + request information -- `GET /health` — health check endpoint (used later for Kubernetes probes) - -## Prerequisites -- Python **3.11+** -- `pip` -- (Recommended) Virtual environment (`venv`) -- **Windows:** Python Launcher `py` is recommended - -## Installation - -```bash -python -m venv venv -# Windows: .\venv\Scripts\activate -# Linux/macOS: source venv/bin/activate - -pip install -r requirements.txt -``` - -## Running the Application - -### Default run (port 5000) -> If `PORT` is not set, the application runs on **0.0.0.0:5000**. -```bash -python app.py -``` - -### Custom configuration - -**Linux/Mac:** -```bash -HOST=127.0.0.1 PORT=8080 DEBUG=True python app.py -``` - -**Windows (PowerShell):** -```powershell -$env:HOST="127.0.0.1" -$env:PORT="8080" -$env:DEBUG="True" -python app.py -``` - -**Windows (CMD):** -```bat -set HOST=127.0.0.1 -set PORT=8080 -set DEBUG=True -python app.py -``` - -## API Endpoints - -### `GET /` -Returns service metadata, system information, runtime details, request info, and a list of available endpoints. - -Example: -```bash -curl http://127.0.0.1:5000/ -``` - -### `GET /health` -Returns a minimal health response for monitoring. - -Example (includes HTTP status): -```bash -curl -i http://127.0.0.1:5000/health -``` - -## Testing / Pretty Output - -### Pretty-printed JSON -**Windows PowerShell tip:** use `curl.exe`. -```bash -curl -s http://127.0.0.1:5000/ | python -m json.tool -``` - -## Testing & Linting (LAB03) - -> Dev dependencies live in `requirements-dev.txt` (pytest, coverage, ruff). - -Install dev deps: -```bash -pip install -r requirements-dev.txt -``` - -Run linter: -```bash -ruff check . -``` - -Run tests + coverage: -```bash -pytest -q tests --cov=. 
--cov-report=term-missing -``` - -## CI/CD Secrets (GitHub Actions) - -In your GitHub repository: -**Settings → Secrets and variables → Actions → New repository secret** - -Add: -- `DOCKERHUB_USERNAME` — your Docker Hub username -- `DOCKERHUB_TOKEN` — Docker Hub Access Token (Account Settings → Security) -- `SNYK_TOKEN` — Snyk API token (Account settings → API token) - -## Configuration - -| Variable | Default | Description | -|----------|---------|-------------| -| HOST | 0.0.0.0 | Bind address | -| PORT | 5000 | HTTP port | -| DEBUG | False | Flask debug mode | - ---- - -## Docker - -> Examples below use placeholders like `` and ``. - -### Build (local) - -```bash -docker build -t : . -``` - -### Run - -```bash -docker run --rm -p 5000:5000 : -``` - -(Optional: override env vars) - -```bash -docker run --rm -p 5000:5000 -e PORT=5000 -e DEBUG=false : -``` - -### Pull from Docker Hub - -```bash -docker pull /: -docker run --rm -p 5000:5000 /: -``` - -### Quick test - -```bash -curl http://localhost:5000/health -curl http://localhost:5000/ -``` +# DevOps Info Service + +[![python-ci](https://github.com/dorley174/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg)](https://github.com/dorley174/DevOps-Core-Course/actions/workflows/python-ci.yml) + +## Overview +DevOps Info Service is a production-ready starter web service for the DevOps course. +It reports service metadata, runtime details, and basic system information. 
+ +The service exposes two endpoints: +- `GET /` — service + system + runtime + request information +- `GET /health` — liveness health endpoint +- `GET /ready` — readiness health endpoint for Kubernetes + +## Prerequisites +- Python **3.11+** +- `pip` +- (Recommended) Virtual environment (`venv`) +- **Windows:** Python Launcher `py` is recommended + +## Installation + +```bash +python -m venv venv +# Windows: .\venv\Scripts\activate +# Linux/macOS: source venv/bin/activate + +pip install -r requirements.txt +``` + +## Running the Application + +### Default run (port 5000) +> If `PORT` is not set, the application runs on **0.0.0.0:5000**. +```bash +python app.py +``` + +### Custom configuration + +**Linux/Mac:** +```bash +HOST=127.0.0.1 PORT=8080 DEBUG=True python app.py +``` + +**Windows (PowerShell):** +```powershell +$env:HOST="127.0.0.1" +$env:PORT="8080" +$env:DEBUG="True" +python app.py +``` + +**Windows (CMD):** +```bat +set HOST=127.0.0.1 +set PORT=8080 +set DEBUG=True +python app.py +``` + +## API Endpoints + +### `GET /` +Returns service metadata, system information, runtime details, request info, and a list of available endpoints. + +Example: +```bash +curl http://127.0.0.1:5000/ +``` + +### `GET /health` +Returns a minimal liveness response for monitoring and Kubernetes liveness probes. + +Example (includes HTTP status): +```bash +curl -i http://127.0.0.1:5000/health +``` + +### `GET /ready` +Returns readiness information for Kubernetes readiness probes. + +Example: +```bash +curl -i http://127.0.0.1:5000/ready +``` + +## Testing / Pretty Output + +### Pretty-printed JSON +**Windows PowerShell tip:** use `curl.exe`. +```bash +curl -s http://127.0.0.1:5000/ | python -m json.tool +``` + +## Testing & Linting (LAB03) + +> Dev dependencies live in `requirements-dev.txt` (pytest, coverage, ruff). + +Install dev deps: +```bash +pip install -r requirements-dev.txt +``` + +Run linter: +```bash +ruff check . 
+``` + +Run tests + coverage: +```bash +pytest -q tests --cov=. --cov-report=term-missing +``` + +## CI/CD Secrets (GitHub Actions) + +In your GitHub repository: +**Settings → Secrets and variables → Actions → New repository secret** + +Add: +- `DOCKERHUB_USERNAME` — your Docker Hub username +- `DOCKERHUB_TOKEN` — Docker Hub Access Token (Account Settings → Security) +- `SNYK_TOKEN` — Snyk API token (Account settings → API token) + +## Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| HOST | 0.0.0.0 | Bind address | +| PORT | 5000 | HTTP port | +| DEBUG | False | Flask debug mode | + +--- + +## Docker + +> Examples below use placeholders like `` and ``. + +### Build (local) + +```bash +docker build -t : . +``` + +### Run + +```bash +docker run --rm -p 5000:5000 : +``` + +(Optional: override env vars) + +```bash +docker run --rm -p 5000:5000 -e PORT=5000 -e DEBUG=false : +``` + +### Pull from Docker Hub + +```bash +docker pull /: +docker run --rm -p 5000:5000 /: +``` + +### Quick test + +```bash +curl http://localhost:5000/health +curl http://localhost:5000/ +``` diff --git a/app_python/app.py b/app_python/app.py index ddca2ada41..36e505df46 100644 --- a/app_python/app.py +++ b/app_python/app.py @@ -5,6 +5,7 @@ Endpoints: - GET / : service + system + runtime + request info - GET /health : health check (for probes/monitoring) +- GET /metrics : Prometheus metrics endpoint """ from __future__ import annotations @@ -19,7 +20,8 @@ from datetime import datetime, timezone from typing import Any, Dict -from flask import Flask, g, jsonify, request +from flask import Flask, Response, g, jsonify, request +from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, Histogram, generate_latest # ----------------------------------------------------------------------------- # App & Config @@ -31,14 +33,54 @@ PORT: int = int(os.getenv("PORT", "5000")) DEBUG: bool = os.getenv("DEBUG", "False").strip().lower() == "true" -SERVICE_NAME 
= "devops-info-service" -SERVICE_VERSION = "1.1.0" -SERVICE_DESCRIPTION = "DevOps course info service" +SERVICE_NAME = os.getenv("SERVICE_NAME", "devops-info-service") +SERVICE_VERSION = os.getenv("SERVICE_VERSION", "1.1.0") +SERVICE_DESCRIPTION = os.getenv("SERVICE_DESCRIPTION", "DevOps course info service") SERVICE_FRAMEWORK = "Flask" +APP_VARIANT = os.getenv("APP_VARIANT", "primary") +APP_MESSAGE = os.getenv("APP_MESSAGE", "running") START_TIME_UTC = datetime.now(timezone.utc) +# ----------------------------------------------------------------------------- +# Prometheus metrics +# ----------------------------------------------------------------------------- + +HTTP_REQUESTS_TOTAL = Counter( + "http_requests_total", + "Total HTTP requests processed by the service.", + ["method", "endpoint", "status_code"], +) + +HTTP_REQUEST_DURATION_SECONDS = Histogram( + "http_request_duration_seconds", + "HTTP request duration in seconds.", + ["method", "endpoint"], +) + +HTTP_REQUESTS_IN_PROGRESS = Gauge( + "http_requests_in_progress", + "HTTP requests currently being processed.", +) + +DEVOPS_INFO_ENDPOINT_CALLS_TOTAL = Counter( + "devops_info_endpoint_calls_total", + "Total endpoint calls for the DevOps info service.", + ["endpoint"], +) + +DEVOPS_INFO_SYSTEM_COLLECTION_SECONDS = Histogram( + "devops_info_system_collection_seconds", + "Time spent collecting system information.", +) + +DEVOPS_INFO_UPTIME_SECONDS = Gauge( + "devops_info_uptime_seconds", + "Current service uptime in seconds.", +) + + # ----------------------------------------------------------------------------- # Logging # ----------------------------------------------------------------------------- @@ -122,9 +164,31 @@ def get_client_ip() -> str: return request.remote_addr or "unknown" +def normalize_endpoint() -> str: + """ + Keep endpoint labels low-cardinality for Prometheus. + Uses the Flask route template when available and groups unknown paths. 
+ """ + if request.url_rule and request.url_rule.rule: + return request.url_rule.rule + + if request.path == "/": + return "/" + + return "unmatched" + + @app.before_request def log_request_started() -> None: g.request_started_at = time.perf_counter() + g.normalized_endpoint = normalize_endpoint() + g.skip_http_metrics = request.path == "/metrics" + g.active_request_metric_registered = False + + if not g.skip_http_metrics: + HTTP_REQUESTS_IN_PROGRESS.inc() + g.active_request_metric_registered = True + logger.debug( "request_started", extra={ @@ -139,11 +203,36 @@ def log_request_started() -> None: ) +@app.teardown_request +def track_request_finished(_error: Exception | None) -> None: + if getattr(g, "active_request_metric_registered", False): + HTTP_REQUESTS_IN_PROGRESS.dec() + g.active_request_metric_registered = False + + @app.after_request def add_headers(response): - response.headers["Content-Type"] = "application/json; charset=utf-8" + endpoint = getattr(g, "normalized_endpoint", normalize_endpoint()) + duration_seconds = time.perf_counter() - getattr(g, "request_started_at", time.perf_counter()) + duration_ms = round(duration_seconds * 1000, 2) + + if not getattr(g, "skip_http_metrics", False): + HTTP_REQUESTS_TOTAL.labels( + method=request.method, + endpoint=endpoint, + status_code=str(response.status_code), + ).inc() + HTTP_REQUEST_DURATION_SECONDS.labels( + method=request.method, + endpoint=endpoint, + ).observe(duration_seconds) + DEVOPS_INFO_ENDPOINT_CALLS_TOTAL.labels(endpoint=endpoint).inc() + + DEVOPS_INFO_UPTIME_SECONDS.set(get_uptime()["seconds"]) + + if response.mimetype == "application/json": + response.headers["Content-Type"] = "application/json; charset=utf-8" - duration_ms = round((time.perf_counter() - getattr(g, "request_started_at", time.perf_counter())) * 1000, 2) logger.info( "request_completed", extra={ @@ -185,21 +274,27 @@ def get_uptime() -> Dict[str, Any]: def get_system_info() -> Dict[str, Any]: """Collect system information using 
Python standard library.""" - return { - "hostname": socket.gethostname(), - "platform": platform.system(), - "platform_version": platform.platform(), - "architecture": platform.machine(), - "cpu_count": os.cpu_count() or 0, - "python_version": platform.python_version(), - } + started_at = time.perf_counter() + try: + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.platform(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count() or 0, + "python_version": platform.python_version(), + } + finally: + DEVOPS_INFO_SYSTEM_COLLECTION_SECONDS.observe(time.perf_counter() - started_at) def build_endpoints() -> list[Dict[str, str]]: return [ {"path": "/", "method": "GET", "description": "Service information"}, - {"path": "/health", "method": "GET", "description": "Health check"}, + {"path": "/health", "method": "GET", "description": "Liveness health check"}, + {"path": "/ready", "method": "GET", "description": "Readiness health check"}, + {"path": "/metrics", "method": "GET", "description": "Prometheus metrics"}, ] @@ -218,6 +313,8 @@ def index(): "version": SERVICE_VERSION, "description": SERVICE_DESCRIPTION, "framework": SERVICE_FRAMEWORK, + "variant": APP_VARIANT, + "message": APP_MESSAGE, }, "system": get_system_info(), "runtime": { @@ -247,10 +344,33 @@ def health(): "status": "healthy", "timestamp": iso_utc_now_z(), "uptime_seconds": uptime["seconds"], + "variant": APP_VARIANT, } ), 200 +@app.get("/ready") +def ready(): + """Readiness endpoint used by Kubernetes readiness probes.""" + uptime = get_uptime() + return jsonify( + { + "status": "ready", + "timestamp": iso_utc_now_z(), + "uptime_seconds": uptime["seconds"], + "variant": APP_VARIANT, + "message": APP_MESSAGE, + } + ), 200 + + +@app.get("/metrics") +def metrics() -> Response: + """Expose Prometheus metrics for scraping.""" + DEVOPS_INFO_UPTIME_SECONDS.set(get_uptime()["seconds"]) + return Response(generate_latest(), 
content_type=CONTENT_TYPE_LATEST) + + # ----------------------------------------------------------------------------- # Error Handlers # ----------------------------------------------------------------------------- diff --git a/app_python/requirements.txt b/app_python/requirements.txt index 78180a1ad1..46c776bf8d 100644 --- a/app_python/requirements.txt +++ b/app_python/requirements.txt @@ -1 +1,2 @@ -Flask==3.1.0 \ No newline at end of file +Flask==3.1.0 +prometheus-client==0.23.1 diff --git a/get-docker.sh b/get-docker.sh new file mode 100644 index 0000000000..9a7bddb001 --- /dev/null +++ b/get-docker.sh @@ -0,0 +1,764 @@ +#!/bin/sh +set -e +# Docker Engine for Linux installation script. +# +# This script is intended as a convenient way to configure docker's package +# repositories and to install Docker Engine, This script is not recommended +# for production environments. Before running this script, make yourself familiar +# with potential risks and limitations, and refer to the installation manual +# at https://docs.docker.com/engine/install/ for alternative installation methods. +# +# The script: +# +# - Requires `root` or `sudo` privileges to run. +# - Attempts to detect your Linux distribution and version and configure your +# package management system for you. +# - Doesn't allow you to customize most installation parameters. +# - Installs dependencies and recommendations without asking for confirmation. +# - Installs the latest stable release (by default) of Docker CLI, Docker Engine, +# Docker Buildx, Docker Compose, containerd, and runc. When using this script +# to provision a machine, this may result in unexpected major version upgrades +# of these packages. Always test upgrades in a test environment before +# deploying to your production systems. +# - Isn't designed to upgrade an existing Docker installation. 
When using the +# script to update an existing installation, dependencies may not be updated +# to the expected version, resulting in outdated versions. +# +# Source code is available at https://github.com/docker/docker-install/ +# +# Usage +# ============================================================================== +# +# To install the latest stable versions of Docker CLI, Docker Engine, and their +# dependencies: +# +# 1. download the script +# +# $ curl -fsSL https://get.docker.com -o install-docker.sh +# +# 2. verify the script's content +# +# $ cat install-docker.sh +# +# 3. run the script with --dry-run to verify the steps it executes +# +# $ sh install-docker.sh --dry-run +# +# 4. run the script either as root, or using sudo to perform the installation. +# +# $ sudo sh install-docker.sh +# +# Command-line options +# ============================================================================== +# +# --version +# Use the --version option to install a specific version, for example: +# +# $ sudo sh install-docker.sh --version 23.0 +# +# --channel +# +# Use the --channel option to install from an alternative installation channel. +# The following example installs the latest versions from the "test" channel, +# which includes pre-releases (alpha, beta, rc): +# +# $ sudo sh install-docker.sh --channel test +# +# Alternatively, use the script at https://test.docker.com, which uses the test +# channel as default. +# +# --mirror +# +# Use the --mirror option to install from a mirror supported by this script. +# Available mirrors are "Aliyun" (https://mirrors.aliyun.com/docker-ce), and +# "AzureChinaCloud" (https://mirror.azure.cn/docker-ce), for example: +# +# $ sudo sh install-docker.sh --mirror AzureChinaCloud +# +# --setup-repo +# +# Use the --setup-repo option to configure Docker's package repositories without +# installing Docker packages. 
This is useful when you want to add the repository +# but install packages separately: +# +# $ sudo sh install-docker.sh --setup-repo +# +# Automatic Service Start +# +# By default, this script automatically starts the Docker daemon and enables the docker +# service after installation if systemd is used as init. +# +# If you prefer to start the service manually, use the --no-autostart option: +# +# $ sudo sh install-docker.sh --no-autostart +# +# Note: Starting the service requires appropriate privileges to manage system services. +# +# ============================================================================== + + +# Git commit from https://github.com/docker/docker-install when +# the script was uploaded (Should only be modified by upload job): +SCRIPT_COMMIT_SHA="f381ee68b32e515bb4dc034b339266aff1fbc460" + +# strip "v" prefix if present +VERSION="${VERSION#v}" + +# The channel to install from: +# * stable +# * test +DEFAULT_CHANNEL_VALUE="stable" +if [ -z "$CHANNEL" ]; then + CHANNEL=$DEFAULT_CHANNEL_VALUE +fi + +DEFAULT_DOWNLOAD_URL="https://download.docker.com" +if [ -z "$DOWNLOAD_URL" ]; then + DOWNLOAD_URL=$DEFAULT_DOWNLOAD_URL +fi + +DEFAULT_REPO_FILE="docker-ce.repo" +if [ -z "$REPO_FILE" ]; then + REPO_FILE="$DEFAULT_REPO_FILE" + # Automatically default to a staging repo fora + # a staging download url (download-stage.docker.com) + case "$DOWNLOAD_URL" in + *-stage*) REPO_FILE="docker-ce-staging.repo";; + esac +fi + +mirror='' +DRY_RUN=${DRY_RUN:-} +REPO_ONLY=${REPO_ONLY:-0} +NO_AUTOSTART=${NO_AUTOSTART:-0} +while [ $# -gt 0 ]; do + case "$1" in + --channel) + CHANNEL="$2" + shift + ;; + --dry-run) + DRY_RUN=1 + ;; + --mirror) + mirror="$2" + shift + ;; + --version) + VERSION="${2#v}" + shift + ;; + --setup-repo) + REPO_ONLY=1 + shift + ;; + --no-autostart) + NO_AUTOSTART=1 + ;; + --*) + echo "Illegal option $1" + ;; + esac + shift $(( $# > 0 ? 
1 : 0 )) +done + +case "$mirror" in + Aliyun) + DOWNLOAD_URL="https://mirrors.aliyun.com/docker-ce" + ;; + AzureChinaCloud) + DOWNLOAD_URL="https://mirror.azure.cn/docker-ce" + ;; + "") + ;; + *) + >&2 echo "unknown mirror '$mirror': use either 'Aliyun', or 'AzureChinaCloud'." + exit 1 + ;; +esac + +case "$CHANNEL" in + stable|test) + ;; + *) + >&2 echo "unknown CHANNEL '$CHANNEL': use either stable or test." + exit 1 + ;; +esac + +command_exists() { + command -v "$@" > /dev/null 2>&1 +} + +# version_gte checks if the version specified in $VERSION is at least the given +# SemVer (Maj.Minor[.Patch]), or CalVer (YY.MM) version.It returns 0 (success) +# if $VERSION is either unset (=latest) or newer or equal than the specified +# version, or returns 1 (fail) otherwise. +# +# examples: +# +# VERSION=23.0 +# version_gte 23.0 // 0 (success) +# version_gte 20.10 // 0 (success) +# version_gte 19.03 // 0 (success) +# version_gte 26.1 // 1 (fail) +version_gte() { + if [ -z "$VERSION" ]; then + return 0 + fi + version_compare "$VERSION" "$1" +} + +# version_compare compares two version strings (either SemVer (Major.Minor.Path), +# or CalVer (YY.MM) version strings. It returns 0 (success) if version A is newer +# or equal than version B, or 1 (fail) otherwise. Patch releases and pre-release +# (-alpha/-beta) are not taken into account +# +# examples: +# +# version_compare 23.0.0 20.10 // 0 (success) +# version_compare 23.0 20.10 // 0 (success) +# version_compare 20.10 19.03 // 0 (success) +# version_compare 20.10 20.10 // 0 (success) +# version_compare 19.03 20.10 // 1 (fail) +version_compare() ( + set +x + + yy_a="$(echo "$1" | cut -d'.' -f1)" + yy_b="$(echo "$2" | cut -d'.' -f1)" + if [ "$yy_a" -lt "$yy_b" ]; then + return 1 + fi + if [ "$yy_a" -gt "$yy_b" ]; then + return 0 + fi + mm_a="$(echo "$1" | cut -d'.' -f2)" + mm_b="$(echo "$2" | cut -d'.' 
-f2)" + + # trim leading zeros to accommodate CalVer + mm_a="${mm_a#0}" + mm_b="${mm_b#0}" + + if [ "${mm_a:-0}" -lt "${mm_b:-0}" ]; then + return 1 + fi + + return 0 +) + +is_dry_run() { + if [ -z "$DRY_RUN" ]; then + return 1 + else + return 0 + fi +} + +is_wsl() { + case "$(uname -r)" in + *microsoft* ) true ;; # WSL 2 + *Microsoft* ) true ;; # WSL 1 + * ) false;; + esac +} + +is_darwin() { + case "$(uname -s)" in + *darwin* ) true ;; + *Darwin* ) true ;; + * ) false;; + esac +} + +deprecation_notice() { + distro=$1 + distro_version=$2 + echo + printf "\033[91;1mDEPRECATION WARNING\033[0m\n" + printf " This Linux distribution (\033[1m%s %s\033[0m) reached end-of-life and is no longer supported by this script.\n" "$distro" "$distro_version" + echo " No updates or security fixes will be released for this distribution, and users are recommended" + echo " to upgrade to a currently maintained version of $distro." + echo + printf "Press \033[1mCtrl+C\033[0m now to abort this script, or wait for the installation to continue." + echo + sleep 10 +} + +get_distribution() { + lsb_dist="" + # Every system that we officially support has /etc/os-release + if [ -r /etc/os-release ]; then + lsb_dist="$(. /etc/os-release && echo "$ID")" + fi + # Returning an empty string here should be alright since the + # case statements don't act unless you provide an actual value + echo "$lsb_dist" +} + +start_docker_daemon() { + # Use systemctl if available (for systemd-based systems) + if command_exists systemctl; then + is_dry_run || >&2 echo "Using systemd to manage Docker service" + if ( + is_dry_run || set -x + $sh_c systemctl enable --now docker.service 2>/dev/null + ); then + is_dry_run || echo "INFO: Docker daemon enabled and started" >&2 + else + is_dry_run || echo "WARNING: unable to enable the docker service" >&2 + fi + else + # No service management available (container environment) + if ! 
is_dry_run; then + >&2 echo "Note: Running in a container environment without service management" + >&2 echo "Docker daemon cannot be started automatically in this environment" + >&2 echo "The Docker packages have been installed successfully" + fi + fi + >&2 echo +} + +echo_docker_as_nonroot() { + if is_dry_run; then + return + fi + if command_exists docker && [ -e /var/run/docker.sock ]; then + ( + set -x + $sh_c 'docker version' + ) || true + fi + + # intentionally mixed spaces and tabs here -- tabs are stripped by "<<-EOF", spaces are kept in the output + echo + echo "================================================================================" + echo + if version_gte "20.10"; then + echo "To run Docker as a non-privileged user, consider setting up the" + echo "Docker daemon in rootless mode for your user:" + echo + echo " dockerd-rootless-setuptool.sh install" + echo + echo "Visit https://docs.docker.com/go/rootless/ to learn about rootless mode." + echo + fi + echo + echo "To run the Docker daemon as a fully privileged service, but granting non-root" + echo "users access, refer to https://docs.docker.com/go/daemon-access/" + echo + echo "WARNING: Access to the remote API on a privileged Docker daemon is equivalent" + echo " to root access on the host. Refer to the 'Docker daemon attack surface'" + echo " documentation for details: https://docs.docker.com/go/attack-surface/" + echo + echo "================================================================================" + echo +} + +# Check if this is a forked Linux distro +check_forked() { + + # Check for lsb_release command existence, it usually exists in forked distros + if command_exists lsb_release; then + # Check if the `-u` option is supported + set +e + lsb_release -a -u > /dev/null 2>&1 + lsb_release_exit_code=$? 
+ set -e + + # Check if the command has exited successfully, it means we're in a forked distro + if [ "$lsb_release_exit_code" = "0" ]; then + # Print info about current distro + cat <<-EOF + You're using '$lsb_dist' version '$dist_version'. + EOF + + # Get the upstream release info + lsb_dist=$(lsb_release -a -u 2>&1 | tr '[:upper:]' '[:lower:]' | grep -E 'id' | cut -d ':' -f 2 | tr -d '[:space:]') + dist_version=$(lsb_release -a -u 2>&1 | tr '[:upper:]' '[:lower:]' | grep -E 'codename' | cut -d ':' -f 2 | tr -d '[:space:]') + + # Print info about upstream distro + cat <<-EOF + Upstream release is '$lsb_dist' version '$dist_version'. + EOF + else + if [ -r /etc/debian_version ] && [ "$lsb_dist" != "ubuntu" ] && [ "$lsb_dist" != "raspbian" ]; then + if [ "$lsb_dist" = "osmc" ]; then + # OSMC runs Raspbian + lsb_dist=raspbian + else + # We're Debian and don't even know it! + lsb_dist=debian + fi + dist_version="$(sed 's/\/.*//' /etc/debian_version | sed 's/\..*//')" + case "$dist_version" in + 13|14|forky) + dist_version="trixie" + ;; + 12) + dist_version="bookworm" + ;; + 11) + dist_version="bullseye" + ;; + 10) + dist_version="buster" + ;; + 9) + dist_version="stretch" + ;; + 8) + dist_version="jessie" + ;; + esac + fi + fi + fi +} + +do_install() { + echo "# Executing docker install script, commit: $SCRIPT_COMMIT_SHA" + + if command_exists docker; then + cat >&2 <<-'EOF' + Warning: the "docker" command appears to already exist on this system. + + If you already have Docker installed, this script can cause trouble, which is + why we're displaying this warning and provide the opportunity to cancel the + installation. + + If you installed the current Docker package using this script and are using it + again to update Docker, you can ignore this message, but be aware that the + script resets any custom changes in the deb and rpm repo configuration + files to match the parameters passed to the script. + + You may press Ctrl+C now to abort this script. 
+ EOF + ( set -x; sleep 20 ) + fi + + user="$(id -un 2>/dev/null || true)" + + sh_c='sh -c' + if [ "$user" != 'root' ]; then + if command_exists sudo; then + sh_c='sudo -E sh -c' + elif command_exists su; then + sh_c='su -c' + else + cat >&2 <<-'EOF' + Error: this installer needs the ability to run commands as root. + We are unable to find either "sudo" or "su" available to make this happen. + EOF + exit 1 + fi + fi + + if is_dry_run; then + sh_c="echo" + fi + + # perform some very rudimentary platform detection + lsb_dist=$( get_distribution ) + lsb_dist="$(echo "$lsb_dist" | tr '[:upper:]' '[:lower:]')" + + if is_wsl; then + echo + echo "WSL DETECTED: We recommend using Docker Desktop for Windows." + echo "Please get Docker Desktop from https://www.docker.com/products/docker-desktop/" + echo + cat >&2 <<-'EOF' + + You may press Ctrl+C now to abort this script. + EOF + ( set -x; sleep 20 ) + fi + + case "$lsb_dist" in + + ubuntu) + if command_exists lsb_release; then + dist_version="$(lsb_release --codename | cut -f2)" + fi + if [ -z "$dist_version" ] && [ -r /etc/lsb-release ]; then + dist_version="$(. /etc/lsb-release && echo "$DISTRIB_CODENAME")" + fi + ;; + + debian|raspbian) + dist_version="$(sed 's/\/.*//' /etc/debian_version | sed 's/\..*//')" + case "$dist_version" in + 13) + dist_version="trixie" + ;; + 12) + dist_version="bookworm" + ;; + 11) + dist_version="bullseye" + ;; + 10) + dist_version="buster" + ;; + 9) + dist_version="stretch" + ;; + 8) + dist_version="jessie" + ;; + esac + ;; + + centos|rhel) + if [ -z "$dist_version" ] && [ -r /etc/os-release ]; then + dist_version="$(. /etc/os-release && echo "$VERSION_ID")" + fi + ;; + + *) + if command_exists lsb_release; then + dist_version="$(lsb_release --release | cut -f2)" + fi + if [ -z "$dist_version" ] && [ -r /etc/os-release ]; then + dist_version="$(. 
/etc/os-release && echo "$VERSION_ID")" + fi + ;; + + esac + + # Check if this is a forked Linux distro + check_forked + + # Print deprecation warnings for distro versions that recently reached EOL, + # but may still be commonly used (especially LTS versions). + case "$lsb_dist.$dist_version" in + centos.8|centos.7|rhel.7) + deprecation_notice "$lsb_dist" "$dist_version" + ;; + debian.buster|debian.stretch|debian.jessie) + deprecation_notice "$lsb_dist" "$dist_version" + ;; + raspbian.buster|raspbian.stretch|raspbian.jessie) + deprecation_notice "$lsb_dist" "$dist_version" + ;; + ubuntu.focal|ubuntu.bionic|ubuntu.xenial|ubuntu.trusty) + deprecation_notice "$lsb_dist" "$dist_version" + ;; + ubuntu.oracular|ubuntu.mantic|ubuntu.lunar|ubuntu.kinetic|ubuntu.impish|ubuntu.hirsute|ubuntu.groovy|ubuntu.eoan|ubuntu.disco|ubuntu.cosmic) + deprecation_notice "$lsb_dist" "$dist_version" + ;; + fedora.*) + if [ "$dist_version" -lt 41 ]; then + deprecation_notice "$lsb_dist" "$dist_version" + fi + ;; + esac + + # Run setup for each distro accordingly + case "$lsb_dist" in + ubuntu|debian|raspbian) + pre_reqs="ca-certificates curl" + apt_repo="deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] $DOWNLOAD_URL/linux/$lsb_dist $dist_version $CHANNEL" + ( + if ! 
is_dry_run; then + set -x + fi + $sh_c 'apt-get -qq update >/dev/null' + $sh_c "DEBIAN_FRONTEND=noninteractive apt-get -y -qq install $pre_reqs >/dev/null" + $sh_c 'install -m 0755 -d /etc/apt/keyrings' + $sh_c "curl -fsSL \"$DOWNLOAD_URL/linux/$lsb_dist/gpg\" -o /etc/apt/keyrings/docker.asc" + $sh_c "chmod a+r /etc/apt/keyrings/docker.asc" + $sh_c "echo \"$apt_repo\" > /etc/apt/sources.list.d/docker.list" + $sh_c 'apt-get -qq update >/dev/null' + ) + + if [ "$REPO_ONLY" = "1" ]; then + exit 0 + fi + + pkg_version="" + if [ -n "$VERSION" ]; then + if is_dry_run; then + echo "# WARNING: VERSION pinning is not supported in DRY_RUN" + else + # Will work for incomplete versions IE (17.12), but may not actually grab the "latest" if in the test channel + pkg_pattern="$(echo "$VERSION" | sed 's/-ce-/~ce~.*/g' | sed 's/-/.*/g')" + search_command="apt-cache madison docker-ce | grep '$pkg_pattern' | head -1 | awk '{\$1=\$1};1' | cut -d' ' -f 3" + pkg_version="$($sh_c "$search_command")" + echo "INFO: Searching repository for VERSION '$VERSION'" + echo "INFO: $search_command" + if [ -z "$pkg_version" ]; then + echo + echo "ERROR: '$VERSION' not found amongst apt-cache madison results" + echo + exit 1 + fi + if version_gte "18.09"; then + search_command="apt-cache madison docker-ce-cli | grep '$pkg_pattern' | head -1 | awk '{\$1=\$1};1' | cut -d' ' -f 3" + echo "INFO: $search_command" + cli_pkg_version="=$($sh_c "$search_command")" + fi + pkg_version="=$pkg_version" + fi + fi + ( + pkgs="docker-ce${pkg_version%=}" + if version_gte "18.09"; then + # older versions didn't ship the cli and containerd as separate packages + pkgs="$pkgs docker-ce-cli${cli_pkg_version%=} containerd.io" + fi + if version_gte "20.10"; then + pkgs="$pkgs docker-compose-plugin docker-ce-rootless-extras$pkg_version" + fi + if version_gte "23.0"; then + pkgs="$pkgs docker-buildx-plugin" + fi + if version_gte "28.2"; then + pkgs="$pkgs docker-model-plugin" + fi + if ! 
is_dry_run; then + set -x + fi + $sh_c "DEBIAN_FRONTEND=noninteractive apt-get -y -qq install $pkgs >/dev/null" + ) + if [ "$NO_AUTOSTART" != "1" ]; then + start_docker_daemon + fi + echo_docker_as_nonroot + exit 0 + ;; + centos|fedora|rhel) + if [ "$(uname -m)" = "s390x" ]; then + echo "Effective v27.5, please consult RHEL distro statement for s390x support." + exit 1 + fi + repo_file_url="$DOWNLOAD_URL/linux/$lsb_dist/$REPO_FILE" + ( + if ! is_dry_run; then + set -x + fi + if command_exists dnf5; then + $sh_c "dnf -y -q --setopt=install_weak_deps=False install dnf-plugins-core" + $sh_c "dnf5 config-manager addrepo --overwrite --save-filename=docker-ce.repo --from-repofile='$repo_file_url'" + + if [ "$CHANNEL" != "stable" ]; then + $sh_c "dnf5 config-manager setopt \"docker-ce-*.enabled=0\"" + $sh_c "dnf5 config-manager setopt \"docker-ce-$CHANNEL.enabled=1\"" + fi + $sh_c "dnf makecache" + elif command_exists dnf; then + $sh_c "dnf -y -q --setopt=install_weak_deps=False install dnf-plugins-core" + $sh_c "rm -f /etc/yum.repos.d/docker-ce.repo /etc/yum.repos.d/docker-ce-staging.repo" + $sh_c "dnf config-manager --add-repo $repo_file_url" + + if [ "$CHANNEL" != "stable" ]; then + $sh_c "dnf config-manager --set-disabled \"docker-ce-*\"" + $sh_c "dnf config-manager --set-enabled \"docker-ce-$CHANNEL\"" + fi + $sh_c "dnf makecache" + else + $sh_c "yum -y -q install yum-utils" + $sh_c "rm -f /etc/yum.repos.d/docker-ce.repo /etc/yum.repos.d/docker-ce-staging.repo" + $sh_c "yum-config-manager --add-repo $repo_file_url" + + if [ "$CHANNEL" != "stable" ]; then + $sh_c "yum-config-manager --disable \"docker-ce-*\"" + $sh_c "yum-config-manager --enable \"docker-ce-$CHANNEL\"" + fi + $sh_c "yum makecache" + fi + ) + + if [ "$REPO_ONLY" = "1" ]; then + exit 0 + fi + + pkg_version="" + if command_exists dnf; then + pkg_manager="dnf" + pkg_manager_flags="-y -q --best" + else + pkg_manager="yum" + pkg_manager_flags="-y -q" + fi + if [ -n "$VERSION" ]; then + if is_dry_run; then + 
echo "# WARNING: VERSION pinning is not supported in DRY_RUN" + else + if [ "$lsb_dist" = "fedora" ]; then + pkg_suffix="fc$dist_version" + else + pkg_suffix="el" + fi + pkg_pattern="$(echo "$VERSION" | sed 's/-ce-/\\\\.ce.*/g' | sed 's/-/.*/g').*$pkg_suffix" + search_command="$pkg_manager list --showduplicates docker-ce | grep '$pkg_pattern' | tail -1 | awk '{print \$2}'" + pkg_version="$($sh_c "$search_command")" + echo "INFO: Searching repository for VERSION '$VERSION'" + echo "INFO: $search_command" + if [ -z "$pkg_version" ]; then + echo + echo "ERROR: '$VERSION' not found amongst $pkg_manager list results" + echo + exit 1 + fi + if version_gte "18.09"; then + # older versions don't support a cli package + search_command="$pkg_manager list --showduplicates docker-ce-cli | grep '$pkg_pattern' | tail -1 | awk '{print \$2}'" + cli_pkg_version="$($sh_c "$search_command" | cut -d':' -f 2)" + fi + # Cut out the epoch and prefix with a '-' + pkg_version="-$(echo "$pkg_version" | cut -d':' -f 2)" + fi + fi + ( + pkgs="docker-ce$pkg_version" + if version_gte "18.09"; then + # older versions didn't ship the cli and containerd as separate packages + if [ -n "$cli_pkg_version" ]; then + pkgs="$pkgs docker-ce-cli-$cli_pkg_version containerd.io" + else + pkgs="$pkgs docker-ce-cli containerd.io" + fi + fi + if version_gte "20.10"; then + pkgs="$pkgs docker-compose-plugin docker-ce-rootless-extras$pkg_version" + fi + if version_gte "23.0"; then + pkgs="$pkgs docker-buildx-plugin docker-model-plugin" + fi + if ! is_dry_run; then + set -x + fi + $sh_c "$pkg_manager $pkg_manager_flags install $pkgs" + ) + if [ "$NO_AUTOSTART" != "1" ]; then + start_docker_daemon + fi + echo_docker_as_nonroot + exit 0 + ;; + sles) + echo "Effective v27.5, please consult SLES distro statement for s390x support." 
+ exit 1 + ;; + *) + if [ -z "$lsb_dist" ]; then + if is_darwin; then + echo + echo "ERROR: Unsupported operating system 'macOS'" + echo "Please get Docker Desktop from https://www.docker.com/products/docker-desktop" + echo + exit 1 + fi + fi + echo + echo "ERROR: Unsupported distribution '$lsb_dist'" + echo + exit 1 + ;; + esac + exit 1 +} + +# wrapped up in a function so that we have some protection against only getting +# half the file during "curl | sh" +do_install diff --git a/get_helm.sh b/get_helm.sh new file mode 100644 index 0000000000..1c90bbad5b --- /dev/null +++ b/get_helm.sh @@ -0,0 +1,347 @@ +#!/usr/bin/env bash + +# Copyright The Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# The install script is based off of the MIT-licensed script from glide, +# the package manager for Go: https://github.com/Masterminds/glide.sh/blob/master/get + +: ${BINARY_NAME:="helm"} +: ${USE_SUDO:="true"} +: ${DEBUG:="false"} +: ${VERIFY_CHECKSUM:="true"} +: ${VERIFY_SIGNATURES:="false"} +: ${HELM_INSTALL_DIR:="/usr/local/bin"} +: ${GPG_PUBRING:="pubring.kbx"} + +HAS_CURL="$(type "curl" &> /dev/null && echo true || echo false)" +HAS_WGET="$(type "wget" &> /dev/null && echo true || echo false)" +HAS_OPENSSL="$(type "openssl" &> /dev/null && echo true || echo false)" +HAS_GPG="$(type "gpg" &> /dev/null && echo true || echo false)" +HAS_GIT="$(type "git" &> /dev/null && echo true || echo false)" +HAS_TAR="$(type "tar" &> /dev/null && echo true || echo false)" + +# initArch discovers the architecture for this system. +initArch() { + ARCH=$(uname -m) + case $ARCH in + armv5*) ARCH="armv5";; + armv6*) ARCH="armv6";; + armv7*) ARCH="arm";; + aarch64) ARCH="arm64";; + x86) ARCH="386";; + x86_64) ARCH="amd64";; + i686) ARCH="386";; + i386) ARCH="386";; + esac +} + +# initOS discovers the operating system for this system. +initOS() { + OS=$(echo `uname`|tr '[:upper:]' '[:lower:]') + + case "$OS" in + # Minimalist GNU for Windows + mingw*|cygwin*) OS='windows';; + esac +} + +# runs the given command as root (detects if we are root already) +runAsRoot() { + if [ $EUID -ne 0 -a "$USE_SUDO" = "true" ]; then + sudo "${@}" + else + "${@}" + fi +} + +# verifySupported checks that the os/arch combination is supported for +# binary builds, as well whether or not necessary tools are present. +verifySupported() { + local supported="darwin-amd64\ndarwin-arm64\nlinux-386\nlinux-amd64\nlinux-arm\nlinux-arm64\nlinux-loong64\nlinux-ppc64le\nlinux-s390x\nlinux-riscv64\nwindows-amd64\nwindows-arm64" + if ! echo "${supported}" | grep -q "${OS}-${ARCH}"; then + echo "No prebuilt binary for ${OS}-${ARCH}." 
+ echo "To build from source, go to https://github.com/helm/helm" + exit 1 + fi + + if [ "${HAS_CURL}" != "true" ] && [ "${HAS_WGET}" != "true" ]; then + echo "Either curl or wget is required" + exit 1 + fi + + if [ "${VERIFY_CHECKSUM}" == "true" ] && [ "${HAS_OPENSSL}" != "true" ]; then + echo "In order to verify checksum, openssl must first be installed." + echo "Please install openssl or set VERIFY_CHECKSUM=false in your environment." + exit 1 + fi + + if [ "${VERIFY_SIGNATURES}" == "true" ]; then + if [ "${HAS_GPG}" != "true" ]; then + echo "In order to verify signatures, gpg must first be installed." + echo "Please install gpg or set VERIFY_SIGNATURES=false in your environment." + exit 1 + fi + if [ "${OS}" != "linux" ]; then + echo "Signature verification is currently only supported on Linux." + echo "Please set VERIFY_SIGNATURES=false or verify the signatures manually." + exit 1 + fi + fi + + if [ "${HAS_GIT}" != "true" ]; then + echo "[WARNING] Could not find git. It is required for plugin installation." + fi + + if [ "${HAS_TAR}" != "true" ]; then + echo "[ERROR] Could not find tar. It is required to extract the helm binary archive." + exit 1 + fi +} + +# checkDesiredVersion checks if the desired version is available. 
+checkDesiredVersion() { + if [ "x$DESIRED_VERSION" == "x" ]; then + # Get tag from release URL + local latest_release_url="https://get.helm.sh/helm4-latest-version" + local latest_release_response="" + if [ "${HAS_CURL}" == "true" ]; then + latest_release_response=$( curl -L --silent --show-error --fail "$latest_release_url" 2>&1 || true ) + elif [ "${HAS_WGET}" == "true" ]; then + latest_release_response=$( wget "$latest_release_url" -q -O - 2>&1 || true ) + fi + TAG=$( echo "$latest_release_response" | grep '^v[0-9]' ) + if [ "x$TAG" == "x" ]; then + printf "Could not retrieve the latest release tag information from %s: %s\n" "${latest_release_url}" "${latest_release_response}" + exit 1 + fi + else + TAG=$DESIRED_VERSION + fi +} + +# checkHelmInstalledVersion checks which version of helm is installed and +# if it needs to be changed. +checkHelmInstalledVersion() { + if [[ -f "${HELM_INSTALL_DIR}/${BINARY_NAME}" ]]; then + local version=$("${HELM_INSTALL_DIR}/${BINARY_NAME}" version --template="{{ .Version }}") + if [[ "$version" == "$TAG" ]]; then + echo "Helm ${version} is already ${DESIRED_VERSION:-latest}" + return 0 + else + echo "Helm ${TAG} is available. Changing from version ${version}." + return 1 + fi + else + return 1 + fi +} + +# downloadFile downloads the latest binary package and also the checksum +# for that binary. 
+downloadFile() { + HELM_DIST="helm-$TAG-$OS-$ARCH.tar.gz" + DOWNLOAD_URL="https://get.helm.sh/$HELM_DIST" + CHECKSUM_URL="$DOWNLOAD_URL.sha256" + HELM_TMP_ROOT="$(mktemp -dt helm-installer-XXXXXX)" + HELM_TMP_FILE="$HELM_TMP_ROOT/$HELM_DIST" + HELM_SUM_FILE="$HELM_TMP_ROOT/$HELM_DIST.sha256" + echo "Downloading $DOWNLOAD_URL" + if [ "${HAS_CURL}" == "true" ]; then + curl -SsL "$CHECKSUM_URL" -o "$HELM_SUM_FILE" + curl -SsL "$DOWNLOAD_URL" -o "$HELM_TMP_FILE" + elif [ "${HAS_WGET}" == "true" ]; then + wget -q -O "$HELM_SUM_FILE" "$CHECKSUM_URL" + wget -q -O "$HELM_TMP_FILE" "$DOWNLOAD_URL" + fi +} + +# verifyFile verifies the SHA256 checksum of the binary package +# and the GPG signatures for both the package and checksum file +# (depending on settings in environment). +verifyFile() { + if [ "${VERIFY_CHECKSUM}" == "true" ]; then + verifyChecksum + fi + if [ "${VERIFY_SIGNATURES}" == "true" ]; then + verifySignatures + fi +} + +# installFile installs the Helm binary. +installFile() { + HELM_TMP="$HELM_TMP_ROOT/$BINARY_NAME" + mkdir -p "$HELM_TMP" + tar xf "$HELM_TMP_FILE" -C "$HELM_TMP" + HELM_TMP_BIN="$HELM_TMP/$OS-$ARCH/helm" + echo "Preparing to install $BINARY_NAME into ${HELM_INSTALL_DIR}" + runAsRoot cp "$HELM_TMP_BIN" "$HELM_INSTALL_DIR/$BINARY_NAME" + echo "$BINARY_NAME installed into $HELM_INSTALL_DIR/$BINARY_NAME" +} + +# verifyChecksum verifies the SHA256 checksum of the binary package. +verifyChecksum() { + printf "Verifying checksum... " + local sum=$(openssl sha1 -sha256 ${HELM_TMP_FILE} | awk '{print $2}') + local expected_sum=$(cat ${HELM_SUM_FILE}) + if [ "$sum" != "$expected_sum" ]; then + echo "SHA sum of ${HELM_TMP_FILE} does not match. Aborting." + exit 1 + fi + echo "Done." +} + +# verifySignatures obtains the latest KEYS file from GitHub main branch +# as well as the signature .asc files from the specific GitHub release, +# then verifies that the release artifacts were signed by a maintainer's key. 
+verifySignatures() { + printf "Verifying signatures... " + local keys_filename="KEYS" + local github_keys_url="https://raw.githubusercontent.com/helm/helm/main/${keys_filename}" + if [ "${HAS_CURL}" == "true" ]; then + curl -SsL "${github_keys_url}" -o "${HELM_TMP_ROOT}/${keys_filename}" + elif [ "${HAS_WGET}" == "true" ]; then + wget -q -O "${HELM_TMP_ROOT}/${keys_filename}" "${github_keys_url}" + fi + local gpg_keyring="${HELM_TMP_ROOT}/keyring.gpg" + local gpg_homedir="${HELM_TMP_ROOT}/gnupg" + mkdir -p -m 0700 "${gpg_homedir}" + local gpg_stderr_device="/dev/null" + if [ "${DEBUG}" == "true" ]; then + gpg_stderr_device="/dev/stderr" + fi + gpg --batch --quiet --homedir="${gpg_homedir}" --import "${HELM_TMP_ROOT}/${keys_filename}" 2> "${gpg_stderr_device}" + gpg --batch --no-default-keyring --keyring "${gpg_homedir}/${GPG_PUBRING}" --export > "${gpg_keyring}" + local github_release_url="https://github.com/helm/helm/releases/download/${TAG}" + if [ "${HAS_CURL}" == "true" ]; then + curl -SsL "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" -o "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" + curl -SsL "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" -o "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" + elif [ "${HAS_WGET}" == "true" ]; then + wget -q -O "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" + wget -q -O "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" + fi + local error_text="If you think this might be a potential security issue," + error_text="${error_text}\nplease see here: https://github.com/helm/community/blob/master/SECURITY.md" + local num_goodlines_sha=$(gpg --verify --keyring="${gpg_keyring}" --status-fd=1 "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" 2> "${gpg_stderr_device}" | grep -c -E '^\[GNUPG:\] (GOODSIG|VALIDSIG)') + if 
[[ ${num_goodlines_sha} -lt 2 ]]; then + echo "Unable to verify the signature of helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256!" + echo -e "${error_text}" + exit 1 + fi + local num_goodlines_tar=$(gpg --verify --keyring="${gpg_keyring}" --status-fd=1 "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" 2> "${gpg_stderr_device}" | grep -c -E '^\[GNUPG:\] (GOODSIG|VALIDSIG)') + if [[ ${num_goodlines_tar} -lt 2 ]]; then + echo "Unable to verify the signature of helm-${TAG}-${OS}-${ARCH}.tar.gz!" + echo -e "${error_text}" + exit 1 + fi + echo "Done." +} + +# fail_trap is executed if an error occurs. +fail_trap() { + result=$? + if [ "$result" != "0" ]; then + if [[ -n "$INPUT_ARGUMENTS" ]]; then + echo "Failed to install $BINARY_NAME with the arguments provided: $INPUT_ARGUMENTS" + help + else + echo "Failed to install $BINARY_NAME" + fi + echo -e "\tFor support, go to https://github.com/helm/helm." + fi + cleanup + exit $result +} + +# testVersion tests the installed client to make sure it is working. +testVersion() { + set +e + HELM="$(command -v $BINARY_NAME)" + if [ "$?" = "1" ]; then + echo "$BINARY_NAME not found. Is $HELM_INSTALL_DIR on your "'$PATH?' + exit 1 + fi + set -e +} + +# help provides possible cli installation arguments +help () { + echo "Accepted cli arguments are:" + echo -e "\t[--help|-h ] ->> prints this help" + echo -e "\t[--version|-v ] . When not defined it fetches the latest release tag from the Helm CDN" + echo -e "\te.g. 
--version v4.0.0 or -v canary" + echo -e "\t[--no-sudo] ->> install without sudo" +} + +# cleanup temporary files to avoid https://github.com/helm/helm/issues/2977 +cleanup() { + if [[ -d "${HELM_TMP_ROOT:-}" ]]; then + rm -rf "$HELM_TMP_ROOT" + fi +} + +# Execution + +#Stop execution on any error +trap "fail_trap" EXIT +set -e + +# Set debug if desired +if [ "${DEBUG}" == "true" ]; then + set -x +fi + +# Parsing input arguments (if any) +export INPUT_ARGUMENTS="${@}" +set -u +while [[ $# -gt 0 ]]; do + case $1 in + '--version'|-v) + shift + if [[ $# -ne 0 ]]; then + export DESIRED_VERSION="${1}" + if [[ "$1" != "v"* ]]; then + echo "Expected version arg ('${DESIRED_VERSION}') to begin with 'v', fixing..." + export DESIRED_VERSION="v${1}" + fi + else + echo -e "Please provide the desired version. e.g. --version v4.0.0 or -v canary" + exit 0 + fi + ;; + '--no-sudo') + USE_SUDO="false" + ;; + '--help'|-h) + help + exit 0 + ;; + *) exit 1 + ;; + esac + shift +done +set +u + +initArch +initOS +verifySupported +checkDesiredVersion +if ! checkHelmInstalledVersion; then + downloadFile + verifyFile + installFile +fi +testVersion +cleanup diff --git a/k8s/HELM.md b/k8s/HELM.md new file mode 100644 index 0000000000..b55e73dc21 --- /dev/null +++ b/k8s/HELM.md @@ -0,0 +1,1331 @@ +# Lab 10 — Helm Package Manager + +This report documents the Helm implementation for the Kubernetes manifests created in Lab 09. + +--- + +## 1. Helm Fundamentals & Setup + +### 1.1 What Helm gives us +Helm is the package manager for Kubernetes. In this lab, Helm improves the Lab 09 manifests in four major ways: + +1. **Templating** — the same manifests can be reused in multiple environments. +2. **Values-driven configuration** — image, replicas, ports, resources, and probes are configurable without editing YAML templates. +3. **Release management** — Helm tracks installs, upgrades, rollbacks, and uninstall operations. +4. 
**Lifecycle hooks** — pre-install and post-install jobs can validate configuration and run smoke checks. + +### 1.2 Helm installation in WSL +Recommended installation method for WSL/Linux: + +```bash +curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-4 +chmod 700 get_helm.sh +./get_helm.sh +helm version +``` + +**Evidence — Helm version:** + +```text +version.BuildInfo{Version:"v4.1.3", GitCommit:"c94d381b03be117e7e57908edbf642104e00eb8f", GitTreeState:"clean", GoVersion:"go1.25.8", KubeClientVersion:"v1.35"} +``` + +### 1.3 Public chart repositories explored +Commands used: + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo add grafana https://grafana.github.io/helm-charts +helm repo update +helm search repo prometheus +helm show chart prometheus-community/prometheus +``` + +**Evidence — repository search:** + +```text +NAME CHART VERSION APP VERSION DESCRIPTION +prometheus-community/kube-prometheus-stack 82.16.1 v0.89.0 kube-prometheus-stack collects Kubernetes manif... +prometheus-community/prometheus 28.15.0 v3.11.0 Prometheus is a monitoring system and time seri... +prometheus-community/prometheus-adapter 5.3.0 v0.12.0 A Helm chart for k8s prometheus adapter +prometheus-community/prometheus-blackbox-exporter 11.9.1 v0.28.0 Prometheus Blackbox Exporter +prometheus-community/prometheus-cloudwatch-expo... 0.28.1 0.16.0 A Helm chart for prometheus cloudwatch-exporter +prometheus-community/prometheus-conntrack-stats... 0.5.35 v0.4.42 A Helm chart for conntrack-stats-exporter +prometheus-community/prometheus-consul-exporter 1.1.1 v0.13.0 A Helm chart for the Prometheus Consul Exporter +prometheus-community/prometheus-couchdb-exporter 1.0.1 1.0 A Helm chart to export the metrics from couchdb... +prometheus-community/prometheus-druid-exporter 1.2.0 v0.11.0 Druid exporter to monitor druid metrics with Pr... +prometheus-community/prometheus-elasticsearch-e... 
7.2.1 v1.10.0 Elasticsearch stats exporter for Prometheus +prometheus-community/prometheus-fastly-exporter 0.11.0 v10.2.0 A Helm chart for the Prometheus Fastly Exporter +prometheus-community/prometheus-ipmi-exporter 0.8.0 v1.10.1 This is an IPMI exporter for Prometheus. +prometheus-community/prometheus-json-exporter 0.19.2 v0.7.0 Install prometheus-json-exporter +prometheus-community/prometheus-kafka-exporter 3.0.1 v1.9.0 A Helm chart to export metrics from Kafka in Pr... +prometheus-community/prometheus-memcached-exporter 0.4.5 v0.15.5 Prometheus exporter for Memcached metrics +prometheus-community/prometheus-modbus-exporter 0.1.4 0.4.1 A Helm chart for prometheus-modbus-exporter +prometheus-community/prometheus-mongodb-exporter 3.18.0 0.49.0 A Prometheus exporter for MongoDB metrics +prometheus-community/prometheus-mysql-exporter 2.13.0 v0.19.0 A Helm chart for prometheus mysql exporter with... +prometheus-community/prometheus-nats-exporter 2.22.1 0.19.2 A Helm chart for prometheus-nats-exporter +prometheus-community/prometheus-nginx-exporter 1.20.8 1.5.1 A Helm chart for NGINX Prometheus Exporter +prometheus-community/prometheus-node-exporter 4.52.2 1.10.2 A Helm chart for prometheus node-exporter +prometheus-community/prometheus-opencost-exporter 0.1.2 1.108.0 Prometheus OpenCost Exporter +prometheus-community/prometheus-operator 9.3.2 0.38.1 DEPRECATED - This chart will be renamed. See ht... +prometheus-community/prometheus-operator-admiss... 0.38.0 0.90.1 Prometheus Operator Admission Webhook +prometheus-community/prometheus-operator-crds 28.0.1 v0.90.1 A Helm chart that collects custom resource defi... 
+prometheus-community/prometheus-pgbouncer-exporter 0.10.0 v0.12.0 A Helm chart for prometheus pgbouncer-exporter +prometheus-community/prometheus-pingdom-exporter 3.4.2 v0.5.6 A Helm chart for Prometheus Pingdom Exporter +prometheus-community/prometheus-pingmesh-exporter 0.4.3 v1.2.2 Prometheus Pingmesh Exporter +prometheus-community/prometheus-postgres-exporter 7.5.2 v0.19.1 A Helm chart for prometheus postgres-exporter +prometheus-community/prometheus-pushgateway 3.6.0 v1.11.2 A Helm chart for prometheus pushgateway +prometheus-community/prometheus-rabbitmq-exporter 2.1.2 1.0.0 Rabbitmq metrics exporter for prometheus +prometheus-community/prometheus-redis-exporter 6.22.0 v1.82.0 Prometheus exporter for Redis metrics +prometheus-community/prometheus-smartctl-exporter 0.16.0 v0.14.0 A Helm chart for Kubernetes +prometheus-community/prometheus-snmp-exporter 9.13.1 v0.30.1 Prometheus SNMP Exporter +prometheus-community/prometheus-sql-exporter 0.5.0 v0.8 Prometheus SQL Exporter +prometheus-community/prometheus-stackdriver-exp... 4.12.2 v0.18.0 Stackdriver exporter for Prometheus +prometheus-community/prometheus-statsd-exporter 1.0.0 v0.28.0 A Helm chart for prometheus stats-exporter +prometheus-community/prometheus-systemd-exporter 0.5.2 0.7.0 A Helm chart for prometheus systemd-exporter +prometheus-community/prometheus-to-sd 0.5.1 v0.9.2 Scrape metrics stored in prometheus format and ... +prometheus-community/prometheus-windows-exporter 0.12.6 0.31.6 A Helm chart for prometheus windows-exporter +prometheus-community/prometheus-yet-another-clo... 0.43.0 v0.64.0 Yace - Yet Another CloudWatch Exporter +prometheus-community/alertmanager 1.34.0 v0.31.1 The Alertmanager handles alerts sent by client ... +prometheus-community/alertmanager-snmp-notifier 2.1.0 v2.1.0 The SNMP Notifier handles alerts coming from Pr... 
+prometheus-community/jiralert 1.8.2 v1.3.0 A Helm chart for Kubernetes to install jiralert +prometheus-community/kube-state-metrics 7.2.2 2.18.0 Install kube-state-metrics to generate and expo... +prometheus-community/prom-label-proxy 0.18.0 v0.12.1 A proxy that enforces a given label in a given ... +prometheus-community/yet-another-cloudwatch-exp... 0.39.1 v0.62.1 Yace - Yet Another CloudWatch Exporter +grafana/cloudcost-exporter 1.1.2 0.25.0 Cloud Cost Exporter exports cloud provider agno... +grafana/loki 2.16.0 v2.6.1 Loki: like Prometheus, but for logs. +grafana/loki-stack 2.10.3 v2.9.3 Loki: like Prometheus, but for logs. +grafana/snyk-exporter 0.1.0 v1.4.1 Prometheus exporter for Snyk. +``` + +**Evidence — public chart inspection:** + +```text +annotations: + artifacthub.io/license: Apache-2.0 + artifacthub.io/links: | + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts + - name: Upstream Project + url: https://github.com/prometheus/prometheus +apiVersion: v2 +appVersion: v3.11.0 +dependencies: +- condition: alertmanager.enabled + name: alertmanager + repository: https://prometheus-community.github.io/helm-charts + version: 1.34.* +- condition: kube-state-metrics.enabled + name: kube-state-metrics + repository: https://prometheus-community.github.io/helm-charts + version: 7.2.* +- condition: prometheus-node-exporter.enabled + name: prometheus-node-exporter + repository: https://prometheus-community.github.io/helm-charts + version: 4.52.* +- condition: prometheus-pushgateway.enabled + name: prometheus-pushgateway + repository: https://prometheus-community.github.io/helm-charts + version: 3.6.* +description: Prometheus is a monitoring system and time series database. 
+home: https://prometheus.io/ +icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png +keywords: +- monitoring +- prometheus +kubeVersion: '>=1.19.0-0' +maintainers: +- email: gianrubio@gmail.com + name: gianrubio + url: https://github.com/gianrubio +- email: zanhsieh@gmail.com + name: zanhsieh + url: https://github.com/zanhsieh +- email: miroslav.hadzhiev@gmail.com + name: Xtigyro + url: https://github.com/Xtigyro +- email: naseem@transit.app + name: naseemkullah + url: https://github.com/naseemkullah +- email: rootsandtrees@posteo.de + name: zeritti + url: https://github.com/zeritti +name: prometheus +sources: +- https://github.com/prometheus/alertmanager +- https://github.com/prometheus/prometheus +- https://github.com/prometheus/pushgateway +- https://github.com/prometheus/node_exporter +- https://github.com/kubernetes/kube-state-metrics +type: application +version: 28.15.0 +``` + +--- + +## 2. Chart Overview + +### 2.1 Implemented chart structure +I created the following Helm structure inside `k8s/`: + +```text +k8s/ +├── HELM.md +├── common-lib/ +│ ├── Chart.yaml +│ └── templates/ +│ └── _helpers.tpl +├── devops-info-service/ +│ ├── Chart.yaml +│ ├── values.yaml +│ ├── values-dev.yaml +│ ├── values-prod.yaml +│ ├── charts/ +│ │ └── common-lib/ +│ └── templates/ +│ ├── _helpers.tpl +│ ├── deployment.yaml +│ ├── service.yaml +│ ├── NOTES.txt +│ └── hooks/ +│ ├── pre-install-job.yaml +│ └── post-install-job.yaml +└── devops-info-service-app2/ + ├── Chart.yaml + ├── values.yaml + ├── charts/ + │ └── common-lib/ + └── templates/ + ├── _helpers.tpl + ├── deployment.yaml + ├── service.yaml + ├── NOTES.txt + └── hooks/ + ├── pre-install-job.yaml + └── post-install-job.yaml +``` + +### 2.2 Main chart purpose +The main application chart is `k8s/devops-info-service/`. 
+ +It converts the Lab 09 Kubernetes resources into Helm templates: +- `templates/deployment.yaml` — primary application Deployment +- `templates/service.yaml` — Service exposing the application +- `templates/hooks/pre-install-job.yaml` — validation Job before install +- `templates/hooks/post-install-job.yaml` — smoke-test Job after install +- `values.yaml` — shared default configuration +- `values-dev.yaml` — development overrides +- `values-prod.yaml` — production overrides + +### 2.3 Values organization strategy +The values file is organized by concern: +- `image.*` — image repository, tag, pull policy +- `service.*` — type, ports, NodePort configuration +- `resources.*` — CPU and memory requests/limits +- `env.*` — application runtime variables +- `readinessProbe.*` and `livenessProbe.*` — health-check parameters +- `hooks.*` — hook image, weights, and wait timing + +### 2.4 Helper strategy +The charts use a shared library chart named `common-lib`. + +The library chart centralizes: +- chart name generation +- fullname generation +- chart labels +- selector labels + +Each application chart also contains a small local `_helpers.tpl` file with wrapper templates, so the application templates remain readable and consistent. + +--- + +## 3. 
Configuration Guide + +### 3.1 Important values + +| Value | Purpose | Default | +|------|---------|---------| +| `replicaCount` | Number of Pod replicas | `3` | +| `image.repository` | Container image repository | `dorley174/devops-info-service` | +| `image.tag` | Container image tag | `latest` | +| `service.type` | Service exposure mode | `NodePort` | +| `service.port` | Service port | `80` | +| `service.targetPort` | Target container port | `5000` | +| `service.nodePort` | Fixed NodePort for dev/local access | `30080` | +| `resources.requests.*` | Guaranteed resources | `100m / 128Mi` | +| `resources.limits.*` | Hard resource limits | `250m / 256Mi` | +| `env.appVariant` | Application variant identifier | `app1` | +| `env.appMessage` | Human-readable deployment message | `Lab 10 Helm deployment` | +| `env.serviceVersion` | Version string exposed by the app | `lab10-v1` | +| `readinessProbe.*` | Readiness check configuration | `/ready` | +| `livenessProbe.*` | Liveness check configuration | `/health` | +| `hooks.preInstall.*` | Pre-install validation Job settings | enabled | +| `hooks.postInstall.*` | Post-install smoke-test Job settings | enabled | + +### 3.2 Environment-specific values + +#### Development profile (`values-dev.yaml`) +- `replicaCount: 1` +- `service.type: NodePort` +- smaller resource requests and limits +- faster probe timings +- dev-specific application message and service version + +#### Production profile (`values-prod.yaml`) +- `replicaCount: 3` +- `service.type: LoadBalancer` +- higher requests and limits +- slower and more realistic probe timings +- prod-specific application message and service version + +### 3.3 Rendering and installation commands +Render with defaults: + +```bash +helm template devops-app ./k8s/devops-info-service +``` + +Render with development values: + +```bash +helm template devops-app ./k8s/devops-info-service -f ./k8s/devops-info-service/values-dev.yaml +``` + +Install development release: + +```bash +helm install 
devops-app ./k8s/devops-info-service \ + -n devops-lab10 \ + --create-namespace \ + -f ./k8s/devops-info-service/values-dev.yaml \ + --set service.nodePort=30081 \ + --set hooks.postInstall.sleepSeconds=20 \ + --wait +``` + +Upgrade the same release to production: + +```bash +helm upgrade devops-app ./k8s/devops-info-service \ + -n devops-lab10 \ + -f ./k8s/devops-info-service/values-prod.yaml \ + --set service.nodePort=30081 \ + --set hooks.postInstall.sleepSeconds=20 \ + --wait +``` + +--- + +## 4. Hook Implementation + +### 4.1 Implemented hooks +Two lifecycle hooks were implemented in the main chart: + +1. **Pre-install hook** + - resource type: `Job` + - purpose: validate that required chart values are present before installation + - annotation: `helm.sh/hook: pre-install` + - weight: `-5` + +2. **Post-install hook** + - resource type: `Job` + - purpose: run a smoke test against the deployed Service `/ready` endpoint + - annotation: `helm.sh/hook: post-install` + - weight: `5` + +### 4.2 Execution order +Hook execution order is controlled with hook weights: +- lower weight runs first +- `pre-install` uses `-5` +- `post-install` uses `5` + +### 4.3 Deletion policy +Both hooks use this delete policy: + +```text +before-hook-creation,hook-succeeded +``` + +This means: +- the previous hook resource is removed before a new hook is created +- successful hook Jobs are automatically deleted after completion + +### 4.4 Important operational note +The post-install smoke-test hook is most reliable when the release is installed with `--wait`, because Helm will wait for regular resources to become ready before executing `post-install`. + +--- + +## 5. 
Installation Evidence + +### 5.1 Local chart validation +Commands used: + +```bash +helm lint ./k8s/devops-info-service +helm template devops-app ./k8s/devops-info-service > /tmp/devops-app-rendered.yaml +helm install --dry-run --debug devops-app ./k8s/devops-info-service \ + -n devops-lab10 \ + --create-namespace \ + -f ./k8s/devops-info-service/values-dev.yaml +``` + +**Evidence — `helm lint`:** + +```text +==> Linting ./k8s/devops-info-service +[INFO] Chart.yaml: icon is recommended + +1 chart(s) linted, 0 chart(s) failed +``` + +**Evidence — `helm template` verification:** + +```text +--- +# Source: devops-info-service/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: devops-app-devops-info-service + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + +spec: + type: NodePort + selector: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + ports: + - name: http + protocol: TCP + port: 80 + targetPort: 5000 + + nodePort: 30080 +--- +# Source: devops-info-service/templates/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-app-devops-info-service + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + +spec: + replicas: 3 + revisionHistoryLimit: 10 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + template: + metadata: + 
labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: devops-info-service + image: "dorley174/devops-info-service:latest" + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 5000 + protocol: TCP + env: + - name: PORT + value: "5000" + - name: DEBUG + value: "False" + - name: APP_VARIANT + value: "app1" + - name: APP_MESSAGE + value: "Lab 10 Helm deployment" + - name: SERVICE_VERSION + value: "lab10-v1" + resources: + limits: + cpu: 250m + memory: 256Mi + requests: + cpu: 100m + memory: 128Mi + readinessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true +--- +# Source: devops-info-service/templates/hooks/post-install-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: devops-app-devops-info-service-post-install + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-weight": "5" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 0 + template: + metadata: + labels: + helm.sh/chart: devops-info-service-0.1.0 + 
app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app +``` + +**Evidence — dry-run/debug output:** + +```text +level=WARN msg="--dry-run is deprecated and should be replaced with '--dry-run=client'" +level=DEBUG msg="Original chart version" version="" +level=DEBUG msg="Chart path" path=/mnt/c/DevOps/DevOps-Core-Course/k8s/devops-info-service +level=DEBUG msg="number of dependencies in the chart" chart=devops-info-service dependencies=1 +level=DEBUG msg="number of dependencies in the chart" chart=common-lib dependencies=0 +NAME: devops-app +LAST DEPLOYED: Thu Apr 2 21:17:47 2026 +NAMESPACE: devops-lab10 +STATUS: pending-install +REVISION: 1 +DESCRIPTION: Dry run complete +TEST SUITE: None +USER-SUPPLIED VALUES: +env: + appMessage: Lab 10 Helm development deployment + serviceVersion: lab10-dev +livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 10 +readinessProbe: + initialDelaySeconds: 3 + periodSeconds: 5 +replicaCount: 1 +resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 50m + memory: 64Mi +service: + nodePort: 30080 + port: 80 + targetPort: 5000 + type: NodePort + +COMPUTED VALUES: +common-lib: + global: {} +commonLabels: {} +component: web +containerPort: 5000 +env: + appMessage: Lab 10 Helm development deployment + appVariant: app1 + debug: "False" + port: "5000" + serviceVersion: lab10-dev +fullnameOverride: "" +hooks: + image: + pullPolicy: IfNotPresent + repository: busybox + tag: "1.36" + postInstall: + enabled: true + sleepSeconds: 5 + weight: 5 + preInstall: + enabled: true + sleepSeconds: 3 + weight: -5 +image: + pullPolicy: IfNotPresent + repository: dorley174/devops-info-service + tag: latest +livenessProbe: + failureThreshold: 3 + initialDelaySeconds: 5 + path: /health + periodSeconds: 10 + port: http + timeoutSeconds: 2 +nameOverride: "" +partOf: devops-core-course +readinessProbe: + failureThreshold: 3 + initialDelaySeconds: 3 + path: /ready + periodSeconds: 5 + port: http + timeoutSeconds: 2 
+replicaCount: 1 +resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 50m + memory: 64Mi +revisionHistoryLimit: 10 +service: + nodePort: 30080 + port: 80 + targetPort: 5000 + type: NodePort +strategy: + maxSurge: 1 + maxUnavailable: 0 + +HOOKS: +--- +# Source: devops-info-service/templates/hooks/post-install-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: devops-app-devops-info-service-post-install + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-weight": "5" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 0 + template: + metadata: + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + + spec: + restartPolicy: Never + containers: + - name: post-install-smoke-test + image: busybox:1.36 + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - >- + echo "[post-install] running smoke test against devops-app-devops-info-service" && + sleep 5 && + wget -qO- http://devops-app-devops-info-service:80/ready && + echo && + echo "[post-install] smoke test passed" +--- +# Source: devops-info-service/templates/hooks/pre-install-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: devops-app-devops-info-service-pre-install + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + 
app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + + annotations: + "helm.sh/hook": pre-install + "helm.sh/hook-weight": "-5" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 0 + template: + metadata: + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + + spec: + restartPolicy: Never + containers: + - name: pre-install-validation + image: busybox:1.36 + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - >- + echo "[pre-install] validating chart values for devops-app-devops-info-service" && + test -n "dorley174/devops-info-service" && + echo "[pre-install] image repository is set" && + sleep 3 && + echo "[pre-install] validation completed" +MANIFEST: +--- +# Source: devops-info-service/templates/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: devops-app-devops-info-service + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + +spec: + type: NodePort + selector: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + ports: + - name: http + protocol: TCP + port: 80 + targetPort: 5000 + + nodePort: 30080 +--- +# Source: devops-info-service/templates/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-app-devops-info-service + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: 
devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + +spec: + replicas: 1 + revisionHistoryLimit: 10 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + template: + metadata: + labels: + helm.sh/chart: devops-info-service-0.1.0 + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/instance: devops-app + app.kubernetes.io/version: "latest" + app.kubernetes.io/managed-by: Helm + + app.kubernetes.io/component: "web" + + + app.kubernetes.io/part-of: "devops-core-course" + + + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: devops-info-service + image: "dorley174/devops-info-service:latest" + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 5000 + protocol: TCP + env: + - name: PORT + value: "5000" + - name: DEBUG + value: "False" + - name: APP_VARIANT + value: "app1" + - name: APP_MESSAGE + value: "Lab 10 Helm development deployment" + - name: SERVICE_VERSION + value: "lab10-dev" + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 50m + memory: 64Mi + readinessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true + +NOTES: +1. Get the release status: + helm status devops-app -n devops-lab10 + +2. Inspect the rendered Service: + kubectl get svc devops-app-devops-info-service -n devops-lab10 + +3. 
For local verification through port-forward: + kubectl port-forward -n devops-lab10 service/devops-app-devops-info-service 8080:80 + curl http://127.0.0.1:8080/health + curl http://127.0.0.1:8080/ready +``` + +During the first deployment attempts, two local environment issues were encountered. First, the Minikube kubeconfig context was stale and pointed to an outdated API server endpoint, which caused a cluster reachability error. After restarting Minikube and fixing the context, the next installation attempt failed because NodePort `30080` was already used by the Lab 09 Service in the `devops-lab09` namespace. To avoid modifying the previous lab environment, the Helm release was installed with `--set service.nodePort=30081`. A final adjustment was required for the `post-install` hook: the smoke-test Job initially ran too early, so the hook delay was increased with `--set hooks.postInstall.sleepSeconds=20`. + +### 5.2 Development installation evidence +Commands used: + +```bash +helm install devops-app ./k8s/devops-info-service \ + -n devops-lab10 \ + --create-namespace \ + -f ./k8s/devops-info-service/values-dev.yaml \ + --set service.nodePort=30081 \ + --set hooks.postInstall.sleepSeconds=20 \ + --wait + +helm list -n devops-lab10 +kubectl get all -n devops-lab10 +kubectl describe deployment devops-app-devops-info-service -n devops-lab10 +kubectl get svc devops-app-devops-info-service -n devops-lab10 +``` + +**Evidence — `helm install`:** + +```text +NAME: devops-app +LAST DEPLOYED: Thu Apr 2 21:37:40 2026 +NAMESPACE: devops-lab10 +STATUS: deployed +REVISION: 1 +DESCRIPTION: Install complete +TEST SUITE: None +NOTES: +1. Get the release status: + helm status devops-app -n devops-lab10 + +2. Inspect the rendered Service: + kubectl get svc devops-app-devops-info-service -n devops-lab10 + +3. 
For local verification through port-forward: + kubectl port-forward -n devops-lab10 service/devops-app-devops-info-service 8080:80 + curl http://127.0.0.1:8080/health + curl http://127.0.0.1:8080/ready +``` + +**Evidence — `helm list`:** + +```text +NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION +devops-app devops-lab10 1 2026-04-02 21:37:40.003860076 +0300 MSK deployed devops-info-service-0.1.0 latest +``` + +**Evidence — `kubectl get all`:** + +```text +NAME READY STATUS RESTARTS AGE +pod/devops-app-devops-info-service-b9966ddc4-49k97 1/1 Running 0 101s + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service/devops-app-devops-info-service NodePort 10.108.253.122 80:30081/TCP 102s + +NAME READY UP-TO-DATE AVAILABLE AGE +deployment.apps/devops-app-devops-info-service 1/1 1 1 102s + +NAME DESIRED CURRENT READY AGE +replicaset.apps/devops-app-devops-info-service-b9966ddc4 1 1 1 102s +``` + +**Evidence — `kubectl describe deployment`:** + +```text +Name: devops-app-devops-info-service +Namespace: devops-lab10 +CreationTimestamp: Thu, 02 Apr 2026 21:37:47 +0300 +Labels: app.kubernetes.io/component=web + app.kubernetes.io/instance=devops-app + app.kubernetes.io/managed-by=Helm + app.kubernetes.io/name=devops-info-service + app.kubernetes.io/part-of=devops-core-course + app.kubernetes.io/version=latest + helm.sh/chart=devops-info-service-0.1.0 +Annotations: deployment.kubernetes.io/revision: 1 + meta.helm.sh/release-name: devops-app + meta.helm.sh/release-namespace: devops-lab10 +Selector: app.kubernetes.io/instance=devops-app,app.kubernetes.io/name=devops-info-service +Replicas: 1 desired | 1 updated | 1 total | 1 available | 0 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 0 max unavailable, 1 max surge +Pod Template: + Labels: app.kubernetes.io/component=web + app.kubernetes.io/instance=devops-app + app.kubernetes.io/managed-by=Helm + app.kubernetes.io/name=devops-info-service + 
app.kubernetes.io/part-of=devops-core-course + app.kubernetes.io/version=latest + helm.sh/chart=devops-info-service-0.1.0 + Containers: + devops-info-service: + Image: dorley174/devops-info-service:latest + Port: 5000/TCP (http) + Host Port: 0/TCP (http) + Limits: + cpu: 100m + memory: 128Mi + Requests: + cpu: 50m + memory: 64Mi + Liveness: http-get http://:http/health delay=5s timeout=2s period=10s #success=1 #failure=3 + Readiness: http-get http://:http/ready delay=3s timeout=2s period=5s #success=1 #failure=3 + Environment: + PORT: 5000 + DEBUG: False + APP_VARIANT: app1 + APP_MESSAGE: Lab 10 Helm development deployment + SERVICE_VERSION: lab10-dev + Mounts: + Volumes: + Node-Selectors: + Tolerations: +Conditions: + Type Status Reason + ---- ------ ------ + Available True MinimumReplicasAvailable + Progressing True NewReplicaSetAvailable +OldReplicaSets: +NewReplicaSet: devops-app-devops-info-service-b9966ddc4 (1/1 replicas created) +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ScalingReplicaSet 101s deployment-controller Scaled up replica set devops-app-devops-info-service-b9966ddc4 from 0 to 1 +``` + +**Evidence — `kubectl get svc`:** + +```text +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +devops-app-devops-info-service NodePort 10.108.253.122 80:30081/TCP 103s +``` + +### 5.3 Upgrade from development to production +Commands used: + +```bash +helm upgrade devops-app ./k8s/devops-info-service \ + -n devops-lab10 \ + -f ./k8s/devops-info-service/values-prod.yaml \ + --set service.nodePort=30081 \ + --set hooks.postInstall.sleepSeconds=20 \ + --wait + +helm list -n devops-lab10 +kubectl get deploy,svc,pods -n devops-lab10 -o wide +kubectl describe deployment devops-app-devops-info-service -n devops-lab10 +kubectl get svc devops-app-devops-info-service -n devops-lab10 -o wide +``` + +**Evidence — upgrade status:** + +```text +Not executed yet. Fill this section with the real output of: +- helm upgrade ... 
+- helm list -n devops-lab10
+- kubectl get deploy,svc,pods -n devops-lab10 -o wide
+- kubectl describe deployment devops-app-devops-info-service -n devops-lab10
+- kubectl get svc devops-app-devops-info-service -n devops-lab10 -o wide
+```
+
+### 5.4 Hook execution evidence
+Commands used:
+
+```bash
+kubectl get jobs -n devops-lab10
+kubectl describe job devops-app-devops-info-service-pre-install -n devops-lab10 || true
+kubectl describe job devops-app-devops-info-service-post-install -n devops-lab10 || true
+kubectl logs job/devops-app-devops-info-service-pre-install -n devops-lab10 || true
+kubectl logs job/devops-app-devops-info-service-post-install -n devops-lab10 || true
+```
+
+If the delete policy has already removed the Jobs, their absence is itself valid evidence: `kubectl get jobs -n devops-lab10` no longer lists them, which confirms that the hooks ran successfully and were cleaned up.
+
+**Evidence — `kubectl get jobs`:**
+
+```text
+No resources found in devops-lab10 namespace.
+```
+
+**Evidence — hook details or proof of deletion:**
+
+```text
+Error from server (NotFound): jobs.batch "devops-app-devops-info-service-pre-install" not found
+Error from server (NotFound): jobs.batch "devops-app-devops-info-service-post-install" not found
+```
+
+This is expected because both hooks use the delete policy `before-hook-creation,hook-succeeded`, so successful hook Jobs are automatically removed after completion.
+ +### 5.5 Application accessibility verification +Recommended local check via port-forward: + +```bash +kubectl port-forward -n devops-lab10 service/devops-app-devops-info-service 8080:80 +curl http://127.0.0.1:8080/health +curl http://127.0.0.1:8080/ready +curl http://127.0.0.1:8080/ | python3 -m json.tool +``` + +**Evidence — `/health`:** + +```text +{"status":"healthy","timestamp":"2026-04-02T18:39:48.600Z","uptime_seconds":107,"variant":"app1"} +``` + +**Evidence — `/ready`:** + +```text +{"message":"Lab 10 Helm development deployment","status":"ready","timestamp":"2026-04-02T18:39:54.967Z","uptime_seconds":114,"variant":"app1"} +``` + +**Evidence — root endpoint JSON:** + +```text +{ + "endpoints": [ + { + "description": "Service information", + "method": "GET", + "path": "/" + }, + { + "description": "Liveness health check", + "method": "GET", + "path": "/health" + }, + { + "description": "Readiness health check", + "method": "GET", + "path": "/ready" + }, + { + "description": "Prometheus metrics", + "method": "GET", + "path": "/metrics" + } + ], + "request": { + "client_ip": "127.0.0.1", + "method": "GET", + "path": "/", + "user_agent": "curl/8.5.0" + }, + "runtime": { + "current_time": "2026-04-02T18:39:44.233Z", + "timezone": "UTC", + "uptime_human": "0 hours, 1 minute", + "uptime_seconds": 102 + }, + "service": { + "description": "DevOps course info service", + "framework": "Flask", + "message": "Lab 10 Helm development deployment", + "name": "devops-info-service", + "variant": "app1", + "version": "lab10-dev" + }, + "system": { + "architecture": "x86_64", + "cpu_count": 20, + "hostname": "devops-app-devops-info-service-b9966ddc4-49k97", + "platform": "Linux", + "platform_version": "Linux-5.15.153.1-microsoft-standard-WSL2-x86_64-with-glibc2.36", + "python_version": "3.13.1" + } +} +``` + +--- + +## 6. 
Operations + +### 6.1 Install + +```bash +helm install devops-app ./k8s/devops-info-service \ + -n devops-lab10 \ + --create-namespace \ + -f ./k8s/devops-info-service/values-dev.yaml \ + --set service.nodePort=30081 \ + --set hooks.postInstall.sleepSeconds=20 \ + --wait +``` + +### 6.2 Upgrade + +```bash +helm upgrade devops-app ./k8s/devops-info-service \ + -n devops-lab10 \ + -f ./k8s/devops-info-service/values-prod.yaml \ + --set service.nodePort=30081 \ + --set hooks.postInstall.sleepSeconds=20 \ + --wait +``` + +### 6.3 Rollback + +```bash +helm history devops-app -n devops-lab10 +helm rollback devops-app 1 -n devops-lab10 --wait +helm list -n devops-lab10 +``` + +**Evidence — `helm history`:** + +```text +TO_FILL_HELM_HISTORY +``` + +**Evidence — rollback result:** + +```text +TO_FILL_HELM_ROLLBACK +``` + +### 6.4 Uninstall + +```bash +helm uninstall devops-app -n devops-lab10 +kubectl get all -n devops-lab10 +``` + +**Evidence — uninstall result:** + +```text +TO_FILL_HELM_UNINSTALL +``` + +--- + +## 7. Testing & Validation + +### 7.1 Commands used for validation + +```bash +helm lint ./k8s/devops-info-service +helm template devops-app ./k8s/devops-info-service +helm install --dry-run --debug devops-app ./k8s/devops-info-service \ + -n devops-lab10 \ + --create-namespace \ + -f ./k8s/devops-info-service/values-dev.yaml +``` + +### 7.2 Validation conclusion +The chart is considered valid when: +- `helm lint` finishes successfully +- `helm template` renders Kubernetes manifests without template errors +- `helm install --dry-run --debug` renders the release successfully +- the real installation creates a Deployment, a Service, Pods, and the hook Jobs +- the application responds on `/health` and `/ready` + +**Short conclusion:** + +`The Helm chart is valid. 
helm lint completed successfully, helm template rendered Kubernetes manifests without template errors, helm install --dry-run --debug rendered the release correctly, and the real installation created the expected Kubernetes resources. The application also responded successfully on /health and /ready.` + +--- + +## 8. Bonus — Library Chart + +### 8.1 Why a library chart was added +After creating a second application chart, helper logic became repetitive. The library chart removes duplication in these areas: +- naming logic +- common labels +- selector labels +- reusable helper patterns + +### 8.2 Library chart details +The shared library chart is stored in: + +```text +k8s/common-lib/ +``` + +Its `Chart.yaml` uses: + +```yaml +type: library +``` + +### 8.3 How the application charts use the library +Both application charts declare `common-lib` as a dependency in `Chart.yaml` and call the shared helper templates from their local wrappers. + +### 8.4 Bonus deployment commands + +```bash +helm install devops-app2 ./k8s/devops-info-service-app2 \ + -n devops-lab10 \ + --create-namespace \ + --set service.nodePort=30082 \ + --wait + +helm list -n devops-lab10 +kubectl get deploy,svc,pods -n devops-lab10 -o wide +kubectl port-forward -n devops-lab10 service/devops-app2-devops-info-service-app2 8081:80 +curl http://127.0.0.1:8081/health +curl http://127.0.0.1:8081/ready +``` + +**Evidence — second chart install:** + +```text +didn't complete +``` + +**Evidence — both releases present:** + +```text +didn't complete +``` + +**Evidence — second app accessibility:** + +```text +didn't complete +``` + +### 8.5 Benefits of the library approach +1. **DRY** — shared helper logic is defined once. +2. **Consistency** — both application charts render names and labels the same way. +3. **Maintainability** — future helper changes can be done in one place. +4. **Scalability** — additional application charts can reuse the same helper library. 
+ +--- diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 0000000000..1fade15d84 --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,442 @@ +# Lab 09 — Kubernetes Fundamentals + +This report documents the Kubernetes deployment of the Flask-based `devops-info-service` application. + +--- + +## 1. Architecture Overview + +### 1.1 Selected local Kubernetes tool +I selected **minikube** with the **Docker driver** on **Windows + VS Code + WSL + Docker Desktop**. + +**Why this option fits my environment:** +1. It runs completely locally and does not require a cloud provider. +2. It works well with my Windows + VS Code + WSL workflow. +3. Docker Desktop provides the container runtime, and minikube uses the Docker driver directly from WSL. +4. It is practical in my region because the lab can be completed locally after the required images are downloaded. + +### 1.2 Deployment architecture +The main application is deployed with a Kubernetes **Deployment** and exposed with a **NodePort Service**. + +**Main application path:** +- `Deployment/devops-info-service` +- `Service/devops-info-service` +- `3 replicas` by default +- `NodePort 30080` +- container port `5000` + +**Bonus manifests prepared in the repository:** +- `Deployment/devops-info-service-app2` +- `Service/devops-info-service-app2` +- `Ingress/devops-course-ingress` +- host `local.example.com` +- routes `/app1` and `/app2` + +### 1.3 Networking flow +#### Base task +1. A client sends a request to the local cluster. +2. The NodePort Service exposes the application on port `30080`. +3. The Service selects Pods by label `app.kubernetes.io/name=devops-info-service`. +4. Traffic is forwarded to container port `5000`. + +#### Local verification flow actually used +In my WSL + Docker Desktop setup, direct NodePort access through `minikube ip` was not reliable. 
I verified the application with: + +```bash +kubectl port-forward -n devops-lab09 service/devops-info-service 8080:80 +``` + +This mapped local port `8080` to Service port `80` and allowed stable validation with `curl`. + +### 1.4 Resource allocation strategy +Each container defines conservative lab-friendly resources: +- **Requests:** `100m CPU`, `128Mi memory` +- **Limits:** `250m CPU`, `256Mi memory` + +These values are appropriate for a lightweight Flask service on a local minikube cluster running through WSL and Docker Desktop. + +--- + +## 2. Manifest Files + +### 2.1 `k8s/namespace.yml` +Creates a dedicated namespace `devops-lab09` for logical isolation of all lab resources. + +### 2.2 `k8s/deployment.yml` +Creates the primary Deployment. + +**Key implementation choices:** +1. `replicas: 3` satisfies the requirement for at least three Pod replicas. +2. `RollingUpdate` uses `maxSurge: 1` and `maxUnavailable: 0` to avoid downtime during updates. +3. `readinessProbe` checks `/ready`. +4. `livenessProbe` checks `/health`. +5. Resource requests and limits are defined. +6. Runtime hardening is enabled with `runAsNonRoot`, dropped capabilities, disabled privilege escalation, and `RuntimeDefault` seccomp. + +### 2.3 `k8s/service.yml` +Creates a `NodePort` Service. + +**Key implementation choices:** +1. Service port `80` is user-friendly. +2. `targetPort: http` forwards traffic to container port `5000`. +3. `nodePort: 30080` is fixed explicitly to simplify local testing. + +### 2.4 `k8s/deployment-app2.yml` +Creates a second Deployment for the bonus part, using the same image with different environment values. + +### 2.5 `k8s/service-app2.yml` +Creates a second NodePort Service on `30081` for the bonus application. + +### 2.6 `k8s/ingress.yml` +Defines nginx Ingress with path-based routing and TLS. + +### 2.7 Helper scripts +- `k8s/deploy.sh` — deploys the namespace, main Deployment, and Service. +- `k8s/deploy-bonus.sh` — deploys the bonus resources. 
+- `k8s/collect-evidence.sh` — saves Kubernetes evidence into `k8s/evidence/`. + +### 2.8 Docker image security fix +The initial deployment failed because Kubernetes could not validate a named non-root user with `runAsNonRoot: true`. + +I fixed this by updating `app_python/Dockerfile` to use a **numeric UID/GID**: + +```dockerfile +RUN addgroup --system --gid 10001 app \ + && adduser --system --uid 10001 --ingroup app --no-create-home app + +USER 10001:10001 +``` + +--- + +## 3. Deployment Evidence + +### 3.1 Cluster setup evidence + +```text +$ kubectl cluster-info +Kubernetes control plane is running at https://127.0.0.1:60412 +CoreDNS is running at https://127.0.0.1:60412/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy + +To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'. +``` + +```text +$ kubectl get nodes -o wide +NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME +minikube Ready control-plane 14m v1.35.1 192.168.49.2 Debian GNU/Linux 12 (bookworm) 5.15.153.1-microsoft-standard-WSL2 docker://29.2.1 +``` + +### 3.2 Deployment evidence + +```text +$ kubectl get all -n devops-lab09 -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +pod/devops-info-service-7b48589b6b-2cf77 1/1 Running 0 5m26s 10.244.0.6 minikube +pod/devops-info-service-7b48589b6b-52j4f 1/1 Running 0 5m9s 10.244.0.8 minikube +pod/devops-info-service-7b48589b6b-wrvvj 1/1 Running 0 5m19s 10.244.0.7 minikube + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR +service/devops-info-service NodePort 10.100.203.165 80:30080/TCP 13m app.kubernetes.io/name=devops-info-service + +NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR +deployment.apps/devops-info-service 3/3 3 3 13m devops-info-service dorley174/devops-info-service:latest app.kubernetes.io/name=devops-info-service +``` + +```text +$ kubectl get pods,svc -n devops-lab09 -o wide +NAME READY STATUS RESTARTS AGE IP NODE 
NOMINATED NODE READINESS GATES +pod/devops-info-service-7b48589b6b-2cf77 1/1 Running 0 5m27s 10.244.0.6 minikube +pod/devops-info-service-7b48589b6b-52j4f 1/1 Running 0 5m10s 10.244.0.8 minikube +pod/devops-info-service-7b48589b6b-wrvvj 1/1 Running 0 5m20s 10.244.0.7 minikube + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR +service/devops-info-service NodePort 10.100.203.165 80:30080/TCP 13m app.kubernetes.io/name=devops-info-service +``` + +```text +$ kubectl describe deployment devops-info-service -n devops-lab09 +Name: devops-info-service +Namespace: devops-lab09 +Replicas: 3 desired | 3 updated | 3 total | 3 available | 0 unavailable +StrategyType: RollingUpdate +RollingUpdateStrategy: 0 max unavailable, 1 max surge +Image: dorley174/devops-info-service:latest +Liveness: http-get http://:http/health delay=15s timeout=2s period=10s #success=1 #failure=3 +Readiness: http-get http://:http/ready delay=5s timeout=2s period=5s #success=1 #failure=3 +Environment: + PORT: 5000 + DEBUG: False + APP_VARIANT: app1 + APP_MESSAGE: Lab 09 primary deployment + SERVICE_VERSION: lab09-v1 +``` + +### 3.3 Application verification +I verified the running Service with `kubectl port-forward`. 
+ +```bash +kubectl port-forward -n devops-lab09 service/devops-info-service 8080:80 +``` + +```text +$ curl http://127.0.0.1:8080/health +{"status":"healthy","timestamp":"2026-03-26T19:47:24.216Z","uptime_seconds":264,"variant":"app1"} +``` + +```text +$ curl http://127.0.0.1:8080/ready +{"message":"Lab 09 primary deployment","status":"ready","timestamp":"2026-03-26T19:47:24.232Z","uptime_seconds":264,"variant":"app1"} +``` + +```json +$ curl http://127.0.0.1:8080/ | python3 -m json.tool +{ + "service": { + "description": "DevOps course info service", + "framework": "Flask", + "message": "Lab 09 primary deployment", + "name": "devops-info-service", + "variant": "app1", + "version": "lab09-v1" + } +} +``` + +### 3.4 Evidence collection helper + +```text +$ ./k8s/collect-evidence.sh +Evidence saved to k8s/evidence +``` + +The raw evidence files are included in `k8s/evidence/`. + +--- + +## 4. Operations Performed + +### 4.1 Deploy the application +I deployed the namespace, Deployment, and Service declaratively with `kubectl apply` and confirmed that the Deployment reached `3/3` available replicas. + +### 4.2 Scale the Deployment to 5 replicas + +```bash +kubectl scale deployment/devops-info-service -n devops-lab09 --replicas=5 +kubectl rollout status deployment/devops-info-service -n devops-lab09 +kubectl get pods -n devops-lab09 -o wide +``` + +**Result:** the Deployment was successfully scaled from 3 to 5 replicas. + +```text +deployment.apps/devops-info-service scaled +Waiting for deployment "devops-info-service" rollout to finish: 3 of 5 updated replicas are available... +Waiting for deployment "devops-info-service" rollout to finish: 4 of 5 updated replicas are available... 
+deployment "devops-info-service" successfully rolled out +``` + +```text +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +devops-info-service-7b48589b6b-2b97b 1/1 Running 0 8m32s 10.244.0.14 minikube +devops-info-service-7b48589b6b-7fttq 1/1 Running 0 8m25s 10.244.0.15 minikube +devops-info-service-7b48589b6b-jjtsm 1/1 Running 0 8s 10.244.0.17 minikube +devops-info-service-7b48589b6b-wmf6g 1/1 Running 0 7s 10.244.0.18 minikube +devops-info-service-7b48589b6b-zhkpz 1/1 Running 0 8m18s 10.244.0.16 minikube +``` + +### 4.3 Demonstrate a rolling update +Instead of editing the YAML by hand during the live test, I used `kubectl set env` to trigger a Deployment rollout by changing the Pod template environment variables. + +```bash +kubectl set env deployment/devops-info-service -n devops-lab09 \ + APP_MESSAGE="Lab 09 rolling update" \ + SERVICE_VERSION="lab09-v2" +kubectl rollout status deployment/devops-info-service -n devops-lab09 +kubectl rollout history deployment/devops-info-service -n devops-lab09 +``` + +**Result:** rollout completed successfully and the Deployment spec reflected the updated values. + +```text +deployment.apps/devops-info-service env updated +Waiting for deployment "devops-info-service" rollout to finish: 1 out of 5 new replicas have been updated... +... 
+deployment "devops-info-service" successfully rolled out +``` + +```text +$ kubectl rollout history deployment/devops-info-service -n devops-lab09 +REVISION CHANGE-CAUSE +1 +4 +5 +``` + +```text +$ kubectl get deployment devops-info-service -n devops-lab09 -o yaml | grep -A1 -E 'APP_MESSAGE|SERVICE_VERSION' +- name: APP_MESSAGE + value: Lab 09 rolling update +- name: SERVICE_VERSION + value: lab09-v2 +``` + +### 4.4 Demonstrate rollback + +```bash +kubectl rollout undo deployment/devops-info-service -n devops-lab09 +kubectl rollout status deployment/devops-info-service -n devops-lab09 +kubectl rollout history deployment/devops-info-service -n devops-lab09 +``` + +**Result:** rollback completed successfully and the Deployment returned to the original values. + +```text +deployment.apps/devops-info-service rolled back +Waiting for deployment "devops-info-service" rollout to finish: 1 out of 5 new replicas have been updated... +... +deployment "devops-info-service" successfully rolled out +``` + +```text +$ kubectl rollout history deployment/devops-info-service -n devops-lab09 +REVISION CHANGE-CAUSE +1 +5 +6 +``` + +```text +$ kubectl get deployment devops-info-service -n devops-lab09 -o yaml | grep -A1 -E 'APP_MESSAGE|SERVICE_VERSION' +- name: APP_MESSAGE + value: Lab 09 primary deployment +- name: SERVICE_VERSION + value: lab09-v1 +``` + +Rollback response check: + +```json +$ curl http://127.0.0.1:8082/ | python3 -m json.tool +{ + "service": { + "description": "DevOps course info service", + "framework": "Flask", + "message": "Lab 09 primary deployment", + "name": "devops-info-service", + "variant": "app1", + "version": "lab09-v1" + } +} +``` + +### 4.5 Bonus status +The repository includes bonus manifests for the second application and Ingress, but I did not complete final runtime verification for the bonus part during this execution session. + +--- + +## 5. Production Considerations + +### 5.1 Health checks +Two probes are implemented: +1. 
**Liveness probe** on `/health` checks whether the process is alive. +2. **Readiness probe** on `/ready` checks whether the Pod is ready to receive traffic. + +### 5.2 Resource limits rationale +The application is lightweight, so the selected requests and limits are sufficient for local development while still demonstrating correct Kubernetes resource configuration. + +### 5.3 Security choices +1. Non-root execution is enforced. +2. Privilege escalation is disabled. +3. All Linux capabilities are dropped. +4. `RuntimeDefault` seccomp is enabled. +5. The Docker image now uses a numeric UID/GID to satisfy Kubernetes non-root validation. + +### 5.4 Suggested production improvements +For a real production deployment, I would additionally introduce: +1. `ConfigMap` and `Secret` resources. +2. `HorizontalPodAutoscaler`. +3. `NetworkPolicy` rules. +4. `PodDisruptionBudget`. +5. immutable image tags instead of `latest`. +6. CI/CD-driven promotion between environments. +7. centralized monitoring, logging, and alerting. + +--- + +## 6. Challenges & Solutions + +### 6.1 Vagrant was more complex than necessary +I initially tried the lab with Vagrant, but the workflow was slower and more fragile than needed for this local setup. I switched to **WSL + Docker Desktop + minikube**, which was simpler and more reliable. + +### 6.2 Docker Desktop / WSL networking behavior +Direct access through `minikube ip` and the NodePort was not reliable in my environment. The stable solution was to verify the application with `kubectl port-forward`. + +### 6.3 `CreateContainerConfigError` +The first deployment failed with: + +```text +Error: container has runAsNonRoot and image has non-numeric user (app), cannot verify user is non-root +``` + +I fixed this by updating the Docker image to use a numeric UID/GID (`10001:10001`) and rebuilding the image inside minikube's Docker environment. 
+ +### 6.4 Rolling update evidence capture +During update verification, one local `port-forward` session was interrupted. To keep the evidence trustworthy, I documented the successful rollout with: +- rollout status +- rollout history +- Deployment environment values after update +- rollback verification with a successful application response + +--- + +## 7. Recommended Local Execution Order + +### 7.1 Prerequisites +- Windows host +- Docker Desktop +- WSL +- `kubectl` installed in WSL +- `minikube` installed in WSL + +### 7.2 Cluster startup +```bash +minikube start --driver=docker +kubectl cluster-info +kubectl get nodes -o wide +``` + +### 7.3 Build and deploy +```bash +eval $(minikube -p minikube docker-env) +docker build -t dorley174/devops-info-service:latest ./app_python +./k8s/deploy.sh +``` + +### 7.4 Verify the application +```bash +kubectl port-forward -n devops-lab09 service/devops-info-service 8080:80 +curl http://127.0.0.1:8080/health +curl http://127.0.0.1:8080/ready +curl http://127.0.0.1:8080/ | python3 -m json.tool +``` + +### 7.5 Scale, update, rollback +Use the commands from Section 4. + +--- + +## 8. Conclusion + +The lab requirements for the base task were completed: +- local Kubernetes cluster started successfully +- application deployed with 3 replicas +- Service exposed through NodePort +- readiness and liveness probes implemented +- resource requests and limits configured +- Deployment scaled to 5 replicas +- rolling update demonstrated +- rollback demonstrated +- evidence collected into `k8s/evidence/` + +The repository also contains prepared bonus manifests for a second app and Ingress. 
diff --git a/k8s/collect-evidence.sh b/k8s/collect-evidence.sh new file mode 100644 index 0000000000..22d70da958 --- /dev/null +++ b/k8s/collect-evidence.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +OUT_DIR="k8s/evidence" +NS="devops-lab09" +APP="devops-info-service" + +mkdir -p "$OUT_DIR" + +kubectl cluster-info > "$OUT_DIR/01-cluster-info.txt" +kubectl get nodes -o wide > "$OUT_DIR/02-get-nodes.txt" +kubectl get all -n "$NS" -o wide > "$OUT_DIR/03-get-all.txt" +kubectl get pods,svc -n "$NS" -o wide > "$OUT_DIR/04-get-pods-svc.txt" +kubectl describe deployment "$APP" -n "$NS" > "$OUT_DIR/05-describe-deployment.txt" +kubectl rollout history deployment/"$APP" -n "$NS" > "$OUT_DIR/06-rollout-history.txt" +kubectl get ingress -n "$NS" -o wide > "$OUT_DIR/07-get-ingress.txt" 2>/dev/null || true +kubectl get events -n "$NS" --sort-by=.metadata.creationTimestamp > "$OUT_DIR/08-events.txt" + +echo "Evidence saved to $OUT_DIR" diff --git a/k8s/common-lib/Chart.yaml b/k8s/common-lib/Chart.yaml new file mode 100644 index 0000000000..b7ef5e10b6 --- /dev/null +++ b/k8s/common-lib/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: common-lib +description: Shared Helm helper templates for DevOps course applications +type: library +version: 0.1.0 diff --git a/k8s/common-lib/templates/_helpers.tpl b/k8s/common-lib/templates/_helpers.tpl new file mode 100644 index 0000000000..00bb324574 --- /dev/null +++ b/k8s/common-lib/templates/_helpers.tpl @@ -0,0 +1,56 @@ +{{/* +Expand the chart name. +*/}} +{{- define "common-lib.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully-qualified resource name. +*/}} +{{- define "common-lib.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := include "common-lib.name" . 
-}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart label value. +*/}} +{{- define "common-lib.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" -}} +{{- end -}} + +{{/* +Selector labels shared across resources. +*/}} +{{- define "common-lib.selectorLabels" -}} +app.kubernetes.io/name: {{ include "common-lib.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Common labels shared across resources. +*/}} +{{- define "common-lib.labels" -}} +helm.sh/chart: {{ include "common-lib.chart" . }} +{{ include "common-lib.selectorLabels" . }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{ with .Values.component }} +app.kubernetes.io/component: {{ . | quote }} +{{ end }} +{{ with .Values.partOf }} +app.kubernetes.io/part-of: {{ . | quote }} +{{ end }} +{{ with .Values.commonLabels }} +{{ toYaml . 
}} +{{ end }} +{{- end }} diff --git a/k8s/curl_result.txt b/k8s/curl_result.txt new file mode 100644 index 0000000000..8725f74d33 --- /dev/null +++ b/k8s/curl_result.txt @@ -0,0 +1,55 @@ + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed +100 917 100 917 0 0 20928 0 --:--:-- --:--:-- --:--:-- 21325 +{ + "endpoints": [ + { + "description": "Service information", + "method": "GET", + "path": "/" + }, + { + "description": "Liveness health check", + "method": "GET", + "path": "/health" + }, + { + "description": "Readiness health check", + "method": "GET", + "path": "/ready" + }, + { + "description": "Prometheus metrics", + "method": "GET", + "path": "/metrics" + } + ], + "request": { + "client_ip": "127.0.0.1", + "method": "GET", + "path": "/", + "user_agent": "curl/8.5.0" + }, + "runtime": { + "current_time": "2026-03-26T19:57:23.089Z", + "timezone": "UTC", + "uptime_human": "0 hours, 1 minute", + "uptime_seconds": 61 + }, + "service": { + "description": "DevOps course info service", + "framework": "Flask", + "message": "Lab 09 primary deployment", + "name": "devops-info-service", + "variant": "app1", + "version": "lab09-v1" + }, + "system": { + "architecture": "x86_64", + "cpu_count": 20, + "hostname": "devops-info-service-7b48589b6b-2b97b", + "platform": "Linux", + "platform_version": "Linux-5.15.153.1-microsoft-standard-WSL2-x86_64-with-glibc2.36", + "python_version": "3.13.1" + } +} \ No newline at end of file diff --git a/k8s/deploy-bonus.sh b/k8s/deploy-bonus.sh new file mode 100644 index 0000000000..f099f39332 --- /dev/null +++ b/k8s/deploy-bonus.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +kubectl apply -f k8s/deployment-app2.yml +kubectl apply -f k8s/service-app2.yml +kubectl apply -f k8s/ingress.yml +kubectl rollout status deployment/devops-info-service-app2 -n devops-lab09 +kubectl get ingress,pods,svc -n devops-lab09 -o wide diff --git a/k8s/deploy.sh b/k8s/deploy.sh new file mode 
100644 index 0000000000..baef96f8f4 --- /dev/null +++ b/k8s/deploy.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +kubectl apply -f k8s/namespace.yml +kubectl apply -f k8s/deployment.yml +kubectl apply -f k8s/service.yml +kubectl rollout status deployment/devops-info-service -n devops-lab09 +kubectl get pods,svc -n devops-lab09 -o wide diff --git a/k8s/deployment-app2.yml b/k8s/deployment-app2.yml new file mode 100644 index 0000000000..4a0b472fde --- /dev/null +++ b/k8s/deployment-app2.yml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-info-service-app2 + namespace: devops-lab09 + labels: + app.kubernetes.io/name: devops-info-service-app2 + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course +spec: + replicas: 2 + revisionHistoryLimit: 10 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app.kubernetes.io/name: devops-info-service-app2 + template: + metadata: + labels: + app.kubernetes.io/name: devops-info-service-app2 + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: devops-info-service-app2 + image: dorley174/devops-info-service:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 5000 + protocol: TCP + env: + - name: PORT + value: "5000" + - name: DEBUG + value: "False" + - name: APP_VARIANT + value: "app2" + - name: APP_MESSAGE + value: "Lab 09 bonus deployment" + - name: SERVICE_VERSION + value: "lab09-bonus" + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "250m" + memory: "256Mi" + readinessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + 
failureThreshold: 3 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true diff --git a/k8s/deployment.yml b/k8s/deployment.yml new file mode 100644 index 0000000000..283a808676 --- /dev/null +++ b/k8s/deployment.yml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-info-service + namespace: devops-lab09 + labels: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course +spec: + replicas: 3 + revisionHistoryLimit: 10 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app.kubernetes.io/name: devops-info-service + template: + metadata: + labels: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/component: web + app.kubernetes.io/part-of: devops-core-course + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: devops-info-service + image: dorley174/devops-info-service:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 5000 + protocol: TCP + env: + - name: PORT + value: "5000" + - name: DEBUG + value: "False" + - name: APP_VARIANT + value: "app1" + - name: APP_MESSAGE + value: "Lab 09 primary deployment" + - name: SERVICE_VERSION + value: "lab09-v1" + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "250m" + memory: "256Mi" + readinessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true diff --git a/k8s/devops-info-service-app2/Chart.yaml b/k8s/devops-info-service-app2/Chart.yaml new file mode 100644 index 
0000000000..b44318d903 --- /dev/null +++ b/k8s/devops-info-service-app2/Chart.yaml @@ -0,0 +1,16 @@ +apiVersion: v2 +name: devops-info-service-app2 +description: Helm chart for the bonus DevOps info service deployment +type: application +version: 0.1.0 +appVersion: "latest" +keywords: + - python + - flask + - kubernetes + - helm + - bonus +dependencies: + - name: common-lib + version: 0.1.0 + repository: "file://../common-lib" diff --git a/k8s/devops-info-service-app2/charts/common-lib/Chart.yaml b/k8s/devops-info-service-app2/charts/common-lib/Chart.yaml new file mode 100644 index 0000000000..b7ef5e10b6 --- /dev/null +++ b/k8s/devops-info-service-app2/charts/common-lib/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: common-lib +description: Shared Helm helper templates for DevOps course applications +type: library +version: 0.1.0 diff --git a/k8s/devops-info-service-app2/charts/common-lib/templates/_helpers.tpl b/k8s/devops-info-service-app2/charts/common-lib/templates/_helpers.tpl new file mode 100644 index 0000000000..00bb324574 --- /dev/null +++ b/k8s/devops-info-service-app2/charts/common-lib/templates/_helpers.tpl @@ -0,0 +1,56 @@ +{{/* +Expand the chart name. +*/}} +{{- define "common-lib.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully-qualified resource name. +*/}} +{{- define "common-lib.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := include "common-lib.name" . -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart label value. 
+*/}} +{{- define "common-lib.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" -}} +{{- end -}} + +{{/* +Selector labels shared across resources. +*/}} +{{- define "common-lib.selectorLabels" -}} +app.kubernetes.io/name: {{ include "common-lib.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Common labels shared across resources. +*/}} +{{- define "common-lib.labels" -}} +helm.sh/chart: {{ include "common-lib.chart" . }} +{{ include "common-lib.selectorLabels" . }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{ with .Values.component }} +app.kubernetes.io/component: {{ . | quote }} +{{ end }} +{{ with .Values.partOf }} +app.kubernetes.io/part-of: {{ . | quote }} +{{ end }} +{{ with .Values.commonLabels }} +{{ toYaml . }} +{{ end }} +{{- end }} diff --git a/k8s/devops-info-service-app2/templates/NOTES.txt b/k8s/devops-info-service-app2/templates/NOTES.txt new file mode 100644 index 0000000000..513703355b --- /dev/null +++ b/k8s/devops-info-service-app2/templates/NOTES.txt @@ -0,0 +1,10 @@ +1. Get the release status: + helm status {{ .Release.Name }} -n {{ .Release.Namespace }} + +2. Inspect the rendered Service: + kubectl get svc {{ include "devops-info-service-app2.fullname" . }} -n {{ .Release.Namespace }} + +3. For local verification through port-forward: + kubectl port-forward -n {{ .Release.Namespace }} service/{{ include "devops-info-service-app2.fullname" . }} 8081:{{ .Values.service.port }} + curl http://127.0.0.1:8081/health + curl http://127.0.0.1:8081/ready diff --git a/k8s/devops-info-service-app2/templates/_helpers.tpl b/k8s/devops-info-service-app2/templates/_helpers.tpl new file mode 100644 index 0000000000..3a2d3c36ad --- /dev/null +++ b/k8s/devops-info-service-app2/templates/_helpers.tpl @@ -0,0 +1,26 @@ +{{/* +Compatibility wrappers around the shared common-lib helper templates. 
+*/}} +{{- define "devops-info-service-app2.name" -}} +{{ include "common-lib.name" . }} +{{- end -}} + +{{- define "devops-info-service-app2.fullname" -}} +{{ include "common-lib.fullname" . }} +{{- end -}} + +{{- define "devops-info-service-app2.chart" -}} +{{ include "common-lib.chart" . }} +{{- end -}} + +{{- define "devops-info-service-app2.labels" -}} +{{ include "common-lib.labels" . }} +{{- end -}} + +{{- define "devops-info-service-app2.selectorLabels" -}} +{{ include "common-lib.selectorLabels" . }} +{{- end -}} + +{{- define "devops-info-service-app2.hookImage" -}} +{{- printf "%s:%s" .Values.hooks.image.repository .Values.hooks.image.tag -}} +{{- end -}} diff --git a/k8s/devops-info-service-app2/templates/deployment.yaml b/k8s/devops-info-service-app2/templates/deployment.yaml new file mode 100644 index 0000000000..bbd23b0644 --- /dev/null +++ b/k8s/devops-info-service-app2/templates/deployment.yaml @@ -0,0 +1,68 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "devops-info-service-app2.fullname" . }} + labels: + {{- include "devops-info-service-app2.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: {{ .Values.strategy.maxSurge }} + maxUnavailable: {{ .Values.strategy.maxUnavailable }} + selector: + matchLabels: + {{- include "devops-info-service-app2.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-info-service-app2.labels" . 
| nindent 8 }} + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: {{ .Chart.Name }} + image: "{{ required "image.repository is required" .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.containerPort }} + protocol: TCP + env: + - name: PORT + value: {{ .Values.env.port | quote }} + - name: DEBUG + value: {{ .Values.env.debug | quote }} + - name: APP_VARIANT + value: {{ .Values.env.appVariant | quote }} + - name: APP_MESSAGE + value: {{ .Values.env.appMessage | quote }} + - name: SERVICE_VERSION + value: {{ .Values.env.serviceVersion | quote }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: {{ .Values.readinessProbe.port }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: {{ .Values.livenessProbe.port }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true diff --git a/k8s/devops-info-service-app2/templates/hooks/post-install-job.yaml b/k8s/devops-info-service-app2/templates/hooks/post-install-job.yaml new file mode 100644 index 0000000000..23a394fd84 --- /dev/null +++ b/k8s/devops-info-service-app2/templates/hooks/post-install-job.yaml @@ -0,0 +1,33 @@ +{{- if .Values.hooks.postInstall.enabled }} +apiVersion: batch/v1 
+kind: Job +metadata: + name: {{ include "devops-info-service-app2.fullname" . }}-post-install + labels: + {{- include "devops-info-service-app2.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-weight": "{{ .Values.hooks.postInstall.weight }}" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 0 + template: + metadata: + labels: + {{- include "devops-info-service-app2.labels" . | nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: post-install-smoke-test + image: {{ include "devops-info-service-app2.hookImage" . }} + imagePullPolicy: {{ .Values.hooks.image.pullPolicy }} + command: + - sh + - -c + - >- + echo "[post-install] running smoke test against {{ include "devops-info-service-app2.fullname" . }}" && + sleep {{ .Values.hooks.postInstall.sleepSeconds }} && + wget -qO- http://{{ include "devops-info-service-app2.fullname" . }}:{{ .Values.service.port }}/ready && + echo && + echo "[post-install] smoke test passed" +{{- end }} diff --git a/k8s/devops-info-service-app2/templates/hooks/pre-install-job.yaml b/k8s/devops-info-service-app2/templates/hooks/pre-install-job.yaml new file mode 100644 index 0000000000..adac174cce --- /dev/null +++ b/k8s/devops-info-service-app2/templates/hooks/pre-install-job.yaml @@ -0,0 +1,33 @@ +{{- if .Values.hooks.preInstall.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "devops-info-service-app2.fullname" . }}-pre-install + labels: + {{- include "devops-info-service-app2.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": pre-install + "helm.sh/hook-weight": "{{ .Values.hooks.preInstall.weight }}" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 0 + template: + metadata: + labels: + {{- include "devops-info-service-app2.labels" . 
| nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: pre-install-validation + image: {{ include "devops-info-service-app2.hookImage" . }} + imagePullPolicy: {{ .Values.hooks.image.pullPolicy }} + command: + - sh + - -c + - >- + echo "[pre-install] validating chart values for {{ include "devops-info-service-app2.fullname" . }}" && + test -n "{{ .Values.image.repository }}" && + echo "[pre-install] image repository is set" && + sleep {{ .Values.hooks.preInstall.sleepSeconds }} && + echo "[pre-install] validation completed" +{{- end }} diff --git a/k8s/devops-info-service-app2/templates/service.yaml b/k8s/devops-info-service-app2/templates/service.yaml new file mode 100644 index 0000000000..d6031dac77 --- /dev/null +++ b/k8s/devops-info-service-app2/templates/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-info-service-app2.fullname" . }} + labels: + {{- include "devops-info-service-app2.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + selector: + {{- include "devops-info-service-app2.selectorLabels" . 
| nindent 4 }} + ports: + - name: http + protocol: TCP + port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} +{{ if and (eq .Values.service.type "NodePort") .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} +{{ end }} diff --git a/k8s/devops-info-service-app2/values.yaml b/k8s/devops-info-service-app2/values.yaml new file mode 100644 index 0000000000..b3523bfd9f --- /dev/null +++ b/k8s/devops-info-service-app2/values.yaml @@ -0,0 +1,70 @@ +nameOverride: "" +fullnameOverride: "" +component: web +partOf: devops-core-course +commonLabels: {} + +replicaCount: 2 +revisionHistoryLimit: 10 + +image: + repository: dorley174/devops-info-service + tag: "latest" + pullPolicy: IfNotPresent + +containerPort: 5000 + +strategy: + maxSurge: 1 + maxUnavailable: 0 + +service: + type: NodePort + port: 80 + targetPort: 5000 + nodePort: 30081 + +resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 250m + memory: 256Mi + +env: + port: "5000" + debug: "False" + appVariant: app2 + appMessage: "Lab 10 Helm bonus deployment" + serviceVersion: "lab10-bonus" + +readinessProbe: + path: /ready + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + +livenessProbe: + path: /health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + +hooks: + image: + repository: busybox + tag: "1.36" + pullPolicy: IfNotPresent + preInstall: + enabled: true + weight: -5 + sleepSeconds: 3 + postInstall: + enabled: true + weight: 5 + sleepSeconds: 5 diff --git a/k8s/devops-info-service/Chart.yaml b/k8s/devops-info-service/Chart.yaml new file mode 100644 index 0000000000..20b1962009 --- /dev/null +++ b/k8s/devops-info-service/Chart.yaml @@ -0,0 +1,15 @@ +apiVersion: v2 +name: devops-info-service +description: Helm chart for the primary DevOps info service deployment +type: application +version: 0.1.0 +appVersion: "latest" +keywords: + - python + - flask + - 
kubernetes + - helm +dependencies: + - name: common-lib + version: 0.1.0 + repository: "file://../common-lib" diff --git a/k8s/devops-info-service/charts/common-lib/Chart.yaml b/k8s/devops-info-service/charts/common-lib/Chart.yaml new file mode 100644 index 0000000000..b7ef5e10b6 --- /dev/null +++ b/k8s/devops-info-service/charts/common-lib/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: common-lib +description: Shared Helm helper templates for DevOps course applications +type: library +version: 0.1.0 diff --git a/k8s/devops-info-service/charts/common-lib/templates/_helpers.tpl b/k8s/devops-info-service/charts/common-lib/templates/_helpers.tpl new file mode 100644 index 0000000000..00bb324574 --- /dev/null +++ b/k8s/devops-info-service/charts/common-lib/templates/_helpers.tpl @@ -0,0 +1,56 @@ +{{/* +Expand the chart name. +*/}} +{{- define "common-lib.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully-qualified resource name. +*/}} +{{- define "common-lib.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := include "common-lib.name" . -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart label value. +*/}} +{{- define "common-lib.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" -}} +{{- end -}} + +{{/* +Selector labels shared across resources. +*/}} +{{- define "common-lib.selectorLabels" -}} +app.kubernetes.io/name: {{ include "common-lib.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Common labels shared across resources. +*/}} +{{- define "common-lib.labels" -}} +helm.sh/chart: {{ include "common-lib.chart" . 
}} +{{ include "common-lib.selectorLabels" . }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{ with .Values.component }} +app.kubernetes.io/component: {{ . | quote }} +{{ end }} +{{ with .Values.partOf }} +app.kubernetes.io/part-of: {{ . | quote }} +{{ end }} +{{ with .Values.commonLabels }} +{{ toYaml . }} +{{ end }} +{{- end }} diff --git a/k8s/devops-info-service/templates/NOTES.txt b/k8s/devops-info-service/templates/NOTES.txt new file mode 100644 index 0000000000..dade968511 --- /dev/null +++ b/k8s/devops-info-service/templates/NOTES.txt @@ -0,0 +1,10 @@ +1. Get the release status: + helm status {{ .Release.Name }} -n {{ .Release.Namespace }} + +2. Inspect the rendered Service: + kubectl get svc {{ include "devops-info-service.fullname" . }} -n {{ .Release.Namespace }} + +3. For local verification through port-forward: + kubectl port-forward -n {{ .Release.Namespace }} service/{{ include "devops-info-service.fullname" . }} 8080:{{ .Values.service.port }} + curl http://127.0.0.1:8080/health + curl http://127.0.0.1:8080/ready diff --git a/k8s/devops-info-service/templates/_helpers.tpl b/k8s/devops-info-service/templates/_helpers.tpl new file mode 100644 index 0000000000..b9380d7edb --- /dev/null +++ b/k8s/devops-info-service/templates/_helpers.tpl @@ -0,0 +1,26 @@ +{{/* +Compatibility wrappers around the shared common-lib helper templates. +*/}} +{{- define "devops-info-service.name" -}} +{{ include "common-lib.name" . }} +{{- end -}} + +{{- define "devops-info-service.fullname" -}} +{{ include "common-lib.fullname" . }} +{{- end -}} + +{{- define "devops-info-service.chart" -}} +{{ include "common-lib.chart" . }} +{{- end -}} + +{{- define "devops-info-service.labels" -}} +{{ include "common-lib.labels" . }} +{{- end -}} + +{{- define "devops-info-service.selectorLabels" -}} +{{ include "common-lib.selectorLabels" . 
}} +{{- end -}} + +{{- define "devops-info-service.hookImage" -}} +{{- printf "%s:%s" .Values.hooks.image.repository .Values.hooks.image.tag -}} +{{- end -}} diff --git a/k8s/devops-info-service/templates/deployment.yaml b/k8s/devops-info-service/templates/deployment.yaml new file mode 100644 index 0000000000..d711e62ccb --- /dev/null +++ b/k8s/devops-info-service/templates/deployment.yaml @@ -0,0 +1,68 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "devops-info-service.fullname" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: {{ .Values.strategy.maxSurge }} + maxUnavailable: {{ .Values.strategy.maxUnavailable }} + selector: + matchLabels: + {{- include "devops-info-service.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-info-service.labels" . 
| nindent 8 }} + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: {{ .Chart.Name }} + image: "{{ required "image.repository is required" .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.containerPort }} + protocol: TCP + env: + - name: PORT + value: {{ .Values.env.port | quote }} + - name: DEBUG + value: {{ .Values.env.debug | quote }} + - name: APP_VARIANT + value: {{ .Values.env.appVariant | quote }} + - name: APP_MESSAGE + value: {{ .Values.env.appMessage | quote }} + - name: SERVICE_VERSION + value: {{ .Values.env.serviceVersion | quote }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: {{ .Values.readinessProbe.port }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: {{ .Values.livenessProbe.port }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true diff --git a/k8s/devops-info-service/templates/hooks/post-install-job.yaml b/k8s/devops-info-service/templates/hooks/post-install-job.yaml new file mode 100644 index 0000000000..88e5d358ad --- /dev/null +++ b/k8s/devops-info-service/templates/hooks/post-install-job.yaml @@ -0,0 +1,33 @@ +{{- if .Values.hooks.postInstall.enabled }} +apiVersion: batch/v1 +kind: Job 
+metadata: + name: {{ include "devops-info-service.fullname" . }}-post-install + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-weight": "{{ .Values.hooks.postInstall.weight }}" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 0 + template: + metadata: + labels: + {{- include "devops-info-service.labels" . | nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: post-install-smoke-test + image: {{ include "devops-info-service.hookImage" . }} + imagePullPolicy: {{ .Values.hooks.image.pullPolicy }} + command: + - sh + - -c + - >- + echo "[post-install] running smoke test against {{ include "devops-info-service.fullname" . }}" && + sleep {{ .Values.hooks.postInstall.sleepSeconds }} && + wget -qO- http://{{ include "devops-info-service.fullname" . }}:{{ .Values.service.port }}/ready && + echo && + echo "[post-install] smoke test passed" +{{- end }} diff --git a/k8s/devops-info-service/templates/hooks/pre-install-job.yaml b/k8s/devops-info-service/templates/hooks/pre-install-job.yaml new file mode 100644 index 0000000000..a4eb9b7d2b --- /dev/null +++ b/k8s/devops-info-service/templates/hooks/pre-install-job.yaml @@ -0,0 +1,33 @@ +{{- if .Values.hooks.preInstall.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "devops-info-service.fullname" . }}-pre-install + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": pre-install + "helm.sh/hook-weight": "{{ .Values.hooks.preInstall.weight }}" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 0 + template: + metadata: + labels: + {{- include "devops-info-service.labels" . | nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: pre-install-validation + image: {{ include "devops-info-service.hookImage" . 
}} + imagePullPolicy: {{ .Values.hooks.image.pullPolicy }} + command: + - sh + - -c + - >- + echo "[pre-install] validating chart values for {{ include "devops-info-service.fullname" . }}" && + test -n "{{ .Values.image.repository }}" && + echo "[pre-install] image repository is set" && + sleep {{ .Values.hooks.preInstall.sleepSeconds }} && + echo "[pre-install] validation completed" +{{- end }} diff --git a/k8s/devops-info-service/templates/service.yaml b/k8s/devops-info-service/templates/service.yaml new file mode 100644 index 0000000000..14b7ce49f0 --- /dev/null +++ b/k8s/devops-info-service/templates/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-info-service.fullname" . }} + labels: + {{- include "devops-info-service.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + selector: + {{- include "devops-info-service.selectorLabels" . | nindent 4 }} + ports: + - name: http + protocol: TCP + port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} +{{ if and (eq .Values.service.type "NodePort") .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} +{{ end }} diff --git a/k8s/devops-info-service/values-dev.yaml b/k8s/devops-info-service/values-dev.yaml new file mode 100644 index 0000000000..0bd7f4d242 --- /dev/null +++ b/k8s/devops-info-service/values-dev.yaml @@ -0,0 +1,27 @@ +replicaCount: 1 + +service: + type: NodePort + port: 80 + targetPort: 5000 + nodePort: 30080 + +resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 100m + memory: 128Mi + +env: + appMessage: "Lab 10 Helm development deployment" + serviceVersion: "lab10-dev" + +readinessProbe: + initialDelaySeconds: 3 + periodSeconds: 5 + +livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 10 diff --git a/k8s/devops-info-service/values-prod.yaml b/k8s/devops-info-service/values-prod.yaml new file mode 100644 index 0000000000..89dd180c0b --- /dev/null +++ 
b/k8s/devops-info-service/values-prod.yaml @@ -0,0 +1,27 @@ +replicaCount: 3 + +service: + type: LoadBalancer + port: 80 + targetPort: 5000 + nodePort: null + +resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + +env: + appMessage: "Lab 10 Helm production deployment" + serviceVersion: "lab10-prod" + +readinessProbe: + initialDelaySeconds: 10 + periodSeconds: 3 + +livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 5 diff --git a/k8s/devops-info-service/values.yaml b/k8s/devops-info-service/values.yaml new file mode 100644 index 0000000000..911fd72ab0 --- /dev/null +++ b/k8s/devops-info-service/values.yaml @@ -0,0 +1,70 @@ +nameOverride: "" +fullnameOverride: "" +component: web +partOf: devops-core-course +commonLabels: {} + +replicaCount: 3 +revisionHistoryLimit: 10 + +image: + repository: dorley174/devops-info-service + tag: "latest" + pullPolicy: IfNotPresent + +containerPort: 5000 + +strategy: + maxSurge: 1 + maxUnavailable: 0 + +service: + type: NodePort + port: 80 + targetPort: 5000 + nodePort: 30080 + +resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 250m + memory: 256Mi + +env: + port: "5000" + debug: "False" + appVariant: app1 + appMessage: "Lab 10 Helm deployment" + serviceVersion: "lab10-v1" + +readinessProbe: + path: /ready + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + +livenessProbe: + path: /health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + +hooks: + image: + repository: busybox + tag: "1.36" + pullPolicy: IfNotPresent + preInstall: + enabled: true + weight: -5 + sleepSeconds: 3 + postInstall: + enabled: true + weight: 5 + sleepSeconds: 5 diff --git a/k8s/evidence/01-cluster-info.txt b/k8s/evidence/01-cluster-info.txt new file mode 100644 index 0000000000..378eadb21b --- /dev/null +++ b/k8s/evidence/01-cluster-info.txt @@ -0,0 +1,4 @@ +Kubernetes control plane is running at 
https://127.0.0.1:60412 +CoreDNS is running at https://127.0.0.1:60412/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy + +To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'. diff --git a/k8s/evidence/02-get-nodes.txt b/k8s/evidence/02-get-nodes.txt new file mode 100644 index 0000000000..d9a2b26a89 --- /dev/null +++ b/k8s/evidence/02-get-nodes.txt @@ -0,0 +1,2 @@ +NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME +minikube Ready control-plane 14m v1.35.1 192.168.49.2 Debian GNU/Linux 12 (bookworm) 5.15.153.1-microsoft-standard-WSL2 docker://29.2.1 diff --git a/k8s/evidence/03-get-all.txt b/k8s/evidence/03-get-all.txt new file mode 100644 index 0000000000..ae2d1d6915 --- /dev/null +++ b/k8s/evidence/03-get-all.txt @@ -0,0 +1,14 @@ +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +pod/devops-info-service-7b48589b6b-2cf77 1/1 Running 0 5m26s 10.244.0.6 minikube +pod/devops-info-service-7b48589b6b-52j4f 1/1 Running 0 5m9s 10.244.0.8 minikube +pod/devops-info-service-7b48589b6b-wrvvj 1/1 Running 0 5m19s 10.244.0.7 minikube + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR +service/devops-info-service NodePort 10.100.203.165 80:30080/TCP 13m app.kubernetes.io/name=devops-info-service + +NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR +deployment.apps/devops-info-service 3/3 3 3 13m devops-info-service dorley174/devops-info-service:latest app.kubernetes.io/name=devops-info-service + +NAME DESIRED CURRENT READY AGE CONTAINERS IMAGES SELECTOR +replicaset.apps/devops-info-service-7b48589b6b 3 3 3 5m26s devops-info-service dorley174/devops-info-service:latest app.kubernetes.io/name=devops-info-service,pod-template-hash=7b48589b6b +replicaset.apps/devops-info-service-8689cb4bbc 0 0 0 13m devops-info-service dorley174/devops-info-service:latest app.kubernetes.io/name=devops-info-service,pod-template-hash=8689cb4bbc diff --git 
a/k8s/evidence/04-get-pods-svc.txt b/k8s/evidence/04-get-pods-svc.txt new file mode 100644 index 0000000000..037a7c981e --- /dev/null +++ b/k8s/evidence/04-get-pods-svc.txt @@ -0,0 +1,7 @@ +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +pod/devops-info-service-7b48589b6b-2cf77 1/1 Running 0 5m27s 10.244.0.6 minikube +pod/devops-info-service-7b48589b6b-52j4f 1/1 Running 0 5m10s 10.244.0.8 minikube +pod/devops-info-service-7b48589b6b-wrvvj 1/1 Running 0 5m20s 10.244.0.7 minikube + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR +service/devops-info-service NodePort 10.100.203.165 80:30080/TCP 13m app.kubernetes.io/name=devops-info-service diff --git a/k8s/evidence/05-describe-deployment.txt b/k8s/evidence/05-describe-deployment.txt new file mode 100644 index 0000000000..1efc8774b5 --- /dev/null +++ b/k8s/evidence/05-describe-deployment.txt @@ -0,0 +1,57 @@ +Name: devops-info-service +Namespace: devops-lab09 +CreationTimestamp: Thu, 26 Mar 2026 22:35:19 +0300 +Labels: app.kubernetes.io/component=web + app.kubernetes.io/name=devops-info-service + app.kubernetes.io/part-of=devops-core-course +Annotations: deployment.kubernetes.io/revision: 2 +Selector: app.kubernetes.io/name=devops-info-service +Replicas: 3 desired | 3 updated | 3 total | 3 available | 0 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 0 max unavailable, 1 max surge +Pod Template: + Labels: app.kubernetes.io/component=web + app.kubernetes.io/name=devops-info-service + app.kubernetes.io/part-of=devops-core-course + Annotations: kubectl.kubernetes.io/restartedAt: 2026-03-26T22:42:54+03:00 + Containers: + devops-info-service: + Image: dorley174/devops-info-service:latest + Port: 5000/TCP (http) + Host Port: 0/TCP (http) + Limits: + cpu: 250m + memory: 256Mi + Requests: + cpu: 100m + memory: 128Mi + Liveness: http-get http://:http/health delay=15s timeout=2s period=10s #success=1 #failure=3 + Readiness: http-get http://:http/ready delay=5s 
timeout=2s period=5s #success=1 #failure=3 + Environment: + PORT: 5000 + DEBUG: False + APP_VARIANT: app1 + APP_MESSAGE: Lab 09 primary deployment + SERVICE_VERSION: lab09-v1 + Mounts: + Volumes: + Node-Selectors: + Tolerations: +Conditions: + Type Status Reason + ---- ------ ------ + Available True MinimumReplicasAvailable + Progressing True NewReplicaSetAvailable +OldReplicaSets: devops-info-service-8689cb4bbc (0/0 replicas created) +NewReplicaSet: devops-info-service-7b48589b6b (3/3 replicas created) +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ScalingReplicaSet 13m deployment-controller Scaled up replica set devops-info-service-8689cb4bbc from 0 to 3 + Normal ScalingReplicaSet 5m27s deployment-controller Scaled up replica set devops-info-service-7b48589b6b from 0 to 1 + Normal ScalingReplicaSet 5m20s deployment-controller Scaled down replica set devops-info-service-8689cb4bbc from 3 to 2 + Normal ScalingReplicaSet 5m20s deployment-controller Scaled up replica set devops-info-service-7b48589b6b from 1 to 2 + Normal ScalingReplicaSet 5m10s deployment-controller Scaled down replica set devops-info-service-8689cb4bbc from 2 to 1 + Normal ScalingReplicaSet 5m10s deployment-controller Scaled up replica set devops-info-service-7b48589b6b from 2 to 3 + Normal ScalingReplicaSet 5m2s deployment-controller Scaled down replica set devops-info-service-8689cb4bbc from 1 to 0 diff --git a/k8s/evidence/06-rollout-history.txt b/k8s/evidence/06-rollout-history.txt new file mode 100644 index 0000000000..4e3de9fb60 --- /dev/null +++ b/k8s/evidence/06-rollout-history.txt @@ -0,0 +1,5 @@ +deployment.apps/devops-info-service +REVISION CHANGE-CAUSE +1 +2 + diff --git a/k8s/evidence/07-get-ingress.txt b/k8s/evidence/07-get-ingress.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/k8s/evidence/08-events.txt b/k8s/evidence/08-events.txt new file mode 100644 index 0000000000..953684f826 --- /dev/null +++ b/k8s/evidence/08-events.txt @@ 
-0,0 +1,38 @@ +LAST SEEN TYPE REASON OBJECT MESSAGE +13m Normal SuccessfulCreate replicaset/devops-info-service-8689cb4bbc Created pod: devops-info-service-8689cb4bbc-49c68 +13m Normal Scheduled pod/devops-info-service-8689cb4bbc-gbntr Successfully assigned devops-lab09/devops-info-service-8689cb4bbc-gbntr to minikube +13m Normal ScalingReplicaSet deployment/devops-info-service Scaled up replica set devops-info-service-8689cb4bbc from 0 to 3 +13m Normal Scheduled pod/devops-info-service-8689cb4bbc-49c68 Successfully assigned devops-lab09/devops-info-service-8689cb4bbc-49c68 to minikube +13m Normal SuccessfulCreate replicaset/devops-info-service-8689cb4bbc Created pod: devops-info-service-8689cb4bbc-gbntr +13m Normal SuccessfulCreate replicaset/devops-info-service-8689cb4bbc Created pod: devops-info-service-8689cb4bbc-tmcsb +13m Normal Scheduled pod/devops-info-service-8689cb4bbc-tmcsb Successfully assigned devops-lab09/devops-info-service-8689cb4bbc-tmcsb to minikube +7m19s Warning Failed pod/devops-info-service-8689cb4bbc-49c68 Error: container has runAsNonRoot and image has non-numeric user (app), cannot verify user is non-root (pod: "devops-info-service-8689cb4bbc-49c68_devops-lab09(73e9a23a-7db6-421c-b843-970354f1e086)", container: devops-info-service) +7m19s Normal Pulled pod/devops-info-service-8689cb4bbc-49c68 Container image "dorley174/devops-info-service:latest" already present on machine and can be accessed by the pod +7m21s Normal Pulled pod/devops-info-service-8689cb4bbc-gbntr Container image "dorley174/devops-info-service:latest" already present on machine and can be accessed by the pod +7m18s Warning Failed pod/devops-info-service-8689cb4bbc-tmcsb Error: container has runAsNonRoot and image has non-numeric user (app), cannot verify user is non-root (pod: "devops-info-service-8689cb4bbc-tmcsb_devops-lab09(67a638c8-08d0-4a24-a638-9a1e6b758032)", container: devops-info-service) +7m18s Normal Pulled pod/devops-info-service-8689cb4bbc-tmcsb Container image 
"dorley174/devops-info-service:latest" already present on machine and can be accessed by the pod +7m21s Warning Failed pod/devops-info-service-8689cb4bbc-gbntr Error: container has runAsNonRoot and image has non-numeric user (app), cannot verify user is non-root (pod: "devops-info-service-8689cb4bbc-gbntr_devops-lab09(6221b7e7-e23d-42bd-9957-c4efe897415f)", container: devops-info-service) +5m29s Normal ScalingReplicaSet deployment/devops-info-service Scaled up replica set devops-info-service-7b48589b6b from 0 to 1 +5m29s Normal SuccessfulCreate replicaset/devops-info-service-7b48589b6b Created pod: devops-info-service-7b48589b6b-2cf77 +5m29s Normal Scheduled pod/devops-info-service-7b48589b6b-2cf77 Successfully assigned devops-lab09/devops-info-service-7b48589b6b-2cf77 to minikube +5m28s Normal Started pod/devops-info-service-7b48589b6b-2cf77 Container started +5m28s Normal Created pod/devops-info-service-7b48589b6b-2cf77 Container created +5m28s Normal Pulled pod/devops-info-service-7b48589b6b-2cf77 Container image "dorley174/devops-info-service:latest" already present on machine and can be accessed by the pod +5m22s Normal SuccessfulDelete replicaset/devops-info-service-8689cb4bbc Deleted pod: devops-info-service-8689cb4bbc-49c68 +5m22s Normal Scheduled pod/devops-info-service-7b48589b6b-wrvvj Successfully assigned devops-lab09/devops-info-service-7b48589b6b-wrvvj to minikube +5m22s Normal SuccessfulCreate replicaset/devops-info-service-7b48589b6b Created pod: devops-info-service-7b48589b6b-wrvvj +5m22s Normal ScalingReplicaSet deployment/devops-info-service Scaled up replica set devops-info-service-7b48589b6b from 1 to 2 +5m22s Normal ScalingReplicaSet deployment/devops-info-service Scaled down replica set devops-info-service-8689cb4bbc from 3 to 2 +5m21s Normal Pulled pod/devops-info-service-7b48589b6b-wrvvj Container image "dorley174/devops-info-service:latest" already present on machine and can be accessed by the pod +5m21s Normal Created 
pod/devops-info-service-7b48589b6b-wrvvj Container created +5m21s Normal Started pod/devops-info-service-7b48589b6b-wrvvj Container started +5m12s Normal SuccessfulCreate replicaset/devops-info-service-7b48589b6b Created pod: devops-info-service-7b48589b6b-52j4f +5m12s Normal SuccessfulDelete replicaset/devops-info-service-8689cb4bbc Deleted pod: devops-info-service-8689cb4bbc-gbntr +5m12s Normal Scheduled pod/devops-info-service-7b48589b6b-52j4f Successfully assigned devops-lab09/devops-info-service-7b48589b6b-52j4f to minikube +5m12s Normal ScalingReplicaSet deployment/devops-info-service Scaled down replica set devops-info-service-8689cb4bbc from 2 to 1 +5m12s Normal ScalingReplicaSet deployment/devops-info-service Scaled up replica set devops-info-service-7b48589b6b from 2 to 3 +5m11s Normal Started pod/devops-info-service-7b48589b6b-52j4f Container started +5m11s Normal Created pod/devops-info-service-7b48589b6b-52j4f Container created +5m11s Normal Pulled pod/devops-info-service-7b48589b6b-52j4f Container image "dorley174/devops-info-service:latest" already present on machine and can be accessed by the pod +5m4s Normal SuccessfulDelete replicaset/devops-info-service-8689cb4bbc Deleted pod: devops-info-service-8689cb4bbc-tmcsb +5m4s Normal ScalingReplicaSet deployment/devops-info-service Scaled down replica set devops-info-service-8689cb4bbc from 1 to 0 diff --git a/k8s/evidence/09-deployment-before.txt b/k8s/evidence/09-deployment-before.txt new file mode 100644 index 0000000000..1564dc93c0 --- /dev/null +++ b/k8s/evidence/09-deployment-before.txt @@ -0,0 +1,2 @@ +NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR +devops-info-service 3/3 3 3 29m devops-info-service dorley174/devops-info-service:latest app.kubernetes.io/name=devops-info-service diff --git a/k8s/evidence/10-pods-before.txt b/k8s/evidence/10-pods-before.txt new file mode 100644 index 0000000000..69ceb653d0 --- /dev/null +++ b/k8s/evidence/10-pods-before.txt @@ -0,0 +1,4 @@ +NAME 
READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +devops-info-service-7b48589b6b-2b97b 1/1 Running 0 8m24s 10.244.0.14 minikube +devops-info-service-7b48589b6b-7fttq 1/1 Running 0 8m17s 10.244.0.15 minikube +devops-info-service-7b48589b6b-zhkpz 1/1 Running 0 8m10s 10.244.0.16 minikube diff --git a/k8s/evidence/11-pods-after-scale.txt b/k8s/evidence/11-pods-after-scale.txt new file mode 100644 index 0000000000..5936628d54 --- /dev/null +++ b/k8s/evidence/11-pods-after-scale.txt @@ -0,0 +1,6 @@ +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +devops-info-service-7b48589b6b-2b97b 1/1 Running 0 8m32s 10.244.0.14 minikube +devops-info-service-7b48589b6b-7fttq 1/1 Running 0 8m25s 10.244.0.15 minikube +devops-info-service-7b48589b6b-jjtsm 1/1 Running 0 8s 10.244.0.17 minikube +devops-info-service-7b48589b6b-wmf6g 1/1 Running 0 7s 10.244.0.18 minikube +devops-info-service-7b48589b6b-zhkpz 1/1 Running 0 8m18s 10.244.0.16 minikube diff --git a/k8s/evidence/12-deployment-after-scale.txt b/k8s/evidence/12-deployment-after-scale.txt new file mode 100644 index 0000000000..6f3d84e203 --- /dev/null +++ b/k8s/evidence/12-deployment-after-scale.txt @@ -0,0 +1,2 @@ +NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR +devops-info-service 5/5 5 5 29m devops-info-service dorley174/devops-info-service:latest app.kubernetes.io/name=devops-info-service diff --git a/k8s/evidence/13-rollout-history-after-update.txt b/k8s/evidence/13-rollout-history-after-update.txt new file mode 100644 index 0000000000..76380fc135 --- /dev/null +++ b/k8s/evidence/13-rollout-history-after-update.txt @@ -0,0 +1,5 @@ +deployment.apps/devops-info-service +REVISION CHANGE-CAUSE +1 +4 +5 diff --git a/k8s/evidence/14-env-after-update.txt b/k8s/evidence/14-env-after-update.txt new file mode 100644 index 0000000000..c4222427cf --- /dev/null +++ b/k8s/evidence/14-env-after-update.txt @@ -0,0 +1,4 @@ + - name: APP_MESSAGE + value: Lab 09 rolling update + - name: 
SERVICE_VERSION + value: lab09-v2 diff --git a/k8s/evidence/15-rollout-history-after-rollback.txt b/k8s/evidence/15-rollout-history-after-rollback.txt new file mode 100644 index 0000000000..e2feb8609c --- /dev/null +++ b/k8s/evidence/15-rollout-history-after-rollback.txt @@ -0,0 +1,5 @@ +deployment.apps/devops-info-service +REVISION CHANGE-CAUSE +1 +5 +6 diff --git a/k8s/evidence/16-env-after-rollback.txt b/k8s/evidence/16-env-after-rollback.txt new file mode 100644 index 0000000000..abd5fafc86 --- /dev/null +++ b/k8s/evidence/16-env-after-rollback.txt @@ -0,0 +1,4 @@ + - name: APP_MESSAGE + value: Lab 09 primary deployment + - name: SERVICE_VERSION + value: lab09-v1 diff --git a/k8s/evidence/17-health.txt b/k8s/evidence/17-health.txt new file mode 100644 index 0000000000..2f5438bd07 --- /dev/null +++ b/k8s/evidence/17-health.txt @@ -0,0 +1 @@ +{"status":"healthy","timestamp":"2026-03-26T19:47:24.216Z","uptime_seconds":264,"variant":"app1"} diff --git a/k8s/evidence/18-ready.txt b/k8s/evidence/18-ready.txt new file mode 100644 index 0000000000..422d3a0176 --- /dev/null +++ b/k8s/evidence/18-ready.txt @@ -0,0 +1 @@ +{"message":"Lab 09 primary deployment","status":"ready","timestamp":"2026-03-26T19:47:24.232Z","uptime_seconds":264,"variant":"app1"} diff --git a/k8s/evidence/19-root-after-rollback.json b/k8s/evidence/19-root-after-rollback.json new file mode 100644 index 0000000000..e618153f29 --- /dev/null +++ b/k8s/evidence/19-root-after-rollback.json @@ -0,0 +1,52 @@ +{ + "endpoints": [ + { + "description": "Service information", + "method": "GET", + "path": "/" + }, + { + "description": "Liveness health check", + "method": "GET", + "path": "/health" + }, + { + "description": "Readiness health check", + "method": "GET", + "path": "/ready" + }, + { + "description": "Prometheus metrics", + "method": "GET", + "path": "/metrics" + } + ], + "request": { + "client_ip": "127.0.0.1", + "method": "GET", + "path": "/", + "user_agent": "curl/8.5.0" + }, + "runtime": { + 
"current_time": "2026-03-26T19:57:23.089Z", + "timezone": "UTC", + "uptime_human": "0 hours, 1 minute", + "uptime_seconds": 61 + }, + "service": { + "description": "DevOps course info service", + "framework": "Flask", + "message": "Lab 09 primary deployment", + "name": "devops-info-service", + "variant": "app1", + "version": "lab09-v1" + }, + "system": { + "architecture": "x86_64", + "cpu_count": 20, + "hostname": "devops-info-service-7b48589b6b-2b97b", + "platform": "Linux", + "platform_version": "Linux-5.15.153.1-microsoft-standard-WSL2-x86_64-with-glibc2.36", + "python_version": "3.13.1" + } +} diff --git a/k8s/ingress.yml b/k8s/ingress.yml new file mode 100644 index 0000000000..2b6e7d130a --- /dev/null +++ b/k8s/ingress.yml @@ -0,0 +1,32 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: devops-course-ingress + namespace: devops-lab09 + annotations: + nginx.ingress.kubernetes.io/use-regex: "true" + nginx.ingress.kubernetes.io/rewrite-target: /$2 +spec: + ingressClassName: nginx + tls: + - hosts: + - local.example.com + secretName: apps-ingress-tls + rules: + - host: local.example.com + http: + paths: + - path: /app1(/|$)(.*) + pathType: ImplementationSpecific + backend: + service: + name: devops-info-service + port: + number: 80 + - path: /app2(/|$)(.*) + pathType: ImplementationSpecific + backend: + service: + name: devops-info-service-app2 + port: + number: 80 diff --git a/k8s/namespace.yml b/k8s/namespace.yml new file mode 100644 index 0000000000..3bc418d3e7 --- /dev/null +++ b/k8s/namespace.yml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: devops-lab09 + labels: + app.kubernetes.io/part-of: devops-core-course + app.kubernetes.io/managed-by: kubectl diff --git a/k8s/service-app2.yml b/k8s/service-app2.yml new file mode 100644 index 0000000000..14b9aba0df --- /dev/null +++ b/k8s/service-app2.yml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-info-service-app2 + namespace: devops-lab09 + 
labels: + app.kubernetes.io/name: devops-info-service-app2 + app.kubernetes.io/component: web +spec: + type: NodePort + selector: + app.kubernetes.io/name: devops-info-service-app2 + ports: + - name: http + protocol: TCP + port: 80 + targetPort: http + nodePort: 30081 diff --git a/k8s/service.yml b/k8s/service.yml new file mode 100644 index 0000000000..c27e336f28 --- /dev/null +++ b/k8s/service.yml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-info-service + namespace: devops-lab09 + labels: + app.kubernetes.io/name: devops-info-service + app.kubernetes.io/component: web +spec: + type: NodePort + selector: + app.kubernetes.io/name: devops-info-service + ports: + - name: http + protocol: TCP + port: 80 + targetPort: http + nodePort: 30080 diff --git a/monitoring/.env.example b/monitoring/.env.example index 7b1de1cf95..dd8134271f 100644 --- a/monitoring/.env.example +++ b/monitoring/.env.example @@ -3,8 +3,9 @@ # Bash: cp .env.example .env GRAFANA_ADMIN_USER=admin -GRAFANA_ADMIN_PASSWORD=ChangeMe_Lab07! +GRAFANA_ADMIN_PASSWORD=ChangeMe_Lab08! 
GRAFANA_PORT=3000 LOKI_PORT=3100 PROMTAIL_PORT=9080 +PROMETHEUS_PORT=9090 APP_PORT=8000 diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml index 91209015d4..c32068b172 100644 --- a/monitoring/docker-compose.yml +++ b/monitoring/docker-compose.yml @@ -19,9 +19,9 @@ services: restart: unless-stopped healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3100/ready"] - interval: 15s + interval: 10s timeout: 5s - retries: 10 + retries: 5 start_period: 20s deploy: resources: @@ -55,14 +55,14 @@ services: condition: service_healthy healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9080/ready"] - interval: 15s + interval: 10s timeout: 5s - retries: 10 + retries: 5 start_period: 20s deploy: resources: limits: - cpus: "0.75" + cpus: "0.5" memory: 512M reservations: cpus: "0.10" @@ -79,6 +79,7 @@ services: GF_AUTH_ANONYMOUS_ENABLED: "false" GF_AUTH_ANONYMOUS_ORG_ROLE: Viewer GF_SECURITY_ALLOW_EMBEDDING: "false" + GF_METRICS_ENABLED: "true" ports: - "${GRAFANA_PORT:-3000}:3000" volumes: @@ -96,10 +97,52 @@ services: condition: service_healthy healthcheck: test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3000/api/health"] - interval: 15s + interval: 10s timeout: 5s - retries: 10 + retries: 5 start_period: 30s + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.10" + memory: 128M + + prometheus: + image: prom/prometheus:v3.9.0 + container_name: devops-prometheus + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --storage.tsdb.retention.time=15d + - --storage.tsdb.retention.size=10GB + - --web.enable-lifecycle + ports: + - "${PROMETHEUS_PORT:-9090}:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - logging + labels: + logging: "promtail" + app: "devops-prometheus" + restart: 
unless-stopped + depends_on: + loki: + condition: service_healthy + grafana: + condition: service_healthy + app-python: + condition: service_healthy + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:9090/-/healthy"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 20s deploy: resources: limits: @@ -113,7 +156,7 @@ services: build: context: ../app_python dockerfile: Dockerfile - image: devops-info-service:lab07 + image: devops-info-service:lab08 container_name: devops-python environment: HOST: "0.0.0.0" @@ -129,15 +172,15 @@ services: restart: unless-stopped healthcheck: test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health')"] - interval: 15s + interval: 10s timeout: 5s - retries: 10 + retries: 5 start_period: 20s deploy: resources: limits: - cpus: "0.75" - memory: 512M + cpus: "0.5" + memory: 256M reservations: cpus: "0.10" memory: 128M @@ -150,3 +193,4 @@ volumes: loki-data: grafana-data: promtail-data: + prometheus-data: diff --git a/monitoring/docs/LAB08.md b/monitoring/docs/LAB08.md new file mode 100644 index 0000000000..2a1e0dfd2a --- /dev/null +++ b/monitoring/docs/LAB08.md @@ -0,0 +1,522 @@ +# Lab08 — Metrics & Monitoring with Prometheus + +## Summary + +This implementation extends the existing Lab07 observability stack with Prometheus-based metrics collection and a Grafana metrics dashboard. 
+ +The solution is designed for a fully local, free workflow: +- Windows host +- WSL2 for Linux tooling +- optional Vagrant VM for a 100% local deployment target +- no external cloud services required + +The repository now contains: +- Prometheus instrumentation in the Python Flask application +- a `/metrics` endpoint compatible with Prometheus scraping +- a Prometheus service added to `monitoring/docker-compose.yml` +- Grafana provisioning for both Loki and Prometheus data sources +- a prebuilt Grafana metrics dashboard with 9 panels +- updated local validation instructions for Windows + WSL2 + Vagrant +- extended Ansible automation for the full Loki + Prometheus + Grafana stack + +## Architecture + +```text + +-----------------------+ + | Grafana | + | dashboards + Explore | + +-----------+-----------+ + ^ + | + +--------------+---------------+ + | | + | | + metrics | | logs + | | + | | ++---------------------+-----+ +---------+---------+ +| Python Flask App | | Promtail | +| /, /health, /metrics | | Docker SD + labels| ++------------+--------------+ +---------+---------+ + | | + | scrape | push + v v + +------+----------------+ +-------+--------+ + | Prometheus | | Loki | + | pull-based TSDB | | log storage | + +-----------------------+ +----------------+ +``` + +## Repository Structure + +```text +monitoring/ +├── .env.example +├── docker-compose.yml +├── docs/ +│ ├── LAB07.md +│ ├── LAB08.md +│ ├── LOCAL_VALIDATION_WINDOWS.md +│ └── screenshots/ +├── grafana/ +│ ├── dashboards/ +│ │ ├── lab07-logging.json +│ │ └── lab08-metrics.json +│ └── provisioning/ +│ ├── dashboards/ +│ │ └── dashboard-provider.yml +│ └── datasources/ +│ └── loki.yml +├── loki/ +│ └── config.yml +├── prometheus/ +│ └── prometheus.yml +└── promtail/ + └── config.yml +``` + +## Application Instrumentation + +### Metrics added to the Flask application + +#### 1. 
HTTP request counter + +Metric: + +```text +http_requests_total{method, endpoint, status_code} +``` + +Purpose: +- total request volume +- request rate calculations +- status code distribution +- error rate calculations + +#### 2. HTTP request duration histogram + +Metric: + +```text +http_request_duration_seconds{method, endpoint} +``` + +Purpose: +- latency measurements +- percentile calculations such as p95 +- heatmap visualizations for duration buckets + +Note: +- `status_code` is intentionally not used on the latency histogram to avoid unnecessary cardinality growth and to keep PromQL queries simpler + +#### 3. In-progress requests gauge + +Metric: + +```text +http_requests_in_progress +``` + +Purpose: +- concurrent request visibility +- simple saturation signal +- active request panel in Grafana + +#### 4. Application-specific business metrics + +Metrics: + +```text +devops_info_endpoint_calls_total{endpoint} +devops_info_system_collection_seconds +devops_info_uptime_seconds +``` + +Purpose: +- endpoint usage tracking +- system information collection cost tracking +- current service uptime for dashboards and troubleshooting + +### Instrumentation design choices + +- request metrics are collected in Flask hooks (`before_request`, `after_request`, `teardown_request`) +- endpoint labels are normalized to avoid high-cardinality labels +- unknown routes are grouped as `unmatched` +- `/metrics` scrape traffic is excluded from request-rate business metrics to avoid self-scrape noise in dashboards +- `/metrics` keeps the Prometheus content type and is not overwritten by the JSON response middleware +- the in-progress gauge is decremented in `teardown_request` to avoid leaks in error scenarios + +## Prometheus Configuration + +File: + +```text +monitoring/prometheus/prometheus.yml +``` + +### Scrape settings + +- scrape interval: `15s` +- evaluation interval: `15s` + +### Scrape targets + +1. `prometheus` + - target: `localhost:9090` +2. 
`app` + - target: `app-python:8000` + - path: `/metrics` +3. `loki` + - target: `loki:3100` +4. `grafana` + - target: `grafana:3000` + +### Retention policy + +Prometheus retention is configured via container flags in `docker-compose.yml`: + +```text +--storage.tsdb.retention.time=15d +--storage.tsdb.retention.size=10GB +``` + +Why this matters: +- avoids unbounded disk usage +- keeps local storage predictable on a laptop or VM +- is fully compatible with a free local deployment model + +## Grafana Dashboard Walkthrough + +Dashboard file: + +```text +monitoring/grafana/dashboards/lab08-metrics.json +``` + +The dashboard includes 9 panels. + +### 1. Request Rate by Endpoint + +Query: + +```promql +sum by (endpoint) (rate(http_requests_total{endpoint!="/metrics"}[5m])) +``` + +Purpose: +- visualizes request throughput +- supports the **R** in RED + +### 2. Error Rate (5xx) + +Query: + +```promql +sum(rate(http_requests_total{status_code=~"5.."}[5m])) +``` + +Purpose: +- shows server-side errors per second +- supports the **E** in RED + +### 3. Request Duration p95 by Endpoint + +Query: + +```promql +histogram_quantile(0.95, sum by (le, endpoint) (rate(http_request_duration_seconds_bucket{endpoint!="/metrics"}[5m]))) +``` + +Purpose: +- tracks latency percentiles +- supports the **D** in RED + +### 4. Active Requests + +Query: + +```promql +sum(http_requests_in_progress) +``` + +Purpose: +- highlights current concurrency pressure + +### 5. Application Uptime + +Query: + +```promql +max(devops_info_uptime_seconds) +``` + +Purpose: +- confirms the service is alive and progressing normally + +### 6. Request Duration Heatmap + +Query: + +```promql +sum by (le) (rate(http_request_duration_seconds_bucket{endpoint!="/metrics"}[5m])) +``` + +Purpose: +- visualizes latency bucket distribution over time + +### 7. 
Status Code Distribution + +Query: + +```promql +sum by (status_code) (rate(http_requests_total[5m])) +``` + +Purpose: +- shows how traffic is split across response classes and codes + +### 8. App Target Uptime + +Query: + +```promql +up{job="app"} +``` + +Purpose: +- shows whether Prometheus is successfully scraping the application + +### 9. System Info Collection p95 + +Query: + +```promql +histogram_quantile(0.95, sum by (le) (rate(devops_info_system_collection_seconds_bucket[5m]))) +``` + +Purpose: +- tracks the internal cost of the service-specific system information collection function + +## PromQL Examples + +### RED method queries + +```promql +sum by (endpoint) (rate(http_requests_total{endpoint!="/metrics"}[5m])) +sum(rate(http_requests_total{status_code=~"5.."}[5m])) +histogram_quantile(0.95, sum by (le, endpoint) (rate(http_request_duration_seconds_bucket{endpoint!="/metrics"}[5m]))) +``` + +### Additional useful queries + +```promql +sum by (status_code) (rate(http_requests_total[5m])) +sum(http_requests_in_progress) +up{job="app"} +max(devops_info_uptime_seconds) +sum by (job) (up) +histogram_quantile(0.95, sum by (le) (rate(devops_info_system_collection_seconds_bucket[5m]))) +``` + +## Production Setup + +### Health checks + +Health checks are configured for: +- Loki +- Promtail +- Grafana +- Prometheus +- Python application + +### Resource limits + +Configured Docker Compose limits: + +| Service | CPU | Memory | +|--------|-----|--------| +| Loki | 1.0 | 1G | +| Promtail | 0.5 | 512M | +| Grafana | 0.5 | 512M | +| Prometheus | 1.0 | 1G | +| App | 0.5 | 256M | + +### Persistence + +Named volumes: +- `loki-data` +- `promtail-data` +- `grafana-data` +- `prometheus-data` + +### Security and operational choices + +- Grafana anonymous access is disabled +- Grafana credentials are externalized via `.env` +- Grafana metrics are explicitly enabled for Prometheus scraping +- the app health check uses Python stdlib instead of `curl` to keep the slim image 
lightweight + +## Local Run Commands + +### Docker Compose from WSL2 + +```bash +cd monitoring +cp .env.example .env +# edit GRAFANA_ADMIN_PASSWORD in .env + +docker compose up -d --build +docker compose ps +``` + +### Generate traffic + +```bash +for i in {1..25}; do curl -s http://127.0.0.1:8000/ > /dev/null; done +for i in {1..25}; do curl -s http://127.0.0.1:8000/health > /dev/null; done +curl -s http://127.0.0.1:8000/does-not-exist > /dev/null +curl -s http://127.0.0.1:8000/metrics | head -40 +``` + +### Validate Prometheus + +```bash +curl http://127.0.0.1:9090/-/healthy +curl http://127.0.0.1:9090/api/v1/targets | python -m json.tool +``` + +Open in browser: +- `http://127.0.0.1:9090/targets` +- `http://127.0.0.1:3000` + +### Useful Grafana checks + +- Confirm both data sources are provisioned: Loki and Prometheus +- Open dashboard: `Lab08 - Prometheus Metrics Overview` +- Run ad hoc PromQL query: `up` + +## Testing Results + +### Static validation completed in the sandbox + +The following checks were completed while preparing this repository update: +- Python syntax validation for the Flask application +- JSON validation for the Grafana dashboard file +- YAML validation for Docker Compose, Prometheus, Loki, and Promtail configuration files +- local application runtime validation for `/`, `/health`, `/metrics`, and request counters + +Captured application-side evidence is stored in: + +```text +monitoring/docs/evidence/ +``` + +Files included there: +- `app-root.json` +- `app-health.json` +- `app-metrics-sample.txt` +- `app-metrics-headers.txt` + +### Full stack runtime validation to run locally + +Because the sandbox used to prepare this patch does not provide a Docker daemon, UI screenshots and multi-container runtime proofs must be captured locally with the provided commands. 
+ +Recommended screenshots to capture locally: +- all `lab08*.png` files in `monitoring/docs/screenshots` + + +## Metrics vs Logs + +### Use metrics when you need +- rates and trends over time +- low-cost aggregation +- alert thresholds +- latency and saturation views + +### Use logs when you need +- exact event details +- request-specific context +- raw error messages and stack traces +- forensic troubleshooting + +### Combined observability model in this repository + +- **Loki + Promtail** answer: *what happened?* +- **Prometheus** answers: *how much, how often, how fast?* +- **Grafana** provides one UI for both views + +## Challenges and Solutions + +### 1. `/metrics` vs JSON middleware + +Problem: +- the existing `after_request` hook forced `application/json` on every response + +Solution: +- the hook now preserves the Prometheus metrics content type and only rewrites JSON responses + +### 2. Label cardinality control + +Problem: +- raw request paths can create unbounded label values + +Solution: +- metrics use normalized endpoints and group unknown routes as `unmatched` + +### 3. In-progress gauge correctness + +Problem: +- a gauge can leak if it is incremented but not decremented during exceptions + +Solution: +- the decrement is handled in `teardown_request` + +### 4. Windows + WSL2 + Vagrant networking + +Problem: +- forwarded ports can behave differently in Windows and WSL2 + +Solution: +- the local validation guide includes both localhost and host-IP guidance + +### 5. Free local deployment requirement + +Problem: +- the lab should not depend on paid services or third-party cloud platforms + +Solution: +- the full stack runs locally with Docker Compose and can also run on the Vagrant VM via Ansible + +## Bonus — Ansible Automation + +The existing `ansible/roles/monitoring` role was extended to cover Lab08 as well. 
+ +Implemented bonus scope: +- Prometheus variables added to role defaults +- Prometheus config generated from a Jinja2 template +- Docker Compose template updated to include Prometheus +- Grafana provisioning now includes both Loki and Prometheus data sources +- the Lab08 dashboard JSON is rendered by Ansible automatically +- the deployment task verifies Prometheus health and both data sources + +Playbook: + +```bash +cd ansible +ansible-galaxy collection install -r requirements.yml +ansible-playbook -i inventory/hosts.ini playbooks/deploy-monitoring.yml +``` + +## Evidence Checklist + +After you run the stack locally, verify the following: + +- `/metrics` endpoint returns Prometheus-formatted metrics +- Prometheus `/targets` shows all targets as `UP` +- Grafana has both Loki and Prometheus data sources +- the Lab08 dashboard shows live data in all panels +- `docker compose ps` shows healthy containers +- dashboards survive `docker compose down` / `docker compose up -d` + +All these checklist you can see as a screenshots in: +- all `lab08*.png` files in `monitoring/docs/screenshots` diff --git a/monitoring/docs/LOCAL_VALIDATION_WINDOWS.md b/monitoring/docs/LOCAL_VALIDATION_WINDOWS.md index 7055bf9974..f66c637525 100644 --- a/monitoring/docs/LOCAL_VALIDATION_WINDOWS.md +++ b/monitoring/docs/LOCAL_VALIDATION_WINDOWS.md @@ -1,80 +1,119 @@ -# Lab07 Local Validation on Windows 11 (WSL2 + Vagrant-friendly) - -This guide is tailored for the same environment used in earlier labs: -- Windows 11 host -- WSL2 Ubuntu for Linux commands -- VirtualBox + Vagrant for the free local VM option -- Russia / unstable access to Docker registries is possible, so retries are expected - -## Option A — Run the stack directly from WSL (if you already have a Linux Docker engine) - -1. Copy the example environment file: - ```bash - cd monitoring - cp .env.example .env - ``` -2. Edit `.env` and set a real Grafana password. -3. 
Start the stack: - ```bash - docker compose up -d --build - docker compose ps - ``` -4. Verify endpoints: - ```bash - curl http://127.0.0.1:3100/ready - curl http://127.0.0.1:9080/targets - curl http://127.0.0.1:8000/health - ``` -5. Open Grafana in a browser: `http://127.0.0.1:3000` - - user: `admin` - - password: value from `.env` - -## Option B — 100% free path using the Vagrant VM and Ansible bonus automation - -1. Reload Vagrant to apply the new forwarded ports: - ```powershell - vagrant reload - vagrant status - vagrant port - ``` -2. Make sure the VM is reachable from WSL. If forwarded ports do not work via `127.0.0.1` inside WSL, use the current Windows host IP from the `vEthernet (WSL...)` adapter. -3. From WSL, activate your Python environment and run: - ```bash - source ~/venvs/devops-lab/bin/activate - export ANSIBLE_CONFIG=/home//work/ansible/ansible.cfg - cd /path/to/repo/ansible - ansible-galaxy collection install -r requirements.yml - ansible-playbook -i inventory/hosts.ini playbooks/deploy-monitoring.yml - ``` -4. From Windows open: - - Grafana: `http://127.0.0.1:3000` - - Loki ready endpoint: `http://127.0.0.1:3100/ready` - - App health: `http://127.0.0.1:8000/health` -5. From WSL, if `127.0.0.1` forwarding does not work, use the current Windows host IP instead of localhost. - -## Generate example traffic - -### Bash / WSL +# Local Validation on Windows 11 (WSL2 + Vagrant-friendly) + +This guide is tailored for a fully local and free setup: +- Windows host +- WSL2 for Linux commands +- VS Code connected to WSL2 +- optional Vagrant VM for a dedicated Linux target +- no external cloud services required + +## Ports used by the observability stack + +- App: `8000` +- Grafana: `3000` +- Loki: `3100` +- Promtail: `9080` +- Prometheus: `9090` + +## Option A — Run directly from WSL2 + +### 1. Prepare environment file + +```bash +cd monitoring +cp .env.example .env +``` + +Set a real Grafana password in `.env`. + +### 2. 
Start the stack + +```bash +docker compose up -d --build +docker compose ps +``` + +### 3. Verify endpoints + +```bash +curl http://127.0.0.1:8000/health +curl http://127.0.0.1:8000/metrics | head -40 +curl http://127.0.0.1:3100/ready +curl http://127.0.0.1:9080/targets +curl http://127.0.0.1:9090/-/healthy +``` + +### 4. Open browser pages + +- Grafana: `http://127.0.0.1:3000` +- Prometheus targets: `http://127.0.0.1:9090/targets` +- Prometheus graph: `http://127.0.0.1:9090/graph` + +### 5. Generate traffic + ```bash -for i in {1..20}; do curl -s http://127.0.0.1:8000/ > /dev/null; done -for i in {1..20}; do curl -s http://127.0.0.1:8000/health > /dev/null; done +for i in {1..30}; do curl -s http://127.0.0.1:8000/ > /dev/null; done +for i in {1..30}; do curl -s http://127.0.0.1:8000/health > /dev/null; done curl -s http://127.0.0.1:8000/does-not-exist > /dev/null ``` -### PowerShell +### 6. Prometheus queries to test + +```promql +up +sum by (endpoint) (rate(http_requests_total{endpoint!="/metrics"}[5m])) +sum(rate(http_requests_total{status_code=~"5.."}[5m])) +histogram_quantile(0.95, sum by (le, endpoint) (rate(http_request_duration_seconds_bucket{endpoint!="/metrics"}[5m]))) +sum by (status_code) (rate(http_requests_total[5m])) +``` + +### 7. Grafana checks + +- log in with the credentials from `.env` +- confirm both data sources exist: Loki and Prometheus +- open the `Lab08 - Prometheus Metrics Overview` dashboard +- verify the panels update after traffic generation + +## Option B — Run on the Vagrant VM + +### 1. Reload Vagrant to apply port forwarding + ```powershell -1..20 | ForEach-Object { Invoke-WebRequest -UseBasicParsing http://127.0.0.1:8000/ | Out-Null } -1..20 | ForEach-Object { Invoke-WebRequest -UseBasicParsing http://127.0.0.1:8000/health | Out-Null } -try { Invoke-WebRequest -UseBasicParsing http://127.0.0.1:8000/does-not-exist | Out-Null } catch {} +vagrant reload +vagrant status +vagrant port ``` -## Useful LogQL checks +### 2. 
Run Ansible deployment from WSL2 -```logql -{job="docker"} -{app="devops-python"} -{app="devops-python"} | json | level="INFO" -{app="devops-python"} | json | level="ERROR" -sum by (app) (rate({app=~"devops-.*"}[1m])) -sum by (level) (count_over_time({app=~"devops-.*"} | json [5m])) +```bash +cd ansible +ansible-galaxy collection install -r requirements.yml +ansible-playbook -i inventory/hosts.ini playbooks/deploy-monitoring.yml ``` + +### 3. Verify from the Windows host or WSL2 + +```bash +curl http://127.0.0.1:8000/health +curl http://127.0.0.1:9090/-/healthy +``` + +If WSL2 cannot reach forwarded ports through `127.0.0.1`, use the current Windows host IP. + +## PowerShell traffic generation + +```powershell +1..30 | ForEach-Object { Invoke-WebRequest -UseBasicParsing http://127.0.0.1:8000/ | Out-Null } +1..30 | ForEach-Object { Invoke-WebRequest -UseBasicParsing http://127.0.0.1:8000/health | Out-Null } +try { Invoke-WebRequest -UseBasicParsing http://127.0.0.1:8000/does-not-exist | Out-Null } catch {} +``` + +## What to capture for proof + +- `/metrics` endpoint output in the browser or terminal +- Prometheus `/targets` page with all targets `UP` +- Prometheus query page with `up` +- Grafana Prometheus data source test +- Grafana dashboard with all panels populated +- `docker compose ps` showing healthy services diff --git a/monitoring/docs/evidence/README.md b/monitoring/docs/evidence/README.md new file mode 100644 index 0000000000..96a3a3a14e --- /dev/null +++ b/monitoring/docs/evidence/README.md @@ -0,0 +1,11 @@ +This directory contains lightweight validation artifacts captured from the sandbox for the instrumented Flask application itself. 
+ +Included files: +- `app-root.json` — sample response from `GET /` +- `app-health.json` — sample response from `GET /health` +- `app-metrics-sample.txt` — sample Prometheus metrics output from `GET /metrics` +- `app-metrics-headers.txt` — response headers for `/metrics` confirming the Prometheus content type + +These files validate the application-side instrumentation. + +Full multi-container validation for Prometheus, Grafana, and Loki must still be executed locally because the sandbox does not provide a Docker daemon. diff --git a/monitoring/docs/evidence/app-health.json b/monitoring/docs/evidence/app-health.json new file mode 100644 index 0000000000..09e5ff36e3 --- /dev/null +++ b/monitoring/docs/evidence/app-health.json @@ -0,0 +1,5 @@ +{ + "status": "healthy", + "timestamp": "2026-03-19T13:05:23.900Z", + "uptime_seconds": 9 +} diff --git a/monitoring/docs/evidence/app-metrics-headers.txt b/monitoring/docs/evidence/app-metrics-headers.txt new file mode 100644 index 0000000000..a9447d71ae --- /dev/null +++ b/monitoring/docs/evidence/app-metrics-headers.txt @@ -0,0 +1,127 @@ +HTTP/1.1 200 OK +Server: Werkzeug/3.1.6 Python/3.13.5 +Date: Thu, 19 Mar 2026 13:05:16 GMT +Content-Type: text/plain; version=1.0.0; charset=utf-8 +Content-Length: 9658 +Connection: close + +# HELP python_gc_objects_collected_total Objects collected during gc +# TYPE python_gc_objects_collected_total counter +python_gc_objects_collected_total{generation="0"} 523.0 +python_gc_objects_collected_total{generation="1"} 0.0 +python_gc_objects_collected_total{generation="2"} 0.0 +# HELP python_gc_objects_uncollectable_total Uncollectable objects found during GC +# TYPE python_gc_objects_uncollectable_total counter +python_gc_objects_uncollectable_total{generation="0"} 0.0 +python_gc_objects_uncollectable_total{generation="1"} 0.0 +python_gc_objects_uncollectable_total{generation="2"} 0.0 +# HELP python_gc_collections_total Number of times this generation was collected +# TYPE 
python_gc_collections_total counter +python_gc_collections_total{generation="0"} 26.0 +python_gc_collections_total{generation="1"} 2.0 +python_gc_collections_total{generation="2"} 0.0 +# HELP python_info Python platform information +# TYPE python_info gauge +python_info{implementation="CPython",major="3",minor="13",patchlevel="5",version="3.13.5"} 1.0 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 1.35983104e+08 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 5.4464512e+07 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.77392551375e+09 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. +# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 0.26 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 6.0 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 1.048576e+06 +# HELP http_requests_total Total HTTP requests processed by the service. +# TYPE http_requests_total counter +http_requests_total{endpoint="/",method="GET",status_code="200"} 1.0 +http_requests_total{endpoint="/health",method="GET",status_code="200"} 1.0 +http_requests_total{endpoint="unmatched",method="GET",status_code="404"} 1.0 +# HELP http_requests_created Total HTTP requests processed by the service. 
+# TYPE http_requests_created gauge +http_requests_created{endpoint="/",method="GET",status_code="200"} 1.773925515830881e+09 +http_requests_created{endpoint="/health",method="GET",status_code="200"} 1.7739255158618011e+09 +http_requests_created{endpoint="unmatched",method="GET",status_code="404"} 1.773925515895349e+09 +# HELP http_request_duration_seconds HTTP request duration in seconds. +# TYPE http_request_duration_seconds histogram +http_request_duration_seconds_bucket{endpoint="/",le="0.005",method="GET"} 0.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.01",method="GET"} 0.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.025",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.05",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.075",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.1",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.25",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.75",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="1.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="2.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="5.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="7.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="10.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="+Inf",method="GET"} 1.0 +http_request_duration_seconds_count{endpoint="/",method="GET"} 1.0 +http_request_duration_seconds_sum{endpoint="/",method="GET"} 0.011629433000052813 +http_request_duration_seconds_bucket{endpoint="/health",le="0.005",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.01",method="GET"} 1.0 
+http_request_duration_seconds_bucket{endpoint="/health",le="0.025",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.05",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.075",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.1",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.25",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.75",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="1.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="2.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="5.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="7.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="10.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="+Inf",method="GET"} 1.0 +http_request_duration_seconds_count{endpoint="/health",method="GET"} 1.0 +http_request_duration_seconds_sum{endpoint="/health",method="GET"} 0.00015099899997039756 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.005",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.01",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.025",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.05",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.075",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.1",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.25",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.5",method="GET"} 1.0 
+http_request_duration_seconds_bucket{endpoint="unmatched",le="0.75",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="1.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="2.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="5.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="7.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="10.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="+Inf",method="GET"} 1.0 +http_request_duration_seconds_count{endpoint="unmatched",method="GET"} 1.0 +http_request_duration_seconds_sum{endpoint="unmatched",method="GET"} 0.00043901000003643276 +# HELP http_request_duration_seconds_created HTTP request duration in seconds. +# TYPE http_request_duration_seconds_created gauge +http_request_duration_seconds_created{endpoint="/",method="GET"} 1.773925515830906e+09 +http_request_duration_seconds_created{endpoint="/health",method="GET"} 1.7739255158618293e+09 +http_request_duration_seconds_created{endpoint="unmatched",method="GET"} 1.773925515895371e+09 +# HELP http_requests_in_progress HTTP requests currently being processed. +# TYPE http_requests_in_progress gauge +http_requests_in_progress 0.0 +# HELP devops_info_endpoint_calls_total Total endpoint calls for the DevOps info service. +# TYPE devops_info_endpoint_calls_total counter +devops_info_endpoint_calls_total{endpoint="/"} 1.0 +devops_info_endpoint_calls_total{endpoint="/health"} 1.0 +devops_info_endpoint_calls_total{endpoint="unmatched"} 1.0 +# HELP devops_info_endpoint_calls_created Total endpoint calls for the DevOps info service. 
+# TYPE devops_info_endpoint_calls_created gauge +devops_info_endpoint_calls_created{endpoint="/"} 1.7739255158309686e+09 +devops_info_endpoint_calls_created{endpoint="/health"} 1.7739255158618608e+09 +devops_info_endpoint_calls_created{endpoint="unmatched"} 1.773925515895407e+09 +# HELP devops_info_system_collection_seconds Time spent collecting system information. +# TYPE devops_info_system_collection_seconds histogram +devops_info_system_collection_seconds_bucket{le="0.005"} 0.0 diff --git a/monitoring/docs/evidence/app-metrics-sample.txt b/monitoring/docs/evidence/app-metrics-sample.txt new file mode 100644 index 0000000000..5b6436e2cf --- /dev/null +++ b/monitoring/docs/evidence/app-metrics-sample.txt @@ -0,0 +1,142 @@ +# HELP python_gc_objects_collected_total Objects collected during gc +# TYPE python_gc_objects_collected_total counter +python_gc_objects_collected_total{generation="0"} 523.0 +python_gc_objects_collected_total{generation="1"} 0.0 +python_gc_objects_collected_total{generation="2"} 0.0 +# HELP python_gc_objects_uncollectable_total Uncollectable objects found during GC +# TYPE python_gc_objects_uncollectable_total counter +python_gc_objects_uncollectable_total{generation="0"} 0.0 +python_gc_objects_uncollectable_total{generation="1"} 0.0 +python_gc_objects_uncollectable_total{generation="2"} 0.0 +# HELP python_gc_collections_total Number of times this generation was collected +# TYPE python_gc_collections_total counter +python_gc_collections_total{generation="0"} 26.0 +python_gc_collections_total{generation="1"} 2.0 +python_gc_collections_total{generation="2"} 0.0 +# HELP python_info Python platform information +# TYPE python_info gauge +python_info{implementation="CPython",major="3",minor="13",patchlevel="5",version="3.13.5"} 1.0 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. 
+# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 1.35983104e+08 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 5.4464512e+07 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.77392551375e+09 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. +# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 0.26 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 6.0 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 1.048576e+06 +# HELP http_requests_total Total HTTP requests processed by the service. +# TYPE http_requests_total counter +http_requests_total{endpoint="/",method="GET",status_code="200"} 1.0 +http_requests_total{endpoint="/health",method="GET",status_code="200"} 1.0 +http_requests_total{endpoint="unmatched",method="GET",status_code="404"} 1.0 +# HELP http_requests_created Total HTTP requests processed by the service. +# TYPE http_requests_created gauge +http_requests_created{endpoint="/",method="GET",status_code="200"} 1.773925515830881e+09 +http_requests_created{endpoint="/health",method="GET",status_code="200"} 1.7739255158618011e+09 +http_requests_created{endpoint="unmatched",method="GET",status_code="404"} 1.773925515895349e+09 +# HELP http_request_duration_seconds HTTP request duration in seconds. 
+# TYPE http_request_duration_seconds histogram +http_request_duration_seconds_bucket{endpoint="/",le="0.005",method="GET"} 0.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.01",method="GET"} 0.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.025",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.05",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.075",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.1",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.25",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="0.75",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="1.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="2.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="5.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="7.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="10.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/",le="+Inf",method="GET"} 1.0 +http_request_duration_seconds_count{endpoint="/",method="GET"} 1.0 +http_request_duration_seconds_sum{endpoint="/",method="GET"} 0.011629433000052813 +http_request_duration_seconds_bucket{endpoint="/health",le="0.005",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.01",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.025",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.05",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.075",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.1",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.25",method="GET"} 1.0 
+http_request_duration_seconds_bucket{endpoint="/health",le="0.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="0.75",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="1.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="2.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="5.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="7.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="10.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="/health",le="+Inf",method="GET"} 1.0 +http_request_duration_seconds_count{endpoint="/health",method="GET"} 1.0 +http_request_duration_seconds_sum{endpoint="/health",method="GET"} 0.00015099899997039756 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.005",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.01",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.025",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.05",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.075",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.1",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.25",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="0.75",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="1.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="2.5",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="5.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="7.5",method="GET"} 1.0 
+http_request_duration_seconds_bucket{endpoint="unmatched",le="10.0",method="GET"} 1.0 +http_request_duration_seconds_bucket{endpoint="unmatched",le="+Inf",method="GET"} 1.0 +http_request_duration_seconds_count{endpoint="unmatched",method="GET"} 1.0 +http_request_duration_seconds_sum{endpoint="unmatched",method="GET"} 0.00043901000003643276 +# HELP http_request_duration_seconds_created HTTP request duration in seconds. +# TYPE http_request_duration_seconds_created gauge +http_request_duration_seconds_created{endpoint="/",method="GET"} 1.773925515830906e+09 +http_request_duration_seconds_created{endpoint="/health",method="GET"} 1.7739255158618293e+09 +http_request_duration_seconds_created{endpoint="unmatched",method="GET"} 1.773925515895371e+09 +# HELP http_requests_in_progress HTTP requests currently being processed. +# TYPE http_requests_in_progress gauge +http_requests_in_progress 0.0 +# HELP devops_info_endpoint_calls_total Total endpoint calls for the DevOps info service. +# TYPE devops_info_endpoint_calls_total counter +devops_info_endpoint_calls_total{endpoint="/"} 1.0 +devops_info_endpoint_calls_total{endpoint="/health"} 1.0 +devops_info_endpoint_calls_total{endpoint="unmatched"} 1.0 +# HELP devops_info_endpoint_calls_created Total endpoint calls for the DevOps info service. +# TYPE devops_info_endpoint_calls_created gauge +devops_info_endpoint_calls_created{endpoint="/"} 1.7739255158309686e+09 +devops_info_endpoint_calls_created{endpoint="/health"} 1.7739255158618608e+09 +devops_info_endpoint_calls_created{endpoint="unmatched"} 1.773925515895407e+09 +# HELP devops_info_system_collection_seconds Time spent collecting system information. 
+# TYPE devops_info_system_collection_seconds histogram +devops_info_system_collection_seconds_bucket{le="0.005"} 0.0 +devops_info_system_collection_seconds_bucket{le="0.01"} 0.0 +devops_info_system_collection_seconds_bucket{le="0.025"} 1.0 +devops_info_system_collection_seconds_bucket{le="0.05"} 1.0 +devops_info_system_collection_seconds_bucket{le="0.075"} 1.0 +devops_info_system_collection_seconds_bucket{le="0.1"} 1.0 +devops_info_system_collection_seconds_bucket{le="0.25"} 1.0 +devops_info_system_collection_seconds_bucket{le="0.5"} 1.0 +devops_info_system_collection_seconds_bucket{le="0.75"} 1.0 +devops_info_system_collection_seconds_bucket{le="1.0"} 1.0 +devops_info_system_collection_seconds_bucket{le="2.5"} 1.0 +devops_info_system_collection_seconds_bucket{le="5.0"} 1.0 +devops_info_system_collection_seconds_bucket{le="7.5"} 1.0 +devops_info_system_collection_seconds_bucket{le="10.0"} 1.0 +devops_info_system_collection_seconds_bucket{le="+Inf"} 1.0 +devops_info_system_collection_seconds_count 1.0 +devops_info_system_collection_seconds_sum 0.011327940000001036 +# HELP devops_info_system_collection_seconds_created Time spent collecting system information. +# TYPE devops_info_system_collection_seconds_created gauge +devops_info_system_collection_seconds_created 1.7739255140238588e+09 +# HELP devops_info_uptime_seconds Current service uptime in seconds. 
+# TYPE devops_info_uptime_seconds gauge +devops_info_uptime_seconds 1.0 diff --git a/monitoring/docs/evidence/app-root.json b/monitoring/docs/evidence/app-root.json new file mode 100644 index 0000000000..005e35267b --- /dev/null +++ b/monitoring/docs/evidence/app-root.json @@ -0,0 +1,45 @@ +{ + "endpoints": [ + { + "description": "Service information", + "method": "GET", + "path": "/" + }, + { + "description": "Health check", + "method": "GET", + "path": "/health" + }, + { + "description": "Prometheus metrics", + "method": "GET", + "path": "/metrics" + } + ], + "request": { + "client_ip": "127.0.0.1", + "method": "GET", + "path": "/", + "user_agent": "curl/8.10.1" + }, + "runtime": { + "current_time": "2026-03-19T13:05:22.568Z", + "timezone": "UTC", + "uptime_human": "0 hours, 0 minutes", + "uptime_seconds": 8 + }, + "service": { + "description": "DevOps course info service", + "framework": "Flask", + "name": "devops-info-service", + "version": "1.1.0" + }, + "system": { + "architecture": "x86_64", + "cpu_count": 56, + "hostname": "17ceccfebd47", + "platform": "Linux", + "platform_version": "Linux-4.4.0-x86_64-with-glibc2.41", + "python_version": "3.13.5" + } +} diff --git a/monitoring/docs/screenshots/README.md b/monitoring/docs/screenshots/README.md index 25d39fa7a3..8b6572b859 100644 --- a/monitoring/docs/screenshots/README.md +++ b/monitoring/docs/screenshots/README.md @@ -1,6 +1,10 @@ Store your local proof screenshots here after you run the stack: -- grafana-explore.png +- metrics-endpoint.png +- prometheus-targets.png +- prometheus-query-up.png +- grafana-prometheus-datasource.png +- grafana-lab08-dashboard.png +- docker-compose-ps-healthy.png +- grafana-explore-app-logs.png - grafana-dashboard.png -- grafana-login.png -- app-json-logs.png diff --git a/monitoring/docs/screenshots/lab08-datasources.png b/monitoring/docs/screenshots/lab08-datasources.png new file mode 100644 index 0000000000..c607a53a92 Binary files /dev/null and 
b/monitoring/docs/screenshots/lab08-datasources.png differ diff --git a/monitoring/docs/screenshots/lab08-devops_info_uptime_seconds.png b/monitoring/docs/screenshots/lab08-devops_info_uptime_seconds.png new file mode 100644 index 0000000000..eaf983b8a8 Binary files /dev/null and b/monitoring/docs/screenshots/lab08-devops_info_uptime_seconds.png differ diff --git a/monitoring/docs/screenshots/lab08-docker-compose-ps.png b/monitoring/docs/screenshots/lab08-docker-compose-ps.png new file mode 100644 index 0000000000..8e518c34cd Binary files /dev/null and b/monitoring/docs/screenshots/lab08-docker-compose-ps.png differ diff --git a/monitoring/docs/screenshots/lab08-grafana-dasshboards.png b/monitoring/docs/screenshots/lab08-grafana-dasshboards.png new file mode 100644 index 0000000000..30740e612b Binary files /dev/null and b/monitoring/docs/screenshots/lab08-grafana-dasshboards.png differ diff --git a/monitoring/docs/screenshots/lab08-grafana-logs.png b/monitoring/docs/screenshots/lab08-grafana-logs.png new file mode 100644 index 0000000000..423709452f Binary files /dev/null and b/monitoring/docs/screenshots/lab08-grafana-logs.png differ diff --git a/monitoring/docs/screenshots/lab08-http_requests_total.png b/monitoring/docs/screenshots/lab08-http_requests_total.png new file mode 100644 index 0000000000..9771656300 Binary files /dev/null and b/monitoring/docs/screenshots/lab08-http_requests_total.png differ diff --git a/monitoring/docs/screenshots/lab08-prometheus-targets-up.png b/monitoring/docs/screenshots/lab08-prometheus-targets-up.png new file mode 100644 index 0000000000..88ad4c6365 Binary files /dev/null and b/monitoring/docs/screenshots/lab08-prometheus-targets-up.png differ diff --git a/monitoring/docs/screenshots/lab08-rate(http_requests_total[5m])-query.png b/monitoring/docs/screenshots/lab08-rate(http_requests_total[5m])-query.png new file mode 100644 index 0000000000..1ecfc5ad6e Binary files /dev/null and 
b/monitoring/docs/screenshots/lab08-rate(http_requests_total[5m])-query.png differ diff --git a/monitoring/docs/screenshots/lab08-up.png b/monitoring/docs/screenshots/lab08-up.png new file mode 100644 index 0000000000..8e1ca8d3fb Binary files /dev/null and b/monitoring/docs/screenshots/lab08-up.png differ diff --git a/monitoring/grafana/dashboards/lab08-metrics.json b/monitoring/grafana/dashboards/lab08-metrics.json new file mode 100644 index 0000000000..b05c0fa86a --- /dev/null +++ b/monitoring/grafana/dashboards/lab08-metrics.json @@ -0,0 +1,454 @@ +{ + "id": null, + "uid": "lab08-prometheus-metrics", + "title": "Lab08 - Prometheus Metrics Overview", + "tags": [ + "lab08", + "prometheus", + "metrics", + "observability" + ], + "timezone": "browser", + "schemaVersion": 39, + "version": 1, + "refresh": "10s", + "time": { + "from": "now-30m", + "to": "now" + }, + "panels": [ + { + "id": 1, + "type": "timeseries", + "title": "Request Rate by Endpoint", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "targets": [ + { + "refId": "A", + "expr": "sum by (endpoint) (rate(http_requests_total{endpoint!=\"/metrics\"}[5m]))", + "legendFormat": "{{endpoint}}", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 2, + "type": "timeseries", + "title": "Error Rate (5xx)", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m]))", + "legendFormat": "5xx errors/sec", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": 
"reqps" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + } + }, + { + "id": 3, + "type": "timeseries", + "title": "Request Duration p95 by Endpoint", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.95, sum by (le, endpoint) (rate(http_request_duration_seconds_bucket{endpoint!=\"/metrics\"}[5m])))", + "legendFormat": "{{endpoint}} p95", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 4, + "type": "stat", + "title": "Active Requests", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "refId": "A", + "expr": "sum(http_requests_in_progress)", + "legendFormat": "in-flight", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "textMode": "auto" + } + }, + { + "id": 5, + "type": "stat", + "title": "Application Uptime", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 8 + }, + "targets": [ + { + "refId": "A", + "expr": 
"max(devops_info_uptime_seconds)", + "legendFormat": "uptime", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "textMode": "auto" + } + }, + { + "id": 6, + "type": "heatmap", + "title": "Request Duration Heatmap", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 16 + }, + "targets": [ + { + "refId": "A", + "expr": "sum by (le) (rate(http_request_duration_seconds_bucket{endpoint!=\"/metrics\"}[5m]))", + "legendFormat": "{{le}}", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "reqps" + }, + "overrides": [] + }, + "options": { + "calculate": false, + "legend": { + "show": false + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "unit": "s" + } + } + }, + { + "id": 7, + "type": "piechart", + "title": "Status Code Distribution", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 16 + }, + "targets": [ + { + "refId": "A", + "expr": "sum by (status_code) (rate(http_requests_total[5m]))", + "legendFormat": "{{status_code}}", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "options": { + "legend": { + "displayMode": "list", + "placement": "right" + }, + "pieType": "pie", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + } + } + }, + { + "id": 8, + "type": "stat", + "title": "App Target Uptime", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 25 + }, + "targets": [ + { + "refId": "A", + "expr": 
"up{job=\"app\"}", + "legendFormat": "app", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "mappings": [ + { + "type": "value", + "options": { + "0": { + "text": "DOWN" + }, + "1": { + "text": "UP" + } + } + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "textMode": "auto" + } + }, + { + "id": 9, + "type": "timeseries", + "title": "System Info Collection p95", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 7, + "w": 16, + "x": 8, + "y": 25 + }, + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.95, sum by (le) (rate(devops_info_system_collection_seconds_bucket[5m])))", + "legendFormat": "system info p95", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + } + } + ], + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + } + } + ], + "templating": { + "list": [] + }, + "annotations": { + "list": [] + } +} diff --git a/monitoring/grafana/provisioning/datasources/loki.yml b/monitoring/grafana/provisioning/datasources/loki.yml index 9c54f7c886..abeeb463cf 100644 --- a/monitoring/grafana/provisioning/datasources/loki.yml +++ b/monitoring/grafana/provisioning/datasources/loki.yml @@ -3,6 +3,8 @@ apiVersion: 1 deleteDatasources: - name: Loki orgId: 1 + - name: Prometheus + orgId: 1 prune: true @@ -17,3 +19,15 @@ datasources: jsonData: maxLines: 1000 timeout: 60 + + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + url: 
http://prometheus:9090 + isDefault: false + editable: false + jsonData: + httpMethod: POST + prometheusType: Prometheus + timeInterval: 15s diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 0000000000..405abc0fdf --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,27 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + + - job_name: app + metrics_path: /metrics + static_configs: + - targets: + - app-python:8000 + + - job_name: loki + metrics_path: /metrics + static_configs: + - targets: + - loki:3100 + + - job_name: grafana + metrics_path: /metrics + static_configs: + - targets: + - grafana:3000