Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build/ci/production-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ restore:
backup:
mongo:
enabled: true
linkerOutputEnabled: true
prefix: ""
public: true
bucket: sefaria-mongo-backup
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{{- if .Values.backup.mongo.linkerOutputEnabled }}
# ConfigMap carrying the script that dumps the linker_output and webpages_text
# collections into the shared volume. Rendered only when
# backup.mongo.linkerOutputEnabled is set.
apiVersion: v1
kind: ConfigMap
metadata:
  name: create-linker-output-dumps-{{ .Values.deployEnv }}
  labels:
    deployEnv: "{{ .Values.deployEnv }}"
    {{- include "sefaria.labels" . | nindent 4 }}
data:
  create-linker-output-dumps.sh: |-
    #!/usr/bin/env bash
    # Dumps the linker_output and webpages_text collections with mongodump
    # into ${DATADIR}/dump and records the database name in
    # ${DATADIR}/db_name.txt.
    #
    # Environment:
    #   SEFARIA_DB              (required) base database name
    #   MONGO_HOST              (required) host[:port] list for the URI
    #   SEFARIA_DB_USER         (optional) enables auth against authSource=admin
    #   SEFARIA_DB_PASSWORD     (optional) password for SEFARIA_DB_USER
    #   MONGO_REPLICASET_NAME   (optional) appended as replicaSet=<name>
    #   MONGODUMP_MAX_ATTEMPTS  (optional) attempts per collection, default 30
    set -euo pipefail

    # Fail early instead of building a nonsensical database name.
    if [[ -z "${SEFARIA_DB:-}" ]]; then
      echo "SEFARIA_DB not specified"
      exit 1
    fi

    {{- if .Values.restore.dynamicName }}
    DATABASE="${SEFARIA_DB}-{{ .Values.deployEnv }}"
    {{- else }}
    DATABASE="${SEFARIA_DB}"
    {{- end }}

    if [[ -z "${MONGO_HOST:-}" ]]; then
      echo "Mongo Host not specified"
      exit 1
    fi

    # Assemble the connection string piece by piece.
    # NOTE(review): user and password are inserted verbatim; values containing
    # reserved URI characters would need percent-encoding.
    URI="mongodb://"

    if [[ -n "${SEFARIA_DB_USER:-}" ]]; then
      URI="${URI}${SEFARIA_DB_USER}"
      if [[ -n "${SEFARIA_DB_PASSWORD:-}" ]]; then
        URI="${URI}:${SEFARIA_DB_PASSWORD}"
      fi
      URI="${URI}@"
    fi

    URI="${URI}${MONGO_HOST}/?ssl=false"

    if [[ -n "${SEFARIA_DB_USER:-}" ]]; then
      URI="${URI}&authSource=admin"
    fi

    if [[ -n "${MONGO_REPLICASET_NAME:-}" ]]; then
      URI="${URI}&replicaSet=${MONGO_REPLICASET_NAME}"
    fi

    DATADIR="/mongodumps/shared_volume"
    MAX_ATTEMPTS="${MONGODUMP_MAX_ATTEMPTS:-30}"

    # dump_collection <collection>
    # Runs mongodump for a single collection, retrying every 5 seconds.
    # Gives up (exit 1) after MAX_ATTEMPTS so a persistent failure ends the
    # container instead of looping forever. Between attempts only this
    # collection's files are removed, so an earlier successful dump in the
    # same directory is left intact.
    dump_collection() {
      local collection="$1"
      local attempt=1

      echo "Dumping ${collection} collection"
      until mongodump --uri="$URI" -v -d "$DATABASE" \
          --collection="${collection}" \
          -o "${DATADIR}/dump"
      do
        if (( attempt >= MAX_ATTEMPTS )); then
          echo "mongodump of ${collection} failed after ${attempt} attempts, giving up"
          exit 1
        fi
        echo "mongodump failed, retrying in 5s..."
        rm -f "${DATADIR}/dump/${DATABASE}/${collection}.bson" \
              "${DATADIR}/dump/${DATABASE}/${collection}.metadata.json"
        attempt=$(( attempt + 1 ))
        sleep 5
      done
    }

    dump_collection linker_output
    dump_collection webpages_text

    # The upload step reads this file to locate the dump sub-directory.
    echo "${DATABASE}" > "${DATADIR}/db_name.txt"

    echo "dump complete - upload should start"
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{{- if .Values.backup.mongo.linkerOutputEnabled }}
# ConfigMap carrying the script that uploads the linker_output and
# webpages_text dumps to Google Cloud Storage. Rendered only when
# backup.mongo.linkerOutputEnabled is set.
apiVersion: v1
kind: ConfigMap
metadata:
  name: upload-linker-output-dumps-{{ .Values.deployEnv }}
  labels:
    deployEnv: "{{ .Values.deployEnv }}"
    {{- include "sefaria.labels" . | nindent 4 }}
data:
  upload-linker-output-dumps.sh: |-
    #!/usr/bin/env bash
    # Packs each dumped collection into its own tarball, streams it to
    # gs://${BUCKET}/${PREFIX}<collection>_<dd.mm.yy>.tar.gz, then removes
    # the objects dated five days ago.
    #
    # Environment:
    #   BUCKET  (required) destination bucket name
    #   PREFIX  (optional) object-name prefix, may be empty
    set -euo pipefail
    set -x

    # Abort with a clear message when the bucket is missing; an empty prefix
    # is legitimate, so it only gets a default.
    : "${BUCKET:?BUCKET must be set}"
    PREFIX="${PREFIX:-}"

    DATADIR="/mongodumps/shared_volume"
    today="$(date +'%d.%m.%y')"
    # Keep 5 snapshots — the file from 5 days ago is the one rotated out
    five_days_ago="$(date --date='5 days ago' +'%d.%m.%y')"

    if [ ! -f "${DATADIR}/db_name.txt" ]; then
      echo "ERROR: db_name.txt missing"
      exit 1
    fi
    DB=$(cat "${DATADIR}/db_name.txt")
    if [ -z "${DB}" ]; then
      echo "ERROR: db_name.txt is empty"
      exit 1
    fi

    # upload_collection <collection>
    # Streams the collection's .bson and .metadata.json as a gzipped tarball
    # to the bucket. With pipefail, a failure in either tar or gsutil aborts
    # the script.
    upload_collection() {
      local collection="$1"

      echo "Uploading ${collection} dump"
      tar czf - -C "${DATADIR}" \
        "./dump/${DB}/${collection}.bson" \
        "./dump/${DB}/${collection}.metadata.json" \
        | gsutil cp - "gs://${BUCKET}/${PREFIX}${collection}_${today}.tar.gz"
    }

    upload_collection linker_output
    upload_collection webpages_text

    # Rotate only after both uploads succeeded, so a failed run never reduces
    # the number of retained snapshots. Removal stays best-effort because the
    # object may not exist.
    # NOTE(review): only the object dated exactly five days ago is removed; if
    # a run is skipped, that day's counterpart is never deleted by this script.
    gsutil rm "gs://${BUCKET}/${PREFIX}linker_output_${five_days_ago}.tar.gz" || true
    gsutil rm "gs://${BUCKET}/${PREFIX}webpages_text_${five_days_ago}.tar.gz" || true

    echo "Linker output dump upload complete"
{{- end }}
100 changes: 100 additions & 0 deletions helm-chart/sefaria/templates/cronjob/mongo-backup-linker-output.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{{- if .Values.backup.mongo.linkerOutputEnabled }}
---
# Nightly CronJob that dumps the linker_output and webpages_text collections
# (init container) and uploads them to Cloud Storage (main container).
# Guarded by the same condition as the two script ConfigMaps it mounts, so
# the three resources are always rendered together.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ .Values.deployEnv }}-mongobackup-linker-output
  labels:
    {{- include "sefaria.labels" . | nindent 4 }}
spec:
  concurrencyPolicy: Replace
  schedule: "0 2 * * *"
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        spec:
          # serviceAccountName is the current field; serviceAccount is its
          # deprecated alias.
          serviceAccountName: {{ .Values.backup.mongo.serviceAccount }}
          affinity:
            # Prefer (not require) nodes already running pods labelled
            # app=mongo.
            podAffinity:
              preferredDuringSchedulingIgnoredDuringExecution:
                - weight: 1
                  podAffinityTerm:
                    labelSelector:
                      matchExpressions:
                        - key: app
                          operator: In
                          values:
                            - mongo
                    topologyKey: kubernetes.io/hostname
          tolerations:
            - key: schedule-on-database-vm
              operator: "Equal"
              value: "true"
              effect: "NoSchedule"
            - key: mongo-replicaset
              operator: "Equal"
              value: "reserved"
              effect: "NoSchedule"
          initContainers:
            # Writes the dump into the shared volume before the uploader
            # starts.
            - name: mongo-linker-output-dumper
              image: mongo:{{ .Values.backup.mongo.version }}
              envFrom:
                - secretRef:
                    name: {{ .Values.secrets.localSettings.ref }}
                    optional: true
                - configMapRef:
                    name: local-settings-{{ .Values.deployEnv }}
                - secretRef:
                    name: local-settings-secrets-{{ .Values.deployEnv }}
                    optional: true
              volumeMounts:
                - name: shared-volume
                  mountPath: /mongodumps/shared_volume
                - name: create-linker-output-dumps-script
                  mountPath: /scripts/create-linker-output-dumps.sh
                  subPath: create-linker-output-dumps.sh
                  readOnly: true
              command: ["bash"]
              args: ["-c", "/scripts/create-linker-output-dumps.sh"]
              resources:
                requests:
                  ephemeral-storage: 12Gi
                limits:
                  memory: "500Mi"
          containers:
            - name: linker-output-dump-uploader
              # NOTE(review): no tag is given, so this resolves to the
              # registry's default tag; consider pinning a version.
              image: google/cloud-sdk
              volumeMounts:
                - name: shared-volume
                  mountPath: /mongodumps/shared_volume
                - name: upload-linker-output-dumps-script
                  mountPath: /scripts/upload-linker-output-dumps.sh
                  subPath: upload-linker-output-dumps.sh
                  readOnly: true
              env:
                # Quoted so an empty prefix renders as "" rather than a YAML
                # null, and so values are always typed as strings.
                - name: PREFIX
                  value: {{ .Values.backup.mongo.prefix | quote }}
                - name: BUCKET
                  value: {{ .Values.backup.mongo.bucket | quote }}
              command: ["bash"]
              args: ["-c", "/scripts/upload-linker-output-dumps.sh"]
              resources:
                limits:
                  memory: "500Mi"
          restartPolicy: OnFailure
          volumes:
            - name: create-linker-output-dumps-script
              configMap:
                name: create-linker-output-dumps-{{ .Values.deployEnv }}
                defaultMode: 0755
            - name: upload-linker-output-dumps-script
              configMap:
                name: upload-linker-output-dumps-{{ .Values.deployEnv }}
                defaultMode: 0755
            # Scratch space shared by the dumper and the uploader; sized to
            # match the dumper's ephemeral-storage request.
            - name: shared-volume
              emptyDir:
                sizeLimit: 12Gi
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 2
{{- end }}
2 changes: 2 additions & 0 deletions helm-chart/sefaria/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ restore:
backup:
mongo:
enabled: false
linkerOutputEnabled: false
historyEnabled: false
prefix: ""
public: true
bucket: sefaria-mongo-backup
Expand Down
Loading