Files
navidrome-litefs/entrypoint.sh
sstent 4538ad5909
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 3m52s
feat: Add automated LiteFS backups and GitHub deployment workflow
2026-04-08 10:38:23 -07:00

154 lines
4.4 KiB
Bash

#!/bin/bash
# Supervisor entrypoint: on the LiteFS primary node we run Navidrome and
# register it in Consul; on replicas we keep the app stopped. Exit on any
# unhandled command failure.
set -e
# Configuration from environment
SERVICE_NAME="navidrome"
# Use Nomad allocation ID for a unique service ID
SERVICE_ID="${SERVICE_NAME}-${NOMAD_ALLOC_ID:-$(hostname)}"
# Port Navidrome listens on; also the port registered in Consul.
PORT=4533
# Consul agent HTTP endpoint; CONSUL_URL overrides the local-agent default.
CONSUL_HTTP_ADDR="${CONSUL_URL:-http://localhost:8500}"
# IP address advertised to Consul. NOTE(review): assumes ADVERTISE_IP is
# exported by the deployment (Nomad job); empty otherwise — confirm.
NODE_IP="${ADVERTISE_IP}"
# LiteFS marker file: present only on replicas (see main loop below).
DB_LOCK_FILE="/data/.primary"
# PID of the background navidrome child; 0 means "not running".
NAVIDROME_PID=0
# Tags for the Primary service (Traefik enabled)
PRIMARY_TAGS='["navidrome","web","traefik.enable=true","urlprefix-/navidrome","tools","traefik.http.routers.navidromelan.rule=Host(`navidrome.service.dc1.consul`)","traefik.http.routers.navidromewan.rule=Host(`m.fbleagh.duckdns.org`)","traefik.http.routers.navidromewan.middlewares=dex@consulcatalog","traefik.http.routers.navidromewan.tls=true"]'
# --- Helper Functions ---
# Backup Database (Only on Primary)
# Exports the LiteFS-managed SQLite database to a timestamped file under
# /shared_data/backup and prunes backups older than 7 days. Deliberately
# always returns 0 so a failed backup never kills the supervisor loop
# (set -e is active for the whole script).
run_backup() {
  local backup_dir="/shared_data/backup"
  # Declare and assign separately so the exit status of date is not
  # masked by 'local' (ShellCheck SC2155).
  local timestamp
  timestamp=$(date +%Y%m%d_%H%M%S)
  local backup_file="${backup_dir}/navidrome.db_${timestamp}.bak"
  echo "Backing up database to ${backup_file}..."
  mkdir -p "$backup_dir"
  if litefs export -name navidrome.db "$backup_file"; then
    echo "Backup successful."
    # Keep only last 7 days
    find "$backup_dir" -name "navidrome.db_*.bak" -mtime +7 -delete
    echo "Old backups cleaned."
  else
    # Diagnostics belong on stderr; non-fatal by design (see above).
    echo "ERROR: Backup failed!" >&2
  fi
}
# Register Service with TTL Check
# Registers this instance as the "navidrome" service on the local Consul
# agent with a 15s TTL health check; Consul auto-deregisters us 1 minute
# after the check goes critical. A curl failure is downgraded to a warning
# so a transient Consul outage cannot kill the supervisor (set -e is active).
register_service() {
  echo "Promoted! Registering service ${SERVICE_ID}..."
  # PRIMARY_TAGS is already a JSON array, so it is spliced in verbatim.
  curl -s -X PUT "${CONSUL_HTTP_ADDR}/v1/agent/service/register" -d "{
    \"ID\": \"${SERVICE_ID}\",
    \"Name\": \"${SERVICE_NAME}\",
    \"Tags\": ${PRIMARY_TAGS},
    \"Address\": \"${NODE_IP}\",
    \"Port\": ${PORT},
    \"Check\": {
      \"DeregisterCriticalServiceAfter\": \"1m\",
      \"TTL\": \"15s\"
    }
  }" || echo "WARN: Consul service registration failed" >&2
}
# Send Heartbeat to Consul
# Marks this service's TTL check ("service:<id>", Consul's default check ID)
# as passing; must run at least every 15s while we are primary. "|| true"
# keeps a transient Consul outage from killing the supervisor, since set -e
# is active and this is called as a plain command in the main loop.
pass_ttl() {
  curl -s -X PUT "${CONSUL_HTTP_ADDR}/v1/agent/check/pass/service:${SERVICE_ID}" > /dev/null || true
}
# Deregister Service
# Removes this instance's registration from the local Consul agent.
# "|| true" is required: this also runs from the exit trap (cleanup), where
# a curl failure under set -e would abort before the trap's "exit 0".
deregister_service() {
  echo "Demoted/Stopping. Deregistering service ${SERVICE_ID}..."
  curl -s -X PUT "${CONSUL_HTTP_ADDR}/v1/agent/service/deregister/${SERVICE_ID}" || true
}
# Start Navidrome in Background
# Launches the app as a background child and records its PID in the global
# NAVIDROME_PID so the supervisor loop can monitor, heartbeat, and stop it.
start_app() {
  echo "Node is Primary. Starting Navidrome..."
  # The app relies on these shared directories; create them up front.
  mkdir -p /shared_data/plugins /shared_data/cache /shared_data/backup
  /app/navidrome &
  NAVIDROME_PID=$!
  echo "Navidrome started with PID ${NAVIDROME_PID}"
}
# Stop Navidrome
# Gracefully TERMs the recorded child (if any), waits for it to exit, and
# resets NAVIDROME_PID to 0. The kill is tolerated failing: the process may
# already be dead (crash/demotion race), and under set -e an unguarded kill
# failure would take down the whole supervisor.
stop_app() {
  if [ "${NAVIDROME_PID}" -gt 0 ]; then
    echo "Stopping Navidrome (PID ${NAVIDROME_PID})..."
    kill -SIGTERM "${NAVIDROME_PID}" 2>/dev/null || true
    wait "${NAVIDROME_PID}" 2>/dev/null || true
    NAVIDROME_PID=0
  fi
}
# --- Signal Handling (The Safety Net) ---
# If Nomad stops the container, we stop the app and deregister.
# Order matters: stop the child first so it releases the database, then
# remove the Consul registration so traffic stops routing here, then exit 0
# so the container is reported as a clean shutdown.
cleanup() {
echo "Caught signal, shutting down..."
stop_app
deregister_service
exit 0
}
# Nomad sends SIGTERM on stop; SIGINT covers interactive runs.
trap cleanup TERM INT
# --- Main Loop ---
# Supervisor loop: every 5 seconds, read the LiteFS role marker and converge
# the local state (app process, Consul registration, backups) to match.
echo "Starting Supervisor. Waiting for leadership settle..."
echo "Node IP: $NODE_IP"
echo "Consul: $CONSUL_HTTP_ADDR"
# Give LiteFS time to settle and the leadership election to complete.
sleep 5

LAST_BACKUP_TIME=0
BACKUP_INTERVAL=86400 # 24 hours

while true; do
  # In LiteFS 0.5 the .primary file exists ONLY on replicas, so its
  # presence means "replica" and its absence means "primary".
  if [ -f "$DB_LOCK_FILE" ]; then
    # === WE ARE REPLICA ===
    # A still-running app means we were just demoted: tear everything down.
    if [ "${NAVIDROME_PID}" -gt 0 ]; then
      echo "Lost leadership. Demoting..."
      stop_app
      deregister_service
      # Reset the backup timer so a fresh cycle starts on the next promotion.
      LAST_BACKUP_TIME=0
    fi
    # Replicas keep no service registration, keeping Consul clean.
  else
    # === WE ARE PRIMARY ===
    # Start the app (first promotion) or restart it after a crash, and
    # (re)register the service in Consul.
    if [ "${NAVIDROME_PID}" -eq 0 ] || ! kill -0 "${NAVIDROME_PID}" 2>/dev/null; then
      if [ "${NAVIDROME_PID}" -gt 0 ]; then
        echo "CRITICAL: Navidrome crashed! Restarting..."
      fi
      start_app
      register_service
    fi
    # Keep the 15s TTL check passing.
    pass_ttl
    # Run the daily backup once the interval has elapsed.
    now=$(date +%s)
    if [ $((now - LAST_BACKUP_TIME)) -ge "$BACKUP_INTERVAL" ]; then
      run_backup
      LAST_BACKUP_TIME=$now
    fi
  fi
  # Interruptible 5s pause (safe for the 15s TTL): backgrounding the sleep
  # and waiting on it lets the signal trap fire immediately.
  sleep 5 &
  wait "$!"
done