#!/bin/bash
#
# Supervisor for Navidrome running on top of LiteFS.
#
# Polls the local LiteFS HTTP endpoint (http://localhost:20202/info). While
# this node reports isPrimary=true, Navidrome is kept running and a Consul
# service with a TTL check is registered and heart-beaten. When the node stops
# being primary, Navidrome is stopped and the service is deregistered.
#
# Environment:
#   NOMAD_ALLOC_ID  optional; used to build the Consul service ID
#                   (falls back to the hostname)
#   CONSUL_URL      optional; Consul HTTP address (default http://localhost:8500)
#   ADVERTISE_IP    address advertised to Consul for this service
#
# NOTE: `set -e` is deliberately not enabled; the supervisor loop must survive
# transient curl/jq failures and retry on the next iteration.

# Configuration from environment
SERVICE_NAME="navidrome"
SERVICE_ID="${SERVICE_NAME}-${NOMAD_ALLOC_ID:-$(hostname)}"
PORT=4533
CONSUL_HTTP_ADDR="${CONSUL_URL:-http://localhost:8500}"
NODE_IP="${ADVERTISE_IP:-}"
NAVIDROME_PID=0 # 0 means "no Navidrome process started by this supervisor"

# Tags for the Primary service (Traefik enabled)
PRIMARY_TAGS='["navidrome","web","traefik.enable=true","urlprefix-/navidrome","tools","traefik.http.routers.navidromelan.rule=Host(`navidrome.service.dc1.consul`)","traefik.http.routers.navidromewan.rule=Host(`m.fbleagh.duckdns.org`)","traefik.http.routers.navidromewan.middlewares=dex@consulcatalog","traefik.http.routers.navidromewan.tls=true"]'

# --- Helper Functions ---

# Sleep for $1 seconds in a way that lets TERM/INT traps fire immediately.
# Bash defers traps while a foreground command runs, but the `wait` builtin
# returns as soon as a trapped signal arrives.
interruptible_sleep() {
  sleep "$1" &
  wait "$!"
}

# Return 0 if a Navidrome process started by this supervisor is still alive.
app_running() {
  [[ "${NAVIDROME_PID}" -gt 0 ]] && kill -0 "${NAVIDROME_PID}" 2>/dev/null
}

# Check if this node is the LiteFS Primary.
# Returns 0 when /info reports isPrimary=true, 1 otherwise (including when
# the endpoint is unreachable or returns something that is not an object).
check_primary() {
  local status is_primary
  status=$(curl -s http://localhost:20202/info) || status="{}"
  is_primary=$(jq -r 'if type == "object" then (.isPrimary // false) else false end' \
    <<<"${status}" 2>/dev/null) || is_primary="false"
  [[ "${is_primary}" == "true" ]]
}

# Wait for LiteFS to settle and determine its role.
# Polls every 2s for up to 60s until /info returns an object carrying a
# boolean isPrimary field. Returns 0 once the role is known, 1 on timeout.
wait_for_litefs() {
  echo "Waiting for LiteFS to settle..."
  local timeout=60
  local count=0
  local status is_primary_val role

  while [[ "${count}" -lt "${timeout}" ]]; do
    status=$(curl -s http://localhost:20202/info) || status="null"
    # has() is used instead of `.isPrimary // "null"`: the `//` operator
    # treats `false` like a missing value, which made every replica look
    # "not settled" and time out.
    is_primary_val=$(jq -r \
      'if type == "object" and has("isPrimary") then .isPrimary else "null" end' \
      <<<"${status}" 2>/dev/null) || is_primary_val="null"

    case "${is_primary_val}" in
      true | false)
        role="replica"
        if [[ "${is_primary_val}" == "true" ]]; then
          role="primary"
        fi
        echo "LiteFS initialized. Role: ${role}"
        return 0
        ;;
    esac

    interruptible_sleep 2
    count=$((count + 2))
    printf '.'
  done

  echo "ERROR: LiteFS failed to settle after ${timeout}s" >&2
  return 1
}

# Register Service with TTL Check.
# Returns non-zero if the payload cannot be built or Consul rejects the call.
register_service() {
  echo "Registering service ${SERVICE_ID} with Consul..."
  local payload
  # Build the JSON with jq so that values are escaped correctly.
  payload=$(jq -n \
    --arg id "${SERVICE_ID}" \
    --arg name "${SERVICE_NAME}" \
    --arg address "${NODE_IP}" \
    --argjson tags "${PRIMARY_TAGS}" \
    --argjson port "${PORT}" \
    '{
      ID: $id,
      Name: $name,
      Tags: $tags,
      Address: $address,
      Port: $port,
      Check: { DeregisterCriticalServiceAfter: "1m", TTL: "15s" }
    }') || {
    echo "ERROR: failed to build Consul registration payload" >&2
    return 1
  }

  if ! curl -fsS -X PUT "${CONSUL_HTTP_ADDR}/v1/agent/service/register" -d "${payload}"; then
    echo "WARNING: failed to register service ${SERVICE_ID} with Consul" >&2
    return 1
  fi
}

# Send Heartbeat to Consul.
# Returns non-zero when the request fails (-f turns HTTP errors into a
# non-zero exit status so the caller can re-register).
pass_ttl() {
  curl -fsS -X PUT "${CONSUL_HTTP_ADDR}/v1/agent/check/pass/service:${SERVICE_ID}" >/dev/null
}

# Deregister Service
deregister_service() {
  echo "Deregistering service ${SERVICE_ID} from Consul..."
  curl -s -X PUT "${CONSUL_HTTP_ADDR}/v1/agent/service/deregister/${SERVICE_ID}"
}

# Start Navidrome in Background.
# Seeds the database from the newest backup when /data/navidrome.db is
# missing, waits for the DB file to appear, bind-mounts it into /shared_data
# and launches /app/navidrome. Sets NAVIDROME_PID on success.
# Returns 1 (and starts nothing) if the DB never appears or the mount fails.
start_app() {
  echo "Node is Primary. Starting Navidrome..."

  # Ensure shared directories exist
  mkdir -p /shared_data/plugins /shared_data/cache /shared_data/backup /shared_data/artist_images

  # SEEDING LOGIC: If DB doesn't exist, restore from backup
  if [[ ! -f /data/navidrome.db ]]; then
    echo "Database /data/navidrome.db not found. Looking for backups to seed..."

    # Pick the most recently modified backup without parsing `ls` output.
    local latest_backup="" candidate
    for candidate in /shared_data/backup/navidrome.db_*.bak; do
      [[ -e "${candidate}" ]] || continue # glob matched nothing
      if [[ -z "${latest_backup}" || "${candidate}" -nt "${latest_backup}" ]]; then
        latest_backup="${candidate}"
      fi
    done

    if [[ -n "${latest_backup}" ]]; then
      echo "Seeding from ${latest_backup}..."
      # We use litefs import to properly initialize the distributed state
      if ! litefs import -name navidrome.db "${latest_backup}"; then
        echo "WARNING: litefs import of ${latest_backup} failed" >&2
      fi
    else
      # NOTE(review): despite this message, the wait below still requires
      # /data/navidrome.db to exist, so a node with no DB and no backup will
      # time out instead of starting fresh — confirm the intended behavior.
      echo "No backups found. Navidrome will start with a fresh database."
    fi
  fi

  # Wait for LiteFS to expose the DB file in the FUSE mount
  echo "Waiting for /data/navidrome.db..."
  local db_timeout=30
  local db_count=0
  while [[ ! -f /data/navidrome.db && "${db_count}" -lt "${db_timeout}" ]]; do
    sleep 1
    db_count=$((db_count + 1))
  done

  if [[ ! -f /data/navidrome.db ]]; then
    echo "ERROR: /data/navidrome.db did not appear after ${db_timeout}s" >&2
    return 1
  fi

  # Bind mount the DB so Navidrome finds it in its DataFolder
  touch /shared_data/navidrome.db
  if ! mount --bind /data/navidrome.db /shared_data/navidrome.db; then
    # Without the bind mount Navidrome would open the empty placeholder file
    # created by `touch` instead of the LiteFS database, so refuse to start.
    echo "ERROR: failed to bind-mount /data/navidrome.db onto /shared_data/navidrome.db" >&2
    return 1
  fi

  # Configuration
  export ND_DATAFOLDER="/shared_data"
  export ND_CACHEFOLDER="/shared_data/cache"
  export ND_BACKUP_PATH="/shared_data/backup"
  export ND_PLUGINS_FOLDER="/shared_data/plugins"
  export ND_ARTISTIMAGEFOLDER="artist_images"

  /app/navidrome &
  NAVIDROME_PID=$!
  echo "Navidrome running (PID: ${NAVIDROME_PID}) with data folder at /shared_data (DB bind-mounted to /data)"
}

# Stop Navidrome.
# Sends TERM, reaps the process, resets NAVIDROME_PID and removes the bind
# mount. Safe to call when the process has already exited.
stop_app() {
  if [[ "${NAVIDROME_PID}" -gt 0 ]]; then
    echo "Stopping Navidrome (PID ${NAVIDROME_PID})..."
    kill -TERM "${NAVIDROME_PID}" 2>/dev/null || true
    wait "${NAVIDROME_PID}" 2>/dev/null || true
    NAVIDROME_PID=0
    umount /shared_data/navidrome.db 2>/dev/null || true
  fi
}

# --- Cleanup ---

# Signal handler: stop the app, deregister from Consul and exit.
cleanup() {
  echo "Shutting down..."
  stop_app
  deregister_service
  exit 0
}

trap cleanup TERM INT

# --- Main Loop ---
echo "Starting Supervisor. Waiting for leadership settle..."
wait_for_litefs || exit 1

while true; do
  if check_primary; then
    # === WE ARE PRIMARY ===
    if ! app_running; then
      if [[ "${NAVIDROME_PID}" -gt 0 ]]; then
        # The previous process died: reap it and drop the stale bind mount
        # so start_app does not stack a second mount on top of it.
        echo "Navidrome (PID ${NAVIDROME_PID}) is no longer running. Restarting..."
        stop_app
      fi
      # Only advertise the service when the app actually started.
      if start_app; then
        register_service
        pass_ttl
      fi
    elif ! pass_ttl; then
      # The heartbeat failed (e.g. Consul no longer knows the check);
      # re-register so the service becomes discoverable again.
      echo "Heartbeat failed. Re-registering service ${SERVICE_ID}..."
      if register_service; then
        pass_ttl
      fi
    fi
  else
    # === WE ARE REPLICA ===
    if [[ "${NAVIDROME_PID}" -gt 0 ]]; then
      echo "Lost leadership. Demoting..."
      stop_app
      deregister_service
    fi
  fi

  interruptible_sleep 10
done