#!/bin/bash
#
# Supervisor configuration for a Navidrome instance that is registered in
# Consul. The values below are read by the helper functions and the main loop
# further down in this file.
#
# Environment:
#   NOMAD_ALLOC_ID  optional; makes the Consul service ID unique per
#                   allocation (falls back to the output of `hostname`)
#   CONSUL_URL      optional; base URL of the Consul HTTP API
#                   (default: http://localhost:8500)
#   ADVERTISE_IP    address registered in Consul for this node

set -e

# Configuration from environment
SERVICE_NAME="navidrome"
# Use Nomad allocation ID for a unique service ID
SERVICE_ID="${SERVICE_NAME}-${NOMAD_ALLOC_ID:-$(hostname)}"
PORT=4533
CONSUL_HTTP_ADDR="${CONSUL_URL:-http://localhost:8500}"
NODE_IP="${ADVERTISE_IP:-}"
# Marker file checked by the main loop to decide between primary and replica.
DB_LOCK_FILE="/data/.primary"
# PID of the background Navidrome process; 0 means "not running".
NAVIDROME_PID=0

if [[ -z "${NODE_IP}" ]]; then
  echo "WARNING: ADVERTISE_IP is not set; the service will be registered with an empty Address." >&2
fi

# Tags for the Primary service (Traefik enabled)
# Kept as a ready-made JSON array literal because it is spliced verbatim into
# the registration payload.
PRIMARY_TAGS='["navidrome","web","traefik.enable=true","urlprefix-/navidrome","tools","traefik.http.routers.navidromelan.rule=Host(`navidrome.service.dc1.consul`)","traefik.http.routers.navidromewan.rule=Host(`m.fbleagh.duckdns.org`)","traefik.http.routers.navidromewan.middlewares=dex@consulcatalog","traefik.http.routers.navidromewan.tls=true"]'

# --- Helper Functions ---

#######################################
# Register Service with TTL Check.
# Sends a service definition (ID, name, tags, address, port and a 15s TTL
# check that deregisters after 1m critical) to the Consul agent.
# Globals (read): CONSUL_HTTP_ADDR, SERVICE_ID, SERVICE_NAME, PRIMARY_TAGS,
#                 NODE_IP, PORT
# Outputs: progress message and the agent's response body on stdout;
#          curl error messages on stderr
# Returns: curl's exit status (non-zero when the request could not be
#          completed within 5 seconds)
#######################################
register_service() {
  local payload

  echo "Promoted! Registering service ${SERVICE_ID}..."

  # PRIMARY_TAGS is already a JSON array literal, so it is inserted unquoted.
  printf -v payload '{
  "ID": "%s",
  "Name": "%s",
  "Tags": %s,
  "Address": "%s",
  "Port": %s,
  "Check": {
    "DeregisterCriticalServiceAfter": "1m",
    "TTL": "15s"
  }
}' "${SERVICE_ID}" "${SERVICE_NAME}" "${PRIMARY_TAGS}" "${NODE_IP}" "${PORT}"

  # --max-time bounds the call: bash only runs signal traps once the current
  # foreground command has finished, so an unbounded curl against a hung agent
  # would also block shutdown. -S keeps curl's error message visible with -s.
  curl -sS --max-time 5 -X PUT \
    "${CONSUL_HTTP_ADDR}/v1/agent/service/register" -d "${payload}"
}

#######################################
# Send Heartbeat to Consul.
# Marks the TTL check "service:${SERVICE_ID}" as passing.
# Globals (read): CONSUL_HTTP_ADDR, SERVICE_ID
# Outputs: nothing on stdout; curl error messages on stderr
# Returns: 0 when the agent accepted the heartbeat; curl's non-zero exit
#          status when the agent was unreachable, timed out, or answered with
#          an HTTP error. Under `set -e` an unguarded call therefore aborts
#          the script on a rejected heartbeat, as it already did on a
#          connection failure.
#######################################
pass_ttl() {
  # -f turns an HTTP error into a failure; the response body is discarded, so
  # without it a rejected heartbeat would go completely unnoticed.
  # --max-time keeps a hung agent from stalling the caller.
  curl -sS -f --max-time 5 -X PUT \
    "${CONSUL_HTTP_ADDR}/v1/agent/check/pass/service:${SERVICE_ID}" > /dev/null
}

#######################################
# Deregister Service.
# Asks the Consul agent to remove the service registered under SERVICE_ID.
# Globals (read): CONSUL_HTTP_ADDR, SERVICE_ID
# Outputs: progress message and the agent's response body on stdout;
#          curl error messages on stderr
# Returns: curl's exit status (non-zero when the request could not be
#          completed within 5 seconds)
#######################################
deregister_service() {
  echo "Demoted/Stopping. Deregistering service ${SERVICE_ID}..."
  # Bounded so that shutdown/demotion cannot hang on an unresponsive agent.
  curl -sS --max-time 5 -X PUT \
    "${CONSUL_HTTP_ADDR}/v1/agent/service/deregister/${SERVICE_ID}"
}

#######################################
# Start Navidrome in Background.
# Exports the database path and data folder for Navidrome, makes sure the
# data folder exists, launches /app/navidrome as a background job and
# remembers its PID.
# Globals (written): ND_DATABASE_PATH, ND_DATAFOLDER (exported),
#                    NAVIDROME_PID
# Outputs: progress messages on stdout
#######################################
start_app() {
  echo "Node is Primary. Starting Navidrome..."

  # Ensure DB path and local data folder are set
  export ND_DATABASE_PATH="/data/navidrome.db"
  export ND_DATAFOLDER="/local/data"
  # Create the folder from the exported value so the two cannot drift apart.
  mkdir -p -- "${ND_DATAFOLDER}"

  /app/navidrome &
  NAVIDROME_PID=$!
  echo "Navidrome started with PID ${NAVIDROME_PID}"
}

#######################################
# Stop Navidrome.
# Sends SIGTERM to the recorded Navidrome process (if any), waits for it to
# exit and resets NAVIDROME_PID to 0. Does nothing when NAVIDROME_PID is 0.
# Globals (read/written): NAVIDROME_PID
# Outputs: progress message on stdout
# Returns: 0
#######################################
stop_app() {
  if [[ "${NAVIDROME_PID}" -gt 0 ]]; then
    echo "Stopping Navidrome (PID ${NAVIDROME_PID})..."
    # The process may already be gone (e.g. it crashed). A failing kill must
    # not abort the script under `set -e`, otherwise callers would never reach
    # their remaining shutdown steps such as deregistration.
    kill -TERM "${NAVIDROME_PID}" 2>/dev/null || true
    wait "${NAVIDROME_PID}" 2>/dev/null || true
    NAVIDROME_PID=0
  fi
}

# --- Signal Handling (The Safety Net) ---
# If Nomad stops the container, we stop the app and deregister.

#######################################
# Signal handler: stop Navidrome, deregister from Consul, exit 0.
# Outputs: progress message on stdout; warnings on stderr
# Never returns; always exits the script with status 0.
#######################################
cleanup() {
  echo "Caught signal, shutting down..."
  # Both steps are best-effort: a failure in one must not prevent the other,
  # nor turn the intended clean exit into an error exit under `set -e`.
  stop_app || echo "WARNING: stopping Navidrome failed" >&2
  deregister_service || echo "WARNING: Consul deregistration failed" >&2
  exit 0
}

trap cleanup TERM INT

# --- Main Loop ---
# Every 5 seconds: decide whether this node is primary or replica, keep
# Navidrome running and registered only on the primary, and refresh the
# Consul TTL check.

echo "Starting Supervisor. Waiting for leadership settle..."
echo "Node IP: $NODE_IP"
echo "Consul: $CONSUL_HTTP_ADDR"

# Small sleep to let LiteFS settle and leadership election complete.
# Run in the background and waited on so that a trapped signal can interrupt
# it immediately; `|| true` keeps the interrupted wait from tripping `set -e`.
sleep 5 &
wait $! || true

while true; do
  # In LiteFS 0.5, .primary file exists ONLY on replicas.
  # NOTE(review): a missing file is also what an unmounted LiteFS directory
  # would look like - confirm the mount is guaranteed before this script runs.
  if [[ ! -f "$DB_LOCK_FILE" ]]; then
    # === WE ARE PRIMARY ===

    # 1. If App is not running, start it and register
    if [[ "${NAVIDROME_PID}" -eq 0 ]] || ! kill -0 "${NAVIDROME_PID}" 2>/dev/null; then
      if [[ "${NAVIDROME_PID}" -gt 0 ]]; then
        echo "CRITICAL: Navidrome crashed! Restarting..."
      fi
      start_app
      # A Consul problem must not take the supervisor (and with it the
      # running app) down; the heartbeat step below retries registration.
      register_service \
        || echo "WARNING: registering ${SERVICE_ID} in Consul failed" >&2
    fi

    # 2. Maintain the heartbeat (TTL)
    # If the heartbeat fails, register again so the service comes back once
    # Consul is reachable, instead of staying unregistered.
    if ! pass_ttl; then
      echo "WARNING: Consul heartbeat for ${SERVICE_ID} failed; re-registering..." >&2
      register_service \
        || echo "WARNING: registering ${SERVICE_ID} in Consul failed" >&2
    fi
  else
    # === WE ARE REPLICA ===

    # If App is running (we were just demoted), stop it
    if [[ "${NAVIDROME_PID}" -gt 0 ]]; then
      echo "Lost leadership. Demoting..."
      stop_app || echo "WARNING: stopping Navidrome failed" >&2
      deregister_service \
        || echo "WARNING: deregistering ${SERVICE_ID} from Consul failed" >&2
    fi

    # No service registration exists for replicas to keep Consul clean.
  fi

  # Sleep short enough to update TTL (every 5s is safe for 15s TTL)
  sleep 5 &
  # Wait allows the 'trap' to interrupt the sleep instantly
  wait $! || true
done