Files
navidrome-litefs/entrypoint.sh
sstent 37f0dcb1e7
Some checks failed
Build and Push Docker Image / build-and-push (push) Has been cancelled
fix: revert to robust manual leadership detection to prevent multiple Navidrome instances
2026-04-27 08:54:55 -07:00

138 lines
3.9 KiB
Bash

#!/bin/bash
set -o errexit

# --- Configuration (environment-driven where noted) ---
SERVICE_NAME="navidrome"
# Service ID suffix: NOMAD_ALLOC_ID when set and non-empty, otherwise the hostname.
SERVICE_ID="${NOMAD_ALLOC_ID:-$(hostname)}"
SERVICE_ID="${SERVICE_NAME}-${SERVICE_ID}"
PORT=4533
# Consul HTTP API base URL; CONSUL_URL overrides the localhost default.
CONSUL_HTTP_ADDR="${CONSUL_URL:-http://localhost:8500}"
# Address advertised in the Consul registration (taken verbatim from ADVERTISE_IP).
NODE_IP="${ADVERTISE_IP}"
# NOTE(review): not referenced elsewhere in the visible part of this script.
DB_LOCK_FILE="/litefs/.primary"
# PID of the supervised Navidrome process; 0 means none has been started.
NAVIDROME_PID=0

# Consul tags attached when registering the Primary (Traefik routing enabled).
# Kept as a JSON array literal because it is spliced directly into the
# registration payload.
PRIMARY_TAGS='["navidrome","web","traefik.enable=true","urlprefix-/navidrome","tools","traefik.http.routers.navidromelan.rule=Host(`navidrome.service.dc1.consul`)","traefik.http.routers.navidromewan.rule=Host(`m.fbleagh.duckdns.org`)","traefik.http.routers.navidromewan.middlewares=dex@consulcatalog","traefik.http.routers.navidromewan.tls=true"]'
# --- Helper Functions ---
# Check whether this node is currently the LiteFS Primary.
# LiteFS 0.5 status API returns a flat object: {"isPrimary": true, ...}
# Globals:   none
# Arguments: none
# Returns:   0 if the status endpoint reports isPrimary == true,
#            1 otherwise (replica, unreachable/slow endpoint, or unparsable reply)
check_primary() {
  local status is_primary

  # Bound the request so a hung status endpoint cannot stall the supervisor
  # loop; any curl failure is treated as "not primary".
  status=$(curl -s --max-time 5 http://localhost:20202/status) || status="{}"

  # Anything that is not a JSON object with a truthy isPrimary counts as false.
  is_primary=$(jq -r 'if type == "object" then (.isPrimary // false) else false end' \
    <<<"$status" 2>/dev/null) || is_primary="false"

  [ "$is_primary" = "true" ]
}
# Wait for LiteFS to settle and determine its role.
# Polls the local status endpoint every 2 seconds until it reports an
# isPrimary value, giving up once 60 seconds of sleep time have accumulated.
# Globals:   none
# Arguments: none
# Outputs:   progress and the detected role on stdout
# Returns:   0 once a role (primary or replica) is reported, 1 on timeout
wait_for_litefs() {
  echo "Waiting for LiteFS to settle..."
  local timeout=60
  local count=0
  local status is_primary_val role

  while [ "$count" -lt "$timeout" ]; do
    # Bound each probe so a hung endpoint cannot defeat the overall timeout.
    status=$(curl -s --max-time 2 http://localhost:20202/status) || status="null"

    # Do NOT use `.isPrimary // "null"` here: jq's `//` treats `false` like a
    # missing value, which would make every replica look "not settled".
    is_primary_val=$(jq -r 'if type == "object" then (.isPrimary | if . == null then "null" else . end) else "null" end' \
      <<<"$status" 2>/dev/null) || is_primary_val="null"

    if [ -n "$is_primary_val" ] && [ "$is_primary_val" != "null" ]; then
      role="replica"
      if [ "$is_primary_val" = "true" ]; then
        role="primary"
      fi
      echo "LiteFS initialized. Role: $role"
      return 0
    fi

    sleep 2
    count=$((count + 2))
    echo -n "."
  done

  echo "ERROR: LiteFS failed to settle after ${timeout}s"
  return 1
}
# Register this instance with the Consul agent, attaching a TTL health check.
# Globals:   CONSUL_HTTP_ADDR, SERVICE_ID, SERVICE_NAME, PRIMARY_TAGS,
#            NODE_IP, PORT (all read)
# Outputs:   a progress line, then whatever the Consul API returns, on stdout
# Returns:   exit status of curl
register_service() {
  local endpoint="${CONSUL_HTTP_ADDR}/v1/agent/service/register"
  local payload

  echo "Registering service ${SERVICE_ID} with Consul..."

  # PRIMARY_TAGS and PORT are spliced in unquoted on purpose: they are a JSON
  # array and a JSON number respectively.
  payload=$(cat <<EOF
{
"ID": "${SERVICE_ID}",
"Name": "${SERVICE_NAME}",
"Tags": ${PRIMARY_TAGS},
"Address": "${NODE_IP}",
"Port": ${PORT},
"Check": {
"DeregisterCriticalServiceAfter": "1m",
"TTL": "15s"
}
}
EOF
)

  curl -s -X PUT "${endpoint}" -d "${payload}"
}
# Send a heartbeat to Consul by marking this service's TTL check as passing.
# Globals:   CONSUL_HTTP_ADDR, SERVICE_ID (read)
# Outputs:   nothing (the API response body is discarded)
# Returns:   exit status of curl
pass_ttl() {
  local check_id="service:${SERVICE_ID}"
  curl --silent --request PUT \
    "${CONSUL_HTTP_ADDR}/v1/agent/check/pass/${check_id}" >/dev/null
}
# Remove this instance's service entry from the Consul agent.
# Globals:   CONSUL_HTTP_ADDR, SERVICE_ID (read)
# Outputs:   a progress line, then whatever the Consul API returns, on stdout
# Returns:   exit status of curl
deregister_service() {
  local endpoint="${CONSUL_HTTP_ADDR}/v1/agent/service/deregister/${SERVICE_ID}"
  printf '%s\n' "Deregistering service ${SERVICE_ID} from Consul..."
  curl --silent --request PUT "${endpoint}"
}
# Launch Navidrome as a background child of this supervisor.
# Globals:   NAVIDROME_PID (written), ND_DBPATH (exported)
# Outputs:   progress lines on stdout
start_app() {
  # Shared directories expected on the persistent host volume.
  local -a shared_dirs=(/data/plugins /data/cache /data/backup)

  printf '%s\n' "Node is Primary. Starting Navidrome..."
  mkdir -p "${shared_dirs[@]}"

  # Point Navidrome at the database that lives on the LiteFS mount.
  ND_DBPATH="/litefs/navidrome.db?cache=shared&_busy_timeout=15000&_journal_mode=WAL&_foreign_keys=on"
  export ND_DBPATH

  /app/navidrome &
  NAVIDROME_PID=$!
  printf '%s\n' "Navidrome running (PID: ${NAVIDROME_PID})"
}
# Stop Navidrome if this supervisor started it.
# Sends SIGTERM, waits for the process to exit, then resets NAVIDROME_PID.
# Globals:   NAVIDROME_PID (read, reset to 0)
# Outputs:   a progress line on stdout when there is a PID to stop
# Returns:   0, including when the recorded PID no longer exists
stop_app() {
  if [ "${NAVIDROME_PID}" -gt 0 ]; then
    echo "Stopping Navidrome (PID ${NAVIDROME_PID})..."
    # The process may already have exited (e.g. it crashed). Under `set -e` a
    # failing kill would abort the caller before it can deregister, so a dead
    # PID is deliberately tolerated here.
    kill -TERM "${NAVIDROME_PID}" 2>/dev/null || true
    # wait reports the child's (signal) exit status, or 127 if it was already
    # reaped; neither is an error for the caller.
    wait "${NAVIDROME_PID}" 2>/dev/null || true
    NAVIDROME_PID=0
  fi
}
# --- Cleanup ---
# Signal handler: stop Navidrome, deregister from Consul, and exit cleanly.
# Each step is best-effort. Under `set -e` an unguarded failure in stop_app
# would otherwise abort the handler before deregistration and turn a
# requested shutdown into a non-zero exit.
# Outputs:   progress on stdout, warnings on stderr
# Returns:   never returns; always exits the script with status 0
cleanup() {
  echo "Shutting down..."
  stop_app || echo "WARNING: stopping Navidrome failed; continuing shutdown" >&2
  deregister_service || echo "WARNING: Consul deregistration failed; continuing shutdown" >&2
  exit 0
}
trap cleanup TERM INT

# --- Main Loop ---
# Supervises Navidrome according to the LiteFS role of this node:
#   primary -> make sure Navidrome runs, is registered in Consul, and its TTL
#              check is refreshed every cycle;
#   replica -> make sure Navidrome is stopped and deregistered.
# Consul calls are best-effort: a transient Consul failure must not take down
# the supervisor (and Navidrome with it) via `set -e`; it is logged and the
# registration is retried on the next cycle instead.

# 1 once the running Navidrome instance has been registered with Consul.
SERVICE_REGISTERED=0

echo "Starting Supervisor. Waiting for leadership settle..."
wait_for_litefs || exit 1

while true; do
  if check_primary; then
    # === WE ARE PRIMARY ===
    if [ "${NAVIDROME_PID}" -eq 0 ] || ! kill -0 "${NAVIDROME_PID}" 2>/dev/null; then
      start_app
      # A fresh instance always gets a fresh registration.
      SERVICE_REGISTERED=0
    fi

    if [ "${SERVICE_REGISTERED}" -eq 0 ]; then
      if register_service; then
        SERVICE_REGISTERED=1
      else
        echo "WARNING: Consul registration failed; will retry on next cycle" >&2
      fi
    fi

    if [ "${SERVICE_REGISTERED}" -eq 1 ] && ! pass_ttl; then
      echo "WARNING: Consul TTL heartbeat failed; will re-register on next cycle" >&2
      SERVICE_REGISTERED=0
    fi
  else
    # === WE ARE REPLICA ===
    if [ "${NAVIDROME_PID}" -gt 0 ]; then
      echo "Lost leadership. Demoting..."
      # Navidrome may already be gone (crashed while primary); that must not
      # abort the supervisor.
      stop_app || echo "WARNING: stopping Navidrome failed" >&2
      deregister_service || echo "WARNING: Consul deregistration failed" >&2
      SERVICE_REGISTERED=0
    fi
  fi

  # Sleep in the background and wait on it: bash defers trap handlers until a
  # foreground command finishes, but the `wait` builtin returns immediately
  # when a trapped signal arrives, so TERM/INT are handled without delay.
  sleep 10 &
  wait "$!" || true
done