fix: revert to robust manual leadership detection to prevent multiple Navidrome instances
Some checks failed
Build and Push Docker Image / build-and-push (push) Has been cancelled
Some checks failed
Build and Push Docker Image / build-and-push (push) Has been cancelled
This commit is contained in:
103
entrypoint.sh
103
entrypoint.sh
@@ -7,12 +7,49 @@ SERVICE_ID="${SERVICE_NAME}-${NOMAD_ALLOC_ID:-$(hostname)}"
|
||||
# Port number for the service. Not referenced in the visible part of this
# script — presumably consumed by register_service; TODO confirm.
PORT=4533
|
||||
# Base URL of the Consul HTTP API. Taken from CONSUL_URL, falling back to
# http://localhost:8500 when that variable is unset or empty.
CONSUL_HTTP_ADDR="${CONSUL_URL:-http://localhost:8500}"
|
||||
# Copied verbatim from ADVERTISE_IP (empty when that variable is not set).
NODE_IP="${ADVERTISE_IP}"
|
||||
# Path of the ".primary" file under the /litefs mount.
# NOTE(review): no code visible here reads this variable — confirm it is still needed.
DB_LOCK_FILE="/litefs/.primary"
|
||||
# PID of the background Navidrome process. 0 means "not running": start_app
# stores the real PID here and stop_app resets it to 0.
NAVIDROME_PID=0
|
||||
|
||||
# Tags for the Primary service (Traefik enabled)
# A JSON array kept in single quotes so the backticks inside the Traefik
# Host(...) rules stay literal instead of being run as command substitution.
|
||||
PRIMARY_TAGS='["navidrome","web","traefik.enable=true","urlprefix-/navidrome","tools","traefik.http.routers.navidromelan.rule=Host(`navidrome.service.dc1.consul`)","traefik.http.routers.navidromewan.rule=Host(`m.fbleagh.duckdns.org`)","traefik.http.routers.navidromewan.middlewares=dex@consulcatalog","traefik.http.routers.navidromewan.tls=true"]'
|
||||
|
||||
# --- Helper Functions ---
|
||||
|
||||
# Check if this node is the LiteFS Primary
# LiteFS 0.5 status API returns a flat object: {"isPrimary": true, ...}
#
# Queries the local LiteFS status endpoint and inspects the top-level
# "isPrimary" field of the JSON reply.
#
# Arguments: none
# Returns:   0 when the reply is a JSON object whose isPrimary is true;
#            1 otherwise (replica, endpoint unreachable or too slow,
#            non-object or unparsable reply).
check_primary() {
  local status is_primary

  # Bound the request: this check runs on every pass of the supervisor loop,
  # so an unresponsive endpoint must not be able to block it indefinitely.
  # Any curl failure is treated as "no information" (an empty object).
  status=$(curl -s --connect-timeout 2 --max-time 5 http://localhost:20202/status) \
    || status="{}"

  # Anything that is not an object, lacks the field, or fails to parse is
  # reported as "false" so the caller fails closed (behaves as a replica).
  is_primary=$(printf '%s\n' "$status" \
    | jq -r 'if type == "object" then (.isPrimary // false) else false end' 2>/dev/null) \
    || is_primary="false"

  if [ "$is_primary" = "true" ]; then
    return 0 # We are the primary
  fi
  return 1 # We are a replica
}
|
||||
|
||||
# Wait for LiteFS to settle and determine its role
#
# Polls the local LiteFS status endpoint every 2 seconds until the reply is a
# JSON object carrying a boolean "isPrimary" field, then reports the role.
#
# Arguments: none
# Outputs:   progress dots and the detected role (or an error line) on stdout
# Returns:   0 once a role (primary or replica) is known;
#            1 if no role could be determined within 60 seconds.
wait_for_litefs() {
  local timeout=60
  local interval=2
  local elapsed=0
  local status is_primary_val role

  echo "Waiting for LiteFS to settle..."
  while [ "$elapsed" -lt "$timeout" ]; do
    status=$(curl -s http://localhost:20202/status) || status="null"

    # Emit the raw field (true / false / null). jq's `//` operator must not be
    # used here: it treats `false` like a missing value, which would make a
    # healthy replica look as if LiteFS had never initialized.
    is_primary_val=$(printf '%s\n' "$status" \
      | jq -r 'if type == "object" then .isPrimary else null end' 2>/dev/null) \
      || is_primary_val="null"

    # Only an explicit boolean counts as "settled"; empty output, null or any
    # other value means the role is not known yet.
    case "$is_primary_val" in
      true) role="primary" ;;
      false) role="replica" ;;
      *) role="" ;;
    esac

    if [ -n "$role" ]; then
      echo "LiteFS initialized. Role: $role"
      return 0
    fi

    sleep "$interval"
    elapsed=$((elapsed + interval))
    printf '.'
  done

  echo "ERROR: LiteFS failed to settle after ${timeout}s"
  return 1
}
|
||||
|
||||
# Register Service with TTL Check
|
||||
register_service() {
|
||||
echo "Registering service ${SERVICE_ID} with Consul..."
|
||||
@@ -40,37 +77,61 @@ deregister_service() {
|
||||
curl -s -X PUT "${CONSUL_HTTP_ADDR}/v1/agent/service/deregister/${SERVICE_ID}"
|
||||
}
|
||||
|
||||
# Start Navidrome in Background
#
# Creates the shared data directories, points Navidrome at the database on
# the LiteFS mount via ND_DBPATH, launches /app/navidrome as a background job
# and records its PID in the global NAVIDROME_PID.
#
# Globals:   ND_DBPATH (exported), NAVIDROME_PID (written)
# Arguments: none
# Outputs:   two status lines on stdout
start_app() {
  local db_dsn="/litefs/navidrome.db?cache=shared&_busy_timeout=15000&_journal_mode=WAL&_foreign_keys=on"

  printf '%s\n' "Node is Primary. Starting Navidrome..."

  # Shared directories live under /data; create them if they are missing.
  mkdir -p /data/plugins /data/cache /data/backup

  # Navidrome reads its database location from the environment.
  ND_DBPATH="$db_dsn"
  export ND_DBPATH

  /app/navidrome &
  NAVIDROME_PID=$!
  printf 'Navidrome running (PID: %s)\n' "$NAVIDROME_PID"
}
|
||||
|
||||
# Stop Navidrome
#
# Sends SIGTERM to the process recorded in NAVIDROME_PID, waits for it to
# exit and resets NAVIDROME_PID to 0. Does nothing when NAVIDROME_PID is not
# greater than 0.
#
# Globals:   NAVIDROME_PID (read, written)
# Arguments: none
stop_app() {
  # Guard: no recorded process, nothing to stop.
  [ "${NAVIDROME_PID}" -gt 0 ] || return 0

  local target_pid=${NAVIDROME_PID}
  printf 'Stopping Navidrome (PID %s)...\n' "$target_pid"
  kill -TERM "$target_pid"
  # wait fails if the PID is not (or no longer) a child of this shell; that
  # is not an error for shutdown purposes.
  wait "$target_pid" 2>/dev/null || true
  NAVIDROME_PID=0
}
|
||||
|
||||
# --- Cleanup ---
|
||||
# Shutdown handler: announce the shutdown, stop Navidrome, remove the service
# registration, then terminate the script with status 0.
#
# Arguments: none
# Returns:   never returns; exits the shell with status 0.
cleanup() {
  printf '%s\n' "Shutting down..."
  # Stop the application first, then withdraw the registration.
  stop_app
  deregister_service
  exit 0
}
|
||||
|
||||
trap cleanup TERM INT
|
||||
|
||||
# --- Main Logic ---
|
||||
# --- Main Loop ---
|
||||
|
||||
echo "Starting Navidrome Primary Node..."
|
||||
echo "Starting Supervisor. Waiting for leadership settle..."
|
||||
wait_for_litefs || exit 1
|
||||
|
||||
# 1. Ensure shared directories exist on persistent host volume
|
||||
mkdir -p /data/plugins /data/cache /data/backup
|
||||
|
||||
# 2. Tell Navidrome to use the database directly from the LiteFS mount.
|
||||
export ND_DBPATH="/litefs/navidrome.db?cache=shared&_busy_timeout=15000&_journal_mode=WAL&_foreign_keys=on"
|
||||
|
||||
# 3. Register with Consul
|
||||
register_service
|
||||
|
||||
# 4. Start Navidrome and Maintain TTL
|
||||
/app/navidrome &
|
||||
NAVIDROME_PID=$!
|
||||
|
||||
echo "Navidrome running (PID: $NAVIDROME_PID)"
|
||||
|
||||
while kill -0 $NAVIDROME_PID 2>/dev/null; do
|
||||
pass_ttl
|
||||
# Supervisor loop. Roughly every 10 seconds, re-check the LiteFS role:
#   - primary: make sure Navidrome is running (start and register it when it
#     is not), then refresh the TTL check;
#   - replica: if Navidrome was started by this script, stop it and remove
#     the service registration.
while true; do
  if check_primary; then
    # === WE ARE PRIMARY ===
    # Start (or restart) when nothing was launched yet or the recorded
    # process is no longer alive.
    if [ "${NAVIDROME_PID}" -eq 0 ] || ! kill -0 "${NAVIDROME_PID}" 2>/dev/null; then
      start_app
      register_service
    fi
    pass_ttl
  else
    # === WE ARE REPLICA ===
    if [ "${NAVIDROME_PID}" -gt 0 ]; then
      echo "Lost leadership. Demoting..."
      stop_app
      deregister_service
    fi
  fi

  # Sleep in the background and wait on it. Bash postpones trap handlers
  # until a foreground command finishes, so a plain `sleep 10` would delay
  # the TERM/INT cleanup by up to 10 seconds; `wait` is interrupted at once.
  sleep 10 &
  wait "$!" || true
done
|
||||
|
||||
cleanup
|
||||
|
||||
@@ -36,4 +36,3 @@ proxy:
|
||||
# Commands to run only on the primary node.
|
||||
exec:
|
||||
- cmd: "/usr/local/bin/entrypoint.sh"
|
||||
if-candidate: true
|
||||
|
||||
Reference in New Issue
Block a user