feat: Add automated LiteFS backups and GitHub deployment workflow
All checks were successful
Build and Push Docker Image / build-and-push (push) Successful in 3m52s
.github/workflows/deploy.yml (new file, 50 lines, vendored)
@@ -0,0 +1,50 @@
name: Deploy to Nomad

on:
  workflow_run:
    workflows: ["Build and Push Docker Image"]
    types:
      - completed
  workflow_dispatch:
    inputs:
      container_sha:
        description: 'Container SHA to deploy (leave empty for latest commit)'
        required: false
        type: string

jobs:
  nomad:
    runs-on: ubuntu-latest
    name: Deploy to Nomad
    if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Setup Nomad CLI
        uses: hashicorp/setup-nomad@v2
        with:
          version: '1.10.5'

      - name: Set Container Version
        id: container_version
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ -n "${{ inputs.container_sha }}" ]; then
            echo "sha=${{ inputs.container_sha }}" >> $GITHUB_OUTPUT
          elif [ "${{ github.event_name }}" = "workflow_run" ]; then
            echo "sha=${{ github.event.workflow_run.head_sha }}" >> $GITHUB_OUTPUT
          else
            echo "sha=${{ github.sha }}" >> $GITHUB_OUTPUT
          fi

      - name: Deploy Nomad Job
        id: deploy
        env:
          NOMAD_ADDR: http://192.168.4.36:4646
          NOMAD_TOKEN: ${{ secrets.NOMAD_TOKEN }}
        run: |
          echo "Deploying container version: ${{ steps.container_version.outputs.sha }}"
          nomad job run \
            -var="container_sha=${{ steps.container_version.outputs.sha }}" \
            navidrome-litefs.nomad
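The `Set Container Version` step is a three-way precedence chain: an explicit `workflow_dispatch` input wins, then the triggering workflow's head SHA, then the current commit. A minimal standalone sketch of the same logic (the function and its arguments are hypothetical stand-ins for the GitHub Actions contexts):

```shell
# pick_sha <input_sha> <workflow_run_head_sha> <current_sha>
# Echoes the SHA that should be deployed, using the same precedence
# as the workflow step: pinned input > triggering run's head > HEAD.
pick_sha() {
  local input_sha="$1" workflow_run_sha="$2" current_sha="$3"
  if [ -n "$input_sha" ]; then
    echo "$input_sha"
  elif [ -n "$workflow_run_sha" ]; then
    echo "$workflow_run_sha"
  else
    echo "$current_sha"
  fi
}
```

Keeping the selection in one step means the deploy step only ever reads `steps.container_version.outputs.sha`, regardless of how the workflow was triggered.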
@@ -1,30 +0,0 @@
# Plan: Cluster Diagnosis and Script Enhancement (`diagnose_and_enhance`)

## Phase 1: Enhanced Diagnostics (Consul) [x] [checkpoint: a686c5b]
- [x] Task: Update `consul_client.py` to fetch detailed health check output
  - [x] Write tests for fetching `Output` field from Consul checks
  - [x] Implement logic to extract and store the `Output` (error message)
- [x] Task: Update aggregator and formatter to display Consul errors
  - [x] Update aggregation logic to include `consul_error`
  - [x] Update table formatter to indicate an error (e.g. a flag or color)
  - [x] Add a "Diagnostics" section to the output to print full error details
- [x] Task: Conductor - User Manual Verification 'Phase 1: Enhanced Diagnostics (Consul)' (Protocol in workflow.md)

## Phase 2: Nomad Integration and Logs [x] [checkpoint: 6d77729]
- [x] Task: Implement `nomad_client.py` wrapper
  - [x] Write tests for `get_allocation_logs`, `get_node_status`, and `restart_allocation` (mocking subprocess)
  - [x] Implement `subprocess.run(["nomad", ...])` logic to fetch logs and restart allocations
- [x] Task: Integrate Nomad logs into diagnosis
  - [x] Update aggregator to call Nomad client for critical nodes
  - [x] Update "Diagnostics" section to display the last 20 lines of stderr
- [x] Task: Conductor - User Manual Verification 'Phase 2: Nomad Integration and Logs' (Protocol in workflow.md)

## Phase 3: Advanced LiteFS Status [ ]
- [ ] Task: Implement `litefs_status` via `nomad alloc exec`
  - [ ] Write tests for executing remote commands via Nomad
  - [ ] Update `litefs_client.py` to fall back to `nomad alloc exec` if HTTP fails
  - [ ] Parse `litefs status` output (text/json) to extract uptime and replication lag
- [ ] Task: Final Polish and Diagnosis Run
  - [ ] Ensure all pieces work together
  - [ ] Run the script to diagnose `odroid8`
- [ ] Task: Conductor - User Manual Verification 'Phase 3: Advanced LiteFS Status' (Protocol in workflow.md)
@@ -1,22 +0,0 @@
# Plan: Fix LiteFS Configuration and Process Management (`fix_litefs_config`)

## Phase 1: Configuration and Image Structure [ ]
- [x] Task: Update `litefs.yml` to include the `exec` block (396dfeb)
- [x] Task: Update `Dockerfile` to use LiteFS as the supervisor (`ENTRYPOINT ["litefs", "mount"]`) (ef91b8e)
- [x] Task: Update `navidrome-litefs-v2.nomad` with corrected storage paths (`ND_DATAFOLDER`, `ND_CACHEFOLDER`, `ND_BACKUP_PATH`) (5cbb657)
- [ ] Task: Conductor - User Manual Verification 'Phase 1: Configuration and Image Structure' (Protocol in workflow.md)

## Phase 2: Entrypoint and Registration Logic [x] [checkpoint: 9cd5455]
- [x] Task: Refactor `entrypoint.sh` to handle leadership-aware process management (9cd5455)
  - [x] Integrate Consul registration logic (from `register.sh`)
  - [x] Implement loop to start/stop Navidrome based on `/data/.primary` existence
  - [x] Ensure proper signal handling for Navidrome shutdown
- [x] Task: Clean up redundant scripts (e.g., `register.sh` if fully integrated) (9cd5455)
- [ ] Task: Conductor - User Manual Verification 'Phase 2: Entrypoint and Registration Logic' (Protocol in workflow.md)

## Phase 3: Deployment and Failover Verification [ ]
- [ ] Task: Build and push the updated Docker image via Gitea Actions (if possible) or manual trigger
- [ ] Task: Deploy the updated Nomad job
- [ ] Task: Verify cluster health and process distribution using the `cluster_status` script
- [ ] Task: Perform a manual failover (stop the primary allocation) and verify Navidrome migrates correctly
- [ ] Task: Conductor - User Manual Verification 'Phase 3: Deployment and Failover Verification' (Protocol in workflow.md)
@@ -1,5 +0,0 @@
# Track fix_navidrome_paths_20260209 Context

- [Specification](./spec.md)
- [Implementation Plan](./plan.md)
- [Metadata](./metadata.json)
@@ -1,8 +0,0 @@
{
  "track_id": "fix_navidrome_paths_20260209",
  "type": "bug",
  "status": "new",
  "created_at": "2026-02-09T14:30:00Z",
  "updated_at": "2026-02-09T14:30:00Z",
  "description": "Fix Navidrome database location to ensure it uses LiteFS mount and resolve process path conflicts."
}
@@ -1,17 +0,0 @@
# Plan: Correct Navidrome Database and Plugins Location (`fix_navidrome_paths`)

## Phase 1: Configuration Updates [x]
- [x] Task: Update `navidrome-litefs-v2.nomad` with corrected paths (76398de)
- [x] Task: Update `entrypoint.sh` to handle plugins folder and environment cleanup (decb9f5)
- [x] Task: Conductor - User Manual Verification 'Phase 1: Configuration Updates' (Protocol in workflow.md)

## Phase 2: Build and Deployment [x]
- [x] Task: Commit changes and push to Gitea to trigger build (045fc6e)
- [x] Task: Monitor Gitea build completion (Build #26)
- [x] Task: Deploy updated Nomad job (Job Version 6)
- [x] Task: Conductor - User Manual Verification 'Phase 2: Build and Deployment' (Protocol in workflow.md)

## Phase 3: Final Verification [x]
- [x] Task: Verify database path via `lsof` on the Primary node (Verified: /data/navidrome.db)
- [x] Task: Verify replication health using the `cluster_status` script (Verified: All nodes in sync)
- [x] Task: Conductor - User Manual Verification 'Phase 3: Final Verification' (Protocol in workflow.md)
@@ -1,25 +0,0 @@
# Specification: Correct Navidrome Database and Plugins Location (`fix_navidrome_paths`)

## Overview
Force Navidrome to use the `/data` LiteFS mount for its SQLite database by setting `ND_DATAFOLDER` to `/data`. To avoid the "Operation not permitted" error caused by LiteFS's restriction on directory creation, redirect the Navidrome plugins folder to persistent shared storage.

## Functional Requirements
- **Nomad Job Configuration (`navidrome-litefs-v2.nomad`):**
  - Set `ND_DATAFOLDER="/data"`. This forces Navidrome to create and use `navidrome.db` on the LiteFS mount.
  - Set `ND_PLUGINSFOLDER="/shared_data/plugins"`. This prevents Navidrome from attempting to create a `plugins` directory in the read-only/virtual `/data` mount.
  - Keep `ND_CACHEFOLDER` and `ND_BACKUP_PATH` pointing to `/shared_data` subdirectories.
- **Entrypoint Logic (`entrypoint.sh`):**
  - Ensure it creates `/shared_data/plugins` if it doesn't exist.
  - Remove the explicit `export ND_DATABASE_PATH` if it conflicts with the new `ND_DATAFOLDER` logic, or keep it as an explicit override.
- **Verification:**
  - Confirm via `lsof` that Navidrome is now using `/data/navidrome.db`.
  - Confirm that LiteFS `/debug/vars` now reports the database in its active set.

## Non-Functional Requirements
- **Persistence:** Ensure all non-database files (plugins, cache, backups) are stored on the shared host mount (`/shared_data`) to survive container restarts and migrations.

## Acceptance Criteria
- [ ] Navidrome successfully starts with `/data` as its data folder.
- [ ] No "Operation not permitted" errors occur during startup.
- [ ] `lsof` confirms `/data/navidrome.db` is open by the Navidrome process.
- [ ] LiteFS `txid` increases on the Primary and replicates to Replicas when Navidrome writes to the DB.
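The `lsof` acceptance check can be scripted rather than eyeballed. A sketch that greps `lsof` output for the expected database path (the sample output line below is illustrative, not captured from a real run; in practice stdin would be fed from `lsof -p <navidrome-pid>`):

```shell
# Succeed iff lsof output (on stdin) shows /data/navidrome.db open.
db_open() {
  grep -q '/data/navidrome\.db$'
}
```

A matching line proves Navidrome opened the file on the LiteFS mount rather than a stale path elsewhere.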
@@ -1,26 +0,0 @@
# Plan: Fix Odroid8 and Script Robustness (`fix_odroid8_and_script`)

## Phase 1: Script Robustness [x] [checkpoint: 860000b]
- [x] Task: Update `nomad_client.py` to handle subprocess errors gracefully
  - [x] Write tests for handling Nomad CLI absence/failure
  - [x] Update implementation to return descriptive error objects or `None` without crashing
- [x] Task: Update aggregator and formatter to handle Nomad errors
  - [x] Update `cluster_aggregator.py` to gracefully skip Nomad calls if they fail
  - [x] Update `output_formatter.py` to display "Nomad Error" in relevant cells
  - [x] Add a global "Nomad Connectivity Warning" to the summary
- [x] Task: Conductor - User Manual Verification 'Phase 1: Script Robustness' (Protocol in workflow.md)

## Phase 2: Odroid8 Recovery [ ]
- [x] Task: Identify and verify `odroid8` LiteFS data path
  - [x] Run `nomad alloc status` to find the volume mount for `odroid8`
  - [x] Provide the user with the exact host path to the LiteFS data
- [x] Task: Guide user through manual cleanup
  - [x] Provide steps to stop the allocation
  - [x] Provide the `rm` command to clear the LiteFS metadata
  - [x] Provide steps to restart and verify the node
- [~] Task: Conductor - User Manual Verification 'Phase 2: Odroid8 Recovery' (Protocol in workflow.md)

## Phase 3: Final Verification [x]
- [x] Task: Final verification run of the script
- [x] Task: Verify cluster health in Consul and LiteFS API
- [x] Task: Conductor - User Manual Verification 'Phase 3: Final Verification' (Protocol in workflow.md)
@@ -1,22 +0,0 @@
# Plan: Implement TTL Heartbeat Service Registration (`implement_ttl_heartbeat`)

## Phase 1: Container Environment Preparation [x] [checkpoint: 51b8fce]
- [x] Task: Update `Dockerfile` to install `curl` and `jq` (f7fe258)
- [x] Task: Verify `litefs.yml` points to `entrypoint.sh` (should already be correct) (verified)
- [x] Task: Conductor - User Manual Verification 'Phase 1: Container Environment Preparation' (Protocol in workflow.md)

## Phase 2: Script Implementation [x] [checkpoint: 139016f]
- [x] Task: Refactor `entrypoint.sh` with the TTL heartbeat logic (d977301)
  - [x] Implement `register_service` with a TTL check definition
  - [x] Implement the `pass_ttl` loop
  - [x] Implement robust `stop_app` and signal trapping
  - [x] Ensure correct Primary/Replica detection logic (LiteFS 0.5: Primary = no `.primary` file)
- [x] Task: Conductor - User Manual Verification 'Phase 2: Script Implementation' (Protocol in workflow.md)

## Phase 3: Deployment and Verification [ ]
- [ ] Task: Commit changes and push to Gitea to trigger build
- [ ] Task: Monitor Gitea build completion
- [ ] Task: Deploy updated Nomad job (forcing update if necessary)
- [ ] Task: Verify "clean" state in Consul (only one primary registered)
- [ ] Task: Verify failover/stop behavior (immediate deregistration vs. TTL expiry)
- [ ] Task: Conductor - User Manual Verification 'Phase 3: Deployment and Verification' (Protocol in workflow.md)
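The TTL heartbeat pattern above keeps Consul clean: the primary registers a service with a TTL check, then repeatedly passes the check via the Consul agent API (`PUT /v1/agent/check/pass/<check-id>`, where a service-level check gets the `service:<service-id>` check ID). A sketch of the URL construction, with the heartbeat loop shown only as a comment since it needs a live agent:

```shell
# Build the Consul agent endpoint a pass_ttl loop would PUT to.
# addr and service_id are caller-supplied; the path is the standard agent API.
ttl_pass_url() {
  local addr="$1" service_id="$2"
  echo "${addr}/v1/agent/check/pass/service:${service_id}"
}

# A pass_ttl loop would then do something like (not executed here):
#   while true; do
#     curl -fsS -X PUT "$(ttl_pass_url "$CONSUL_HTTP_ADDR" "$SERVICE_ID")"
#     sleep 10
#   done
```

If the heartbeat stops (crash, demotion), the TTL expires and Consul marks the check critical without any explicit deregistration.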
@@ -1,23 +0,0 @@
# Plan: Update Monitor Discovery Logic (`update_monitor_discovery`)

## Phase 1: Nomad Discovery Enhancement [x] [checkpoint: 353683e]
- [x] Task: Update `nomad_client.py` to fetch job allocations with IPs (353683e)
  - [x] Write tests for parsing allocation IPs from `nomad job status` or `nomad alloc status`
  - [x] Implement `get_job_allocations(job_id)` returning a list of dicts (id, node, ip)
- [x] Task: Conductor - User Manual Verification 'Phase 1: Nomad Discovery Enhancement' (Protocol in workflow.md)

## Phase 2: Aggregator Refactor [x] [checkpoint: 655a9b2]
- [x] Task: Refactor `cluster_aggregator.py` to drive discovery via Nomad (655a9b2)
  - [x] Update `get_cluster_status` to call `nomad_client.get_job_allocations` first
  - [x] Update loop to iterate over allocations and supplement with LiteFS and Consul data
- [x] Task: Update `consul_client.py` to fetch all services once and allow lookup by IP/ID (655a9b2)
- [x] Task: Update tests for the new discovery flow (655a9b2)
- [x] Task: Conductor - User Manual Verification 'Phase 2: Aggregator Refactor' (Protocol in workflow.md)

## Phase 3: UI and Health Logic [x] [checkpoint: 21e9c3d]
- [x] Task: Update `output_formatter.py` for "Standby" nodes (21e9c3d)
  - [x] Update table formatting to handle missing Consul status for replicas
- [x] Task: Update Cluster Health calculation (21e9c3d)
  - [x] "Healthy" = 1 Primary (Consul passing) + N Replicas (LiteFS connected)
- [x] Task: Final verification run (21e9c3d)
- [x] Task: Conductor - User Manual Verification 'Phase 3: Final Verification' (Protocol in workflow.md)
@@ -16,6 +16,25 @@ PRIMARY_TAGS='["navidrome","web","traefik.enable=true","urlprefix-/navidrome","t

# --- Helper Functions ---

# Backup Database (Only on Primary)
run_backup() {
    local backup_dir="/shared_data/backup"
    local timestamp=$(date +%Y%m%d_%H%M%S)
    local backup_file="${backup_dir}/navidrome.db_${timestamp}.bak"

    echo "Backing up database to ${backup_file}..."
    mkdir -p "$backup_dir"

    if litefs export -name navidrome.db "$backup_file"; then
        echo "Backup successful."
        # Keep only the last 7 days
        find "$backup_dir" -name "navidrome.db_*.bak" -mtime +7 -delete
        echo "Old backups cleaned."
    else
        echo "ERROR: Backup failed!"
    fi
}

# Register Service with TTL Check
register_service() {
    echo "Promoted! Registering service ${SERVICE_ID}..."
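The retention policy in `run_backup` hinges on two things: a sortable timestamped filename, and a `find -mtime +7 -delete` sweep. Isolated as small helpers for clarity (the directory argument is hypothetical; the entrypoint hard-codes `/shared_data/backup`):

```shell
# Produce a timestamped backup filename in the given directory,
# matching the navidrome.db_YYYYmmdd_HHMMSS.bak scheme.
backup_name() {
  local dir="$1"
  echo "${dir}/navidrome.db_$(date +%Y%m%d_%H%M%S).bak"
}

# Delete backups whose mtime is older than 7 days, mirroring the
# `find ... -mtime +7 -delete` call in run_backup.
prune_backups() {
  local dir="$1"
  find "$dir" -name "navidrome.db_*.bak" -mtime +7 -delete
}
```

Because pruning keys off file mtime rather than the name, a restored or touched backup is treated as fresh; the timestamp in the name is purely for human-readable ordering.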
@@ -86,6 +105,9 @@ echo "Consul: $CONSUL_HTTP_ADDR"
# Small sleep to let LiteFS settle and leadership election complete
sleep 5

LAST_BACKUP_TIME=0
BACKUP_INTERVAL=86400 # 24 hours

while true; do
    # In LiteFS 0.5, the .primary file exists ONLY on replicas.
    if [ ! -f "$DB_LOCK_FILE" ]; then
@@ -103,6 +125,13 @@ while true; do
        # 2. Maintain the heartbeat (TTL)
        pass_ttl

        # 3. Handle periodic backup
        CURRENT_TIME=$(date +%s)
        if [ $((CURRENT_TIME - LAST_BACKUP_TIME)) -ge $BACKUP_INTERVAL ]; then
            run_backup
            LAST_BACKUP_TIME=$CURRENT_TIME
        fi

    else
        # === WE ARE REPLICA ===
@@ -111,6 +140,8 @@ while true; do
        echo "Lost leadership. Demoting..."
        stop_app
        deregister_service
        # Reset the backup timer so the next primary (or this node, if promoted again) starts fresh
        LAST_BACKUP_TIME=0
    fi

    # No service registration exists for replicas, to keep Consul clean.
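The periodic-backup gate in the loop reduces to one epoch-seconds comparison: back up immediately on first promotion (`LAST_BACKUP_TIME=0`), then once per interval. A sketch of just that predicate:

```shell
BACKUP_INTERVAL=86400 # 24 hours, as in the entrypoint

# backup_due <last_backup_epoch> <now_epoch>
# Succeeds when at least BACKUP_INTERVAL seconds have elapsed.
backup_due() {
  local last="$1" now="$2"
  [ $((now - last)) -ge "$BACKUP_INTERVAL" ]
}
```

With `LAST_BACKUP_TIME=0`, `backup_due 0 "$(date +%s)"` is always true, which is why resetting the timer on demotion guarantees a fresh backup right after the next promotion.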
@@ -1,82 +0,0 @@
job "navidrome-litefs" {
  datacenters = ["dc1"]
  type        = "service"

  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  group "navidrome" {
    count = 4

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      healthy_deadline = "5m"
      auto_revert      = false
    }

    constraint {
      distinct_hosts = true
    }

    network {
      # Request static ports on the host
      port "http" {
        static = 4533
        to     = 4533 # Direct to Navidrome
      }
      port "litefs" {
        static = 20202
        to     = 20202 # Maps host 20202 to container 20202 (LiteFS Replication)
      }
    }

    task "navidrome" {
      driver = "docker"

      config {
        image      = "gitea.service.dc1.fbleagh.duckdns.org/sstent/navidrome-litefs:045fc6e82b9ecb6bebc1f095f62498935df70bbf"
        privileged = true # Still needed for FUSE
        ports      = ["http", "litefs"]
        force_pull = true

        volumes = [
          "/mnt/configs/navidrome_litefs:/var/lib/litefs",
          "/mnt/Public/configs/navidrome:/shared_data",
          "/mnt/Public/Downloads/Clean_Music:/music/CleanMusic:ro",
          "/mnt/Public/Downloads/news/slskd/downloads:/music/slskd:ro",
          "/mnt/Public/Downloads/incoming_music:/music/incomingmusic:ro"
        ]
      }

      env {
        # LiteFS Config
        CONSUL_URL   = "http://${attr.unique.network.ip-address}:8500"
        ADVERTISE_IP = "${attr.unique.network.ip-address}"
        PORT         = "8080" # Internal proxy port (unused but kept)

        # Navidrome Config
        ND_DATAFOLDER     = "/data"
        ND_PLUGINS_FOLDER = "/shared_data/plugins"
        ND_CACHEFOLDER    = "/shared_data/cache"
        ND_BACKUP_PATH    = "/shared_data/backup"

        ND_SCANSCHEDULE              = "0"
        ND_SCANNER_FSWATCHER_ENABLED = "false"
        ND_FORCE_REDEPLOY            = "5"
        ND_LOGLEVEL                  = "info"
        ND_REVERSEPROXYWHITELIST     = "0.0.0.0/0"
        ND_REVERSEPROXYUSERHEADER    = "X-Forwarded-User"
      }

      # NO service block here! Managed by register.sh inside the container.

      resources {
        cpu    = 500
        memory = 512
      }
    }
  }
}
@@ -2,178 +2,82 @@ job "navidrome-litefs" {
  datacenters = ["dc1"]
  type        = "service"

  # We pin to Linux because LiteFS requires FUSE
  variable "container_sha" {
    type    = string
    default = "045fc6e82b9ecb6bebc1f095f62498935df70bbf"
  }

  constraint {
    attribute = "${attr.kernel.name}"
    value     = "linux"
  }

  group "navidrome" {
    count = 2
    count = 4

    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      healthy_deadline = "5m"
      auto_revert      = false
    }

    constraint {
      distinct_hosts = true
    }

    network {
      mode = "host"
      port "http" {}
    }

    # --- Setup Task ---
    task "setup" {
      driver = "docker"
      lifecycle {
        hook    = "prestart"
        sidecar = false
      # Request static ports on the host
      port "http" {
        static = 4533
        to     = 4533 # Direct to Navidrome
      }
      config {
        image        = "busybox"
        command      = "mkdir"
        args         = ["-p", "/alloc/sqlite"]
        network_mode = "host"
      port "litefs" {
        static = 20202
        to     = 20202 # Maps host 20202 to container 20202 (LiteFS Replication)
      }
    }

    # --- LiteFS Task ---
    task "litefs" {
      driver = "docker"

      config {
        image        = "flyio/litefs:0.5"
        privileged   = true # Needed for FUSE
        ports        = ["http"]
        network_mode = "host"

        # 1. Bind mount for LiteFS internal data (chunks/WAL)
        # 2. Bind mount for the config
        # 3. Mount the shared alloc dir so we can mount FUSE on it
        volumes = [
          "/mnt/configs/navidrome_litefs:/var/lib/litefs",
          "local/litefs.yml:/etc/litefs.yml"
        ]

        mounts = [
          {
            type   = "bind"
            source = "../alloc/sqlite"
            target = "/mnt/sqlite"
            bind_options = {
              propagation = "shared"
            }
          }
        ]
      }

      # Create the config file
      template {
        left_delimiter  = "[["
        right_delimiter = "]]"
        data            = <<EOF
fuse:
  # This matches the internal mount point in the container
  dir: "/mnt/sqlite"

data:
  # Internal data storage
  dir: "/var/lib/litefs"

# Use Consul for leader election
lease:
  type: "consul"
  consul:
    url: "http://[[ env `attr.unique.network.ip-address` ]]:8500"
    key: "litefs/navidrome"

# The HTTP Proxy routes traffic
proxy:
  addr: ":[[ env `NOMAD_PORT_http` ]]"
  target: "127.0.0.1:4533" # Navidrome's internal port
  db: "navidrome.db"       # The DB to track for transaction consistency
  passthrough:             # Paths that don't need write-forwarding (optional optimizations)
    - "*.js"
    - "*.css"
    - "*.png"
EOF
        destination = "local/litefs.yml"
      }

      resources {
        cpu    = 200
        memory = 256
      }
    }

    # --- Navidrome Task (The App) ---
    task "navidrome" {
      driver = "docker"

      config {
        image             = "ghcr.io/navidrome/navidrome:latest"
        memory_hard_limit = "2048"
        ports             = [] # No ports exposed directly!
        network_mode      = "host"
        image             = "gitea.service.dc1.fbleagh.duckdns.org/sstent/navidrome-litefs:${var.container_sha}"
        privileged        = true # Still needed for FUSE
        ports             = ["http", "litefs"]
        force_pull        = true

        # We mount the sqlite dir from the allocation directory
        mounts = [
          {
            type   = "bind"
            source = "../alloc/sqlite"
            target = "/data"
            bind_options = {
              propagation = "shared"
            }
          }
        ]

        # Shared Music and Configs
        volumes = [
          "/mnt/configs/navidrome_litefs:/var/lib/litefs",
          "/mnt/Public/configs/navidrome:/shared_data",
          "/mnt/Public/Downloads/Clean_Music:/music/CleanMusic:ro",
          "/mnt/Public/Downloads/news/slskd/downloads:/music/slskd:ro",
          "/mnt/Public/Downloads/incoming_music:/music/incomingmusic:ro",
          "/mnt/Public/configs/navidrome:/shared_data"
          "/mnt/Public/Downloads/incoming_music:/music/incomingmusic:ro"
        ]
      }

      env {
        ND_DATAFOLDER  = "/local/data"
        ND_CACHEFOLDER = "/shared_data/cache"
        ND_CONFIGFILE  = "/local/data/navidrome.toml"
        # LiteFS Config
        CONSUL_URL   = "http://${attr.unique.network.ip-address}:8500"
        ADVERTISE_IP = "${attr.unique.network.ip-address}"
        PORT         = "8080" # Internal proxy port (unused but kept)

        # Navidrome Config
        ND_DATAFOLDER     = "/data"
        ND_PLUGINS_FOLDER = "/shared_data/plugins"
        ND_CACHEFOLDER    = "/shared_data/cache"
        ND_BACKUP_PATH    = "/shared_data/backup"
        ND_BACKUPSCHEDULE = ""

        # Important: LiteFS handles locking, but we still want WAL mode.
        ND_DBPATH = "/data/navidrome.db?_busy_timeout=30000&_journal_mode=WAL&_foreign_keys=on&synchronous=NORMAL"

        # Disable internal scheduling to prevent redundant scans on secondary nodes.
        ND_SCANSCHEDULE              = "0"
        ND_SCANNER_FSWATCHER_ENABLED = "false"

        ND_LOGLEVEL               = "info"
        ND_REVERSEPROXYWHITELIST  = "0.0.0.0/0"
        ND_REVERSEPROXYUSERHEADER = "X-Forwarded-User"
        ND_FORCE_REDEPLOY         = "5"
        ND_LOGLEVEL               = "info"
        ND_REVERSEPROXYWHITELIST  = "0.0.0.0/0"
        ND_REVERSEPROXYUSERHEADER = "X-Forwarded-User"
      }

      service {
        name = "navidrome"
        tags = [
          "navidrome",
          "web",
          "traefik.enable=true",
          "urlprefix-/navidrome",
          "tools",
          "traefik.http.routers.navidromelan.rule=Host(`navidrome.service.dc1.consul`)",
          "traefik.http.routers.navidromewan.rule=Host(`m.fbleagh.duckdns.org`)",
          "traefik.http.routers.navidromewan.middlewares=dex@consulcatalog",
          "traefik.http.routers.navidromewan.tls=true",
        ]
        port = "http" # This maps to the LiteFS proxy port defined in network block

        check {
          type     = "http"
          path     = "/app" # LiteFS proxy passes this through
          interval = "10s"
          timeout  = "2s"
        }
      }
      # NO service block here! Managed by register.sh inside the container.

      resources {
        cpu = 500
@@ -181,4 +85,4 @@ EOF
      }
    }
  }
}
}