diff --git a/Dockerfile b/Dockerfile
index 6707dfc..579277f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,17 +4,21 @@ FROM flyio/litefs:0.5 AS litefs
 # Stage 2: Final image
 FROM ghcr.io/navidrome/navidrome:latest
 
-# Install FUSE and CA certificates (needed for LiteFS)
+# Install dependencies
 USER root
-RUN apk add --no-cache fuse3 ca-certificates
+RUN apk add --no-cache fuse3 ca-certificates bash curl
 
 # Copy LiteFS binary
 COPY --from=litefs /usr/local/bin/litefs /usr/local/bin/litefs
 
+# Copy scripts
+COPY register.sh /usr/local/bin/register.sh
+COPY entrypoint.sh /usr/local/bin/entrypoint.sh
+RUN chmod +x /usr/local/bin/register.sh /usr/local/bin/entrypoint.sh
+
 # Copy LiteFS configuration
 COPY litefs.yml /etc/litefs.yml
 
-# We'll use environment variables for most LiteFS settings,
-# but the baked-in config provides the structure.
-# LiteFS will mount the FUSE fs and then execute Navidrome.
-ENTRYPOINT ["litefs", "mount", "--", "/app/navidrome"]
+# LiteFS becomes the supervisor.
+# It will mount the FUSE fs and then execute our entrypoint script.
+ENTRYPOINT ["litefs", "mount", "--", "/usr/local/bin/entrypoint.sh"]
\ No newline at end of file
diff --git a/check-fw.nomad b/check-fw.nomad
new file mode 100644
index 0000000..6a7d80d
--- /dev/null
+++ b/check-fw.nomad
@@ -0,0 +1,22 @@
+job "check-firewall" {
+  datacenters = ["dc1"]
+  type = "batch"
+
+  group "check" {
+    count = 1
+    constraint {
+      attribute = "${attr.unique.hostname}"
+      value = "odroid7"
+    }
+
+    task "check" {
+      driver = "docker"
+      config {
+        image = "busybox"
+        network_mode = "host"
+        command = "sh"
+        args = ["-c", "echo 'UFW is not installed in busybox, checking port 20202 from outside'"]
+      }
+    }
+  }
+}
diff --git a/cleanup.nomad b/cleanup.nomad
index 99e3fda..64c2d99 100644
--- a/cleanup.nomad
+++ b/cleanup.nomad
@@ -3,11 +3,11 @@ job "cleanup-litefs-all" {
   type = "batch"
 
   group "cleanup" {
-    count = 2
+    count = 4
     constraint {
       attribute = "${attr.unique.hostname}"
       operator = "regexp"
-      value = "odroid7|odroid8"
+      value = "odroid6|odroid7|odroid8|opti1"
     }
 
     task "clean" {
diff --git a/conductor/tracks.md b/conductor/tracks.md
index 82bc3bf..c0d605b 100644
--- a/conductor/tracks.md
+++ b/conductor/tracks.md
@@ -4,5 +4,5 @@ This file tracks all major tracks for the project. Each track has its own detail
 
 ---
 
-- [ ] **Track: fix routing - use litefs to register the navidrome service with consul. the serivce should point to the master and avoid the litefs proxy (it breaks navidrome)**
+- [~] **Track: fix routing - use litefs to register the navidrome service with consul. the serivce should point to the master and avoid the litefs proxy (it breaks navidrome)**
   *Link: [./tracks/fix_routing_20260207/](./tracks/fix_routing_20260207/)*
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 0000000..1d28961
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# Start the registration loop in the background
+/usr/local/bin/register.sh &
+
+# Start Navidrome
+# LiteFS has already mounted the DB at this point because it's the supervisor
+echo "Starting Navidrome..."
+/app/navidrome
diff --git a/host-check.nomad b/host-check.nomad
new file mode 100644
index 0000000..2ade251
--- /dev/null
+++ b/host-check.nomad
@@ -0,0 +1,26 @@
+job "host-check" {
+  datacenters = ["dc1"]
+  type = "batch"
+
+  constraint {
+    attribute = "${attr.unique.hostname}"
+    value = "odroid7"
+  }
+
+  group "check" {
+    task "ss" {
+      driver = "raw_exec"
+      config {
+        command = "ss"
+        args = ["-tln"]
+      }
+    }
+    task "ufw" {
+      driver = "raw_exec"
+      config {
+        command = "ufw"
+        args = ["status"]
+      }
+    }
+  }
+}
diff --git a/juicefs-controller.nomad b/juicefs-controller.nomad
deleted file mode 100644
index 00b4f71..0000000
--- a/juicefs-controller.nomad
+++ /dev/null
@@ -1,38 +0,0 @@
-job "jfs-controller" {
-  datacenters = ["dc1"]
-  type = "system"
-
-  group "controller" {
-    task "plugin" {
-      driver = "docker"
-
-      config {
-        image = "juicedata/juicefs-csi-driver:v0.31.1"
-
-        args = [
-          "--endpoint=unix://csi/csi.sock",
-          "--logtostderr",
-          "--nodeid=test",
-          "--v=5",
-          "--by-process=true"
-        ]
-
-        privileged = true
-      }
-
-      csi_plugin {
-        id = "juicefs0"
-        type = "controller"
-        mount_dir = "/csi"
-      }
-      resources {
-        cpu = 100
-        memory = 512
-      }
-      env {
-        POD_NAME = "csi-controller"
-        POD_NAMESPACE = "default"
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/juicefs-node.nomad b/juicefs-node.nomad
deleted file mode 100644
index 83c84b8..0000000
--- a/juicefs-node.nomad
+++ /dev/null
@@ -1,63 +0,0 @@
-job "jfs-node" {
-  datacenters = ["dc1"]
-  type = "system"
-
-  group "nodes" {
-    network {
-      port "metrics" {
-        static = 9567
-        to = 8080
-      }
-    }
-
-    service {
-      name = "juicefs-metrics"
-      port = "metrics"
-      tags = ["prometheus"]
-      check {
-        type = "http"
-        path = "/metrics"
-        interval = "10s"
-        timeout = "2s"
-      }
-    }
-
-    task "juicefs-plugin" {
-      driver = "docker"
-
-      config {
-        image = "juicedata/juicefs-csi-driver:v0.31.1"
-        memory_hard_limit = 2048
-        ports = ["metrics"]
-        args = [
-          "--endpoint=unix://csi/csi.sock",
-          "--logtostderr",
-          "--v=5",
-          "--nodeid=${node.unique.name}",
-          "--by-process=true",
-        ]
-
-        privileged = true
-      }
-
-      csi_plugin {
-        id = "juicefs0"
-        type = "node"
-        mount_dir = "/csi"
-        health_timeout = "3m"
-      }
-      resources {
-        cpu = 100
-        memory = 100
-      }
-      env {
-        POD_NAME = "csi-node"
-        POD_NAMESPACE = "default"
-        # Aggregates metrics from children onto the 8080 port
-        JFS_METRICS = "0.0.0.0:8080"
-        # Ensures mounts run as background processes managed by the driver
-        JFS_MOUNT_MODE = "process"
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/navidrome-juice.nomad b/navidrome-juice.nomad
deleted file mode 100644
index 3baa4a7..0000000
--- a/navidrome-juice.nomad
+++ /dev/null
@@ -1,92 +0,0 @@
-job "navidrome" {
-  datacenters = ["dc1"]
-  type = "service"
-
-  constraint {
-    attribute = "${attr.unique.hostname}"
-    operator = "regexp"
-    value = "odroid.*"
-  }
-
-  group "navidrome" {
-    count = 1
-
-    volume "navidrome-csi-vol" {
-      type = "csi"
-      source = "navidrome-volume" # This must match the 'id' in your volume registration
-      attachment_mode = "file-system"
-      access_mode = "multi-node-multi-writer"
-    }
-
-
-
-    # Main Navidrome task
-    task "navidrome" {
-      driver = "docker"
-
-      volume_mount {
-        volume = "navidrome-csi-vol" # Matches the name in the volume block above
-        destination = "/data" # Where it appears inside the container
-        read_only = false
-      }
-
-
-      config {
-        image = "ghcr.io/navidrome/navidrome:latest"
-        memory_hard_limit = "2048"
-        ports = ["http"]
-        volumes = [
-          "/mnt/Public/Downloads/Clean_Music:/music/CleanMusic:ro",
"/mnt/Public/Downloads/news/slskd/downloads:/music/slskd:ro", - "/mnt/Public/Downloads/incoming_music:/music/incomingmusic:ro" - ] - } - env { - ND_DATAFOLDER = "/data" - ND_CACHEFOLDER = "/data/cache" - ND_CONFIGFILE= "/data/navidrome.toml" - ND_DBPATH = "/data/navidrome.db?_busy_timeout=30000&_journal_mode=DELETE&_foreign_keys=on&synchronous=NORMAL&cache=shared&nolock=1" - ND_SCANSCHEDULE = "32 8-20 * * *" - ND_LOGLEVEL = "trace" - ND_REVERSEPROXYWHITELIST = "0.0.0.0/0" - ND_REVERSEPROXYUSERHEADER = "X-Forwarded-User" - ND_SCANNER_GROUPALBUMRELEASES = "False" - ND_BACKUP_PATH = "/data/backups" - ND_BACKUP_SCHEDULE = "0 0 * * *" - ND_BACKUP_COUNT = "7" - } - resources { - cpu = 100 - memory = 128 - } - service { - name = "navidrome" - tags = [ - "navidrome", - "web", - "urlprefix-/navidrome", - "tools", - "traefik.http.routers.navidromelan.rule=Host(`navidrome.service.dc1.consul`)", - "traefik.http.routers.navidromewan.rule=Host(`m.fbleagh.duckdns.org`)", - "traefik.http.routers.navidromewan.middlewares=dex@consulcatalog", - "traefik.http.routers.navidromewan.tls=true", - ] - port = "http" - check { - type = "tcp" - interval = "10s" - timeout = "2s" - } - } - } - - - network { - port "http" { - static = 4533 - to = 4533 - } - } - } -} - diff --git a/navidrome-litefs-v2.nomad b/navidrome-litefs-v2.nomad index 443089a..06e5432 100644 --- a/navidrome-litefs-v2.nomad +++ b/navidrome-litefs-v2.nomad @@ -29,7 +29,7 @@ job "navidrome-litefs" { } port "litefs" { static = 20202 - to = 20202 # Maps host 20202 to container 20202 (LiteFS Replication) + to = 8081 # Maps host 20202 to container 8081 (LiteFS Replication) } } @@ -54,7 +54,7 @@ job "navidrome-litefs" { # LiteFS Config CONSUL_URL = "http://${attr.unique.network.ip-address}:8500" ADVERTISE_IP = "${attr.unique.network.ip-address}" - PORT = "8080" # Internal proxy port + PORT = "8080" # Internal proxy port (unused but kept) # Navidrome Config ND_DATAFOLDER = "/local/data" @@ -71,29 +71,7 @@ job "navidrome-litefs" { ND_REVERSEPROXYUSERHEADER = "X-Forwarded-User" } - service { - name = "navidrome" - tags = [ - "navidrome", - "web", - "traefik.enable=true", - "urlprefix-/navidrome", - "tools", - "traefik.http.routers.navidromelan.rule=Host(`navidrome.service.dc1.consul`)", - "traefik.http.routers.navidromewan.rule=Host(`m.fbleagh.duckdns.org`)", - "traefik.http.routers.navidromewan.middlewares=dex@consulcatalog", - "traefik.http.routers.navidromewan.tls=true", - ] - port = "http" - - check { - type = "script" - command = "/bin/sh" - args = ["-c", "/usr/local/bin/litefs is-primary"] - interval = "10s" - timeout = "5s" - } - } + # NO service block here! Managed by register.sh inside the container. 
 
       resources {
         cpu = 500
@@ -101,4 +79,4 @@
       }
     }
   }
-}
\ No newline at end of file
+}
diff --git a/navidrome-vol.nomad b/navidrome-vol.nomad
deleted file mode 100644
index 7a954aa..0000000
--- a/navidrome-vol.nomad
+++ /dev/null
@@ -1,35 +0,0 @@
-type = "csi"
-id = "navidrome-volume"
-name = "navidrome-volume"
-
-# This UUID was generated during the Postgres storage format
-external_id = "56783f1f-d9c6-45fd-baec-56fa6c33776b"
-
-capacity_min = "10GiB"
-capacity_max = "10GiB"
-
-capability {
-  access_mode = "multi-node-multi-writer"
-  attachment_mode = "file-system"
-}
-
-plugin_id = "juicefs0"
-
-context {
-  writeback = "false"
-  delayed-write = "true"
-  upload-delay = "1m"
-  cache-size = "1024"
-  buffer-size = "128"
-  attr-cache = "60"
-  entry-cache = "60"
-  enable-mmap = "true"
-  metacache = "true"
-}
-
-secrets {
-  name = "navidrome-volume"
-  metaurl = "postgres://postgres:postgres@master.postgres.service.dc1.consul:5432/juicefs-navidrome"
-  storage = "postgres"
-  bucket = "postgres://postgres:postgres@master.postgres.service.dc1.consul:5432/juicefs-navidrome-storage"
-}
diff --git a/nomad-config-check.nomad b/nomad-config-check.nomad
new file mode 100644
index 0000000..3fc16da
--- /dev/null
+++ b/nomad-config-check.nomad
@@ -0,0 +1,20 @@
+job "nomad-config-check" {
+  datacenters = ["dc1"]
+  type = "batch"
+
+  group "check" {
+    count = 1
+    constraint {
+      attribute = "${attr.unique.hostname}"
+      value = "odroid7"
+    }
+
+    task "config" {
+      driver = "raw_exec"
+      config {
+        command = "grep"
+        args = ["-r", "disable_script_checks", "/etc/nomad.d/"]
+      }
+    }
+  }
+}
diff --git a/register.sh b/register.sh
new file mode 100644
index 0000000..a0b905e
--- /dev/null
+++ b/register.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# Configuration
+SERVICE_NAME="navidrome"
+REPLICA_SERVICE_NAME="replica-navidrome"
+PORT=4533
+CONSUL_HTTP_ADDR="${CONSUL_URL:-http://localhost:8500}"
+NODE_IP="${ADVERTISE_IP}"
+CHECK_INTERVAL="10s"
+
+# Tags for the Primary service (Traefik enabled)
+PRIMARY_TAGS='["navidrome","web","traefik.enable=true","urlprefix-/navidrome","tools","traefik.http.routers.navidromelan.rule=Host(`navidrome.service.dc1.consul`)","traefik.http.routers.navidromewan.rule=Host(`m.fbleagh.duckdns.org`)","traefik.http.routers.navidromewan.middlewares=dex@consulcatalog","traefik.http.routers.navidromewan.tls=true"]'
+
+# Tags for the Replica service
+REPLICA_TAGS='["navidrome-replica"]'
+
+register_service() {
+  local name=$1
+  local tags=$2
+  local id="navidrome-${NODE_IP}-${name}"
+
+  echo "Registering as ${name}..."
+  curl -s -X PUT -d "{
+    \"ID\": \"${id}\",
+    \"Name\": \"${name}\",
+    \"Tags\": ${tags},
+    \"Address\": \"${NODE_IP}\",
+    \"Port\": ${PORT},
+    \"Check\": {
+      \"HTTP\": \"http://${NODE_IP}:${PORT}/app\",
+      \"Interval\": \"${CHECK_INTERVAL}\",
+      \"Timeout\": \"2s\"
+    }
+  }" "${CONSUL_HTTP_ADDR}/v1/agent/service/register"
+}
+
+deregister_service() {
+  local name=$1
+  local id="navidrome-${NODE_IP}-${name}"
+  echo "Deregistering ${name}..."
+  curl -s -X PUT "${CONSUL_HTTP_ADDR}/v1/agent/service/deregister/${id}"
+}
+
+echo "Starting Consul registration loop..."
+
+LAST_STATE="unknown"
+
+while true; do
+  if /usr/local/bin/litefs is-primary > /dev/null 2>&1; then
+    CURRENT_STATE="primary"
+  else
+    CURRENT_STATE="replica"
+  fi
+
+  if [ "$CURRENT_STATE" != "$LAST_STATE" ]; then
+    echo "State changed from ${LAST_STATE} to ${CURRENT_STATE}"
+    if [ "$CURRENT_STATE" == "primary" ]; then
+      deregister_service "$REPLICA_SERVICE_NAME"
+      register_service "$SERVICE_NAME" "$PRIMARY_TAGS"
+    else
+      deregister_service "$SERVICE_NAME"
+      register_service "$REPLICA_SERVICE_NAME" "$REPLICA_TAGS"
+    fi
+    LAST_STATE="$CURRENT_STATE"
+  fi
+
+  sleep 15
+done
diff --git a/scan.nomad b/scan.nomad
new file mode 100644
index 0000000..5af9d71
--- /dev/null
+++ b/scan.nomad
@@ -0,0 +1,34 @@
+job "port-discovery" {
+  datacenters = ["dc1"]
+  type = "batch"
+
+  group "scan" {
+    count = 1
+    constraint {
+      attribute = "${attr.unique.hostname}"
+      value = "odroid6"
+    }
+
+    task "scan" {
+      driver = "docker"
+      config {
+        image = "busybox"
+        network_mode = "host"
+        command = "sh"
+        args = ["local/scan.sh"]
+      }
+      template {
+        data = <<EOF
+TARGET=...
+for p in ...; do
+  nc -zv -w 3 $TARGET $p 2>&1 | grep -q "refused" && echo "MATCH: $p is AVAILABLE (Refused)"
+  nc -zv -w 3 $TARGET $p 2>&1 | grep -q "succeeded" && echo "BUSY: $p is IN USE"
+done
+EOF
+        destination = "local/scan.sh"
+      }
+    }
+  }
+}
\ No newline at end of file