From e71d5e2ffc8e7b2704ed1e9504a57ebfdb565b51 Mon Sep 17 00:00:00 2001 From: sstent Date: Sun, 8 Feb 2026 05:53:27 -0800 Subject: [PATCH] conductor(checkpoint): Checkpoint end of Phase 1 --- .gitignore | 3 ++ conductor/tracks.md | 6 ++- .../cluster_status_python_20260208/index.md | 5 +++ .../metadata.json | 8 ++++ .../cluster_status_python_20260208/plan.md | 31 ++++++++++++++ .../cluster_status_python_20260208/spec.md | 40 +++++++++++++++++++ scripts/cluster_status/Makefile | 11 +++++ scripts/cluster_status/__init__.py | 0 scripts/cluster_status/config.py | 15 +++++++ scripts/cluster_status/requirements.txt | 4 ++ scripts/cluster_status/tests/__init__.py | 0 scripts/cluster_status/tests/test_config.py | 27 +++++++++++++ 12 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 conductor/tracks/cluster_status_python_20260208/index.md create mode 100644 conductor/tracks/cluster_status_python_20260208/metadata.json create mode 100644 conductor/tracks/cluster_status_python_20260208/plan.md create mode 100644 conductor/tracks/cluster_status_python_20260208/spec.md create mode 100644 scripts/cluster_status/Makefile create mode 100644 scripts/cluster_status/__init__.py create mode 100644 scripts/cluster_status/config.py create mode 100644 scripts/cluster_status/requirements.txt create mode 100644 scripts/cluster_status/tests/__init__.py create mode 100644 scripts/cluster_status/tests/test_config.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..77ac754 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.venv/ +__pycache__/ +*.pyc diff --git a/conductor/tracks.md b/conductor/tracks.md index b0b15a0..1595b2f 100644 --- a/conductor/tracks.md +++ b/conductor/tracks.md @@ -1,3 +1,7 @@ # Project Tracks -This file tracks all major tracks for the project. Each track has its own detailed plan in its respective folder. \ No newline at end of file +This file tracks all major tracks for the project. 
Each track has its own detailed plan in its respective folder. +--- + +- [~] **Track: create a script that runs on my local system (i don't run consul locally) that: - check consul services are registered correctly - displays the expected state (who is primary, what replicas exist) - show basic litefs status info for each node** +*Link: [./tracks/cluster_status_python_20260208/](./tracks/cluster_status_python_20260208/)* diff --git a/conductor/tracks/cluster_status_python_20260208/index.md b/conductor/tracks/cluster_status_python_20260208/index.md new file mode 100644 index 0000000..d5cc09d --- /dev/null +++ b/conductor/tracks/cluster_status_python_20260208/index.md @@ -0,0 +1,5 @@ +# Track cluster_status_python_20260208 Context + +- [Specification](./spec.md) +- [Implementation Plan](./plan.md) +- [Metadata](./metadata.json) diff --git a/conductor/tracks/cluster_status_python_20260208/metadata.json b/conductor/tracks/cluster_status_python_20260208/metadata.json new file mode 100644 index 0000000..ba6a86f --- /dev/null +++ b/conductor/tracks/cluster_status_python_20260208/metadata.json @@ -0,0 +1,8 @@ +{ + "track_id": "cluster_status_python_20260208", + "type": "feature", + "status": "new", + "created_at": "2026-02-08T15:00:00Z", + "updated_at": "2026-02-08T15:00:00Z", + "description": "create a script that runs on my local system (i don't run consul locally) that: - check consul services are registered correctly - displays the expected state (who is primary, what replicas exist) - show basic litefs status info for each node" +} diff --git a/conductor/tracks/cluster_status_python_20260208/plan.md b/conductor/tracks/cluster_status_python_20260208/plan.md new file mode 100644 index 0000000..e79654b --- /dev/null +++ b/conductor/tracks/cluster_status_python_20260208/plan.md @@ -0,0 +1,31 @@ +# Plan: Cluster Status Script (`cluster_status_python`) + +## Phase 1: Environment and Project Structure [ ] +- [x] Task: Initialize Python project structure (venv, 
requirements.txt) +- [x] Task: Create initial configuration for Consul connectivity (default URLs and env var support) +- [~] Task: Conductor - User Manual Verification 'Phase 1: Environment and Project Structure' (Protocol in workflow.md) + +## Phase 2: Core Data Fetching [ ] +- [ ] Task: Implement Consul API client to fetch `navidrome` and `replica-navidrome` services + - [ ] Write tests for fetching services from Consul (mocking API) + - [ ] Implement service discovery logic +- [ ] Task: Implement LiteFS HTTP API client to fetch node status + - [ ] Write tests for fetching LiteFS status (mocking API) + - [ ] Implement logic to query `:20202/status` for each discovered node +- [ ] Task: Conductor - User Manual Verification 'Phase 2: Core Data Fetching' (Protocol in workflow.md) + +## Phase 3: Data Processing and Formatting [ ] +- [ ] Task: Implement data aggregation logic + - [ ] Write tests for aggregating Consul and LiteFS data into a single cluster state object + - [ ] Implement logic to calculate overall cluster health and role assignment +- [ ] Task: Implement CLI output formatting (Table and Color) + - [ ] Write tests for table formatting and color-coding logic + - [ ] Implement `tabulate` based output with a health summary +- [ ] Task: Conductor - User Manual Verification 'Phase 3: Data Processing and Formatting' (Protocol in workflow.md) + +## Phase 4: CLI Interface and Final Polishing [ ] +- [ ] Task: Implement command-line arguments (argparse) + - [ ] Write tests for CLI argument parsing (Consul URL overrides, etc.) 
+ - [ ] Finalize the `main` entry point +- [ ] Task: Final verification of script against requirements +- [ ] Task: Conductor - User Manual Verification 'Phase 4: CLI Interface and Final Polishing' (Protocol in workflow.md) diff --git a/conductor/tracks/cluster_status_python_20260208/spec.md b/conductor/tracks/cluster_status_python_20260208/spec.md new file mode 100644 index 0000000..33a634b --- /dev/null +++ b/conductor/tracks/cluster_status_python_20260208/spec.md @@ -0,0 +1,40 @@ +# Specification: Cluster Status Script (`cluster_status_python`) + +## Overview +Create a Python-based CLI script to be run on a local system (outside the cluster) to monitor the health and status of the Navidrome LiteFS/Consul cluster. This tool will bridge the gap for local monitoring without needing a local Consul instance. + +## Functional Requirements +- **Consul Connectivity:** + - Connect to a remote Consul instance. + - Default to a hardcoded URL with support for overrides via command-line arguments (e.g., `--consul-url`) or environment variables (`CONSUL_HTTP_ADDR`). + - Assume no Consul authentication token is required. +- **Service Discovery:** + - Query Consul for the `navidrome` (Primary) and `replica-navidrome` (Replica) services. + - Verify that services are registered correctly and health checks are passing. +- **Status Reporting:** + - Display a text-based table summarizing the state of all nodes in the cluster. + - Color-coded output for quick health assessment. + - Include a summary section at the top indicating overall cluster health. +- **Node-Level Details:** + - Role identification (Primary vs. Replica). + - Uptime of the LiteFS process. + - Advertise URL for each node. + - Replication Lag (for Replicas). + - Write-forwarding proxy target (for Replicas). + +## Non-Functional Requirements +- **Language:** Python 3.x. +- **Dependencies:** Use standard libraries or common packages like `requests` for API calls and `tabulate` for table formatting. 
+- **Portability:** Must run on Linux (user's OS) without requiring local Consul or Nomad binaries. + +## Acceptance Criteria +- [ ] Script successfully retrieves service list from remote Consul. +- [ ] Script correctly identifies the current Primary node based on Consul tags/service names. +- [ ] Script queries the LiteFS HTTP API (`:20202/status`) on each node to gather internal metrics. +- [ ] Output is formatted as a clear, readable text table. +- [ ] Overrides for Consul URL are functional. + +## Out of Scope +- Direct interaction with Nomad API (Consul is the source of truth for this script). +- Database-level inspection (SQL queries). +- Remote log tailing. diff --git a/scripts/cluster_status/Makefile b/scripts/cluster_status/Makefile new file mode 100644 index 0000000..df30d3f --- /dev/null +++ b/scripts/cluster_status/Makefile @@ -0,0 +1,11 @@ +.PHONY: setup test run + +setup: + python3 -m venv .venv + . .venv/bin/activate && pip install -r requirements.txt + +test: + . .venv/bin/activate && pytest -v --cov=. + +run: + . .venv/bin/activate && python3 cluster_status.py diff --git a/scripts/cluster_status/__init__.py b/scripts/cluster_status/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/cluster_status/config.py b/scripts/cluster_status/config.py new file mode 100644 index 0000000..0cd2fb8 --- /dev/null +++ b/scripts/cluster_status/config.py @@ -0,0 +1,15 @@ +import os + +DEFAULT_CONSUL_URL = "http://consul.service.dc1.consul:8500" + +def get_consul_url(url_arg=None): + """ + Resolves the Consul URL in the following order: + 1. CLI Argument (url_arg) + 2. Environment Variable (CONSUL_HTTP_ADDR) + 3. 
Default (DEFAULT_CONSUL_URL, http://consul.service.dc1.consul:8500) + """ + if url_arg: + return url_arg + + return os.environ.get("CONSUL_HTTP_ADDR", DEFAULT_CONSUL_URL) diff --git a/scripts/cluster_status/requirements.txt b/scripts/cluster_status/requirements.txt new file mode 100644 index 0000000..74a702c --- /dev/null +++ b/scripts/cluster_status/requirements.txt @@ -0,0 +1,4 @@ +requests +tabulate +pytest +pytest-cov diff --git a/scripts/cluster_status/tests/__init__.py b/scripts/cluster_status/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/cluster_status/tests/test_config.py b/scripts/cluster_status/tests/test_config.py new file mode 100644 index 0000000..bea269f --- /dev/null +++ b/scripts/cluster_status/tests/test_config.py @@ -0,0 +1,27 @@ +import os +import pytest +import config + +def test_default_consul_url(): + """Test that the default Consul URL is returned when no env var is set.""" + # Ensure env var is not set + if "CONSUL_HTTP_ADDR" in os.environ: + del os.environ["CONSUL_HTTP_ADDR"] + + assert config.get_consul_url() == "http://consul.service.dc1.consul:8500" + +def test_env_var_consul_url(): + """Test that the environment variable overrides the default.""" + os.environ["CONSUL_HTTP_ADDR"] = "http://10.0.0.1:8500" + try: + assert config.get_consul_url() == "http://10.0.0.1:8500" + finally: + del os.environ["CONSUL_HTTP_ADDR"] + +def test_cli_arg_consul_url(): + """Test that the CLI argument overrides everything.""" + os.environ["CONSUL_HTTP_ADDR"] = "http://10.0.0.1:8500" + try: + assert config.get_consul_url("http://cli-override:8500") == "http://cli-override:8500" + finally: + del os.environ["CONSUL_HTTP_ADDR"] \ No newline at end of file