diff --git a/scripts/cluster_status/consul_client.py b/scripts/cluster_status/consul_client.py new file mode 100644 index 0000000..21f69d6 --- /dev/null +++ b/scripts/cluster_status/consul_client.py @@ -0,0 +1,49 @@ +import requests + +def get_cluster_services(consul_url): + """ + Queries Consul health API for navidrome and replica-navidrome services. + Returns a list of dictionaries with node info. + """ + services = [] + + # Define roles to fetch + role_map = { + "navidrome": "primary", + "replica-navidrome": "replica" + } + + for service_name, role in role_map.items(): + url = f"{consul_url}/v1/health/service/{service_name}" + try: + response = requests.get(url, timeout=5) + response.raise_for_status() + data = response.json() + + for item in data: + node_name = item["Node"]["Node"] + address = item["Node"]["Address"] + port = item["Service"]["Port"] + + # Determine overall status from checks + checks = item.get("Checks", []) + status = "passing" + for check in checks: + if check["Status"] != "passing": + status = check["Status"] + break + + services.append({ + "node": node_name, + "address": address, + "port": port, + "role": role, + "status": status, + "service_id": item["Service"]["ID"] + }) + except Exception as e: + # For now, we just don't add the service if it fails to fetch + # In a real script we might want to report the error + print(f"Error fetching {service_name}: {e}") + + return services diff --git a/scripts/cluster_status/litefs_client.py b/scripts/cluster_status/litefs_client.py new file mode 100644 index 0000000..1386948 --- /dev/null +++ b/scripts/cluster_status/litefs_client.py @@ -0,0 +1,51 @@ +import requests + +def get_node_status(node_address, port=20202): + """ + Queries the LiteFS HTTP API on a specific node for its status. + Tries /status first, then falls back to /debug/vars. + """ + # Try /status first + url = f"http://{node_address}:{port}/status" + try: + response = requests.get(url, timeout=3) + if response.status_code == 200: + data = response.json() + status = { + "is_primary": data.get("primary", False), + "uptime": data.get("uptime", 0), + "advertise_url": data.get("advertiseURL", "") + } + if "replicationLag" in data: + status["replication_lag"] = data["replicationLag"] + if "primaryURL" in data: + status["primary_url"] = data["primaryURL"] + return status + except Exception: + pass + + # Fallback to /debug/vars + url = f"http://{node_address}:{port}/debug/vars" + try: + response = requests.get(url, timeout=3) + response.raise_for_status() + data = response.json() + store = data.get("store", {}) + + status = { + "is_primary": store.get("isPrimary", False), + "uptime": "N/A", # Not available in /debug/vars + "advertise_url": f"http://{node_address}:{port}" # Best guess + } + + # Look for lag in dbs or store if it exists in other versions + if "replicationLag" in store: + status["replication_lag"] = store["replicationLag"] + + return status + except Exception as e: + return { + "error": str(e), + "is_primary": False, + "uptime": "N/A" + } \ No newline at end of file diff --git a/scripts/cluster_status/tests/test_consul_client.py b/scripts/cluster_status/tests/test_consul_client.py new file mode 100644 index 0000000..bcbe19d --- /dev/null +++ b/scripts/cluster_status/tests/test_consul_client.py @@ -0,0 +1,64 @@ +import pytest +from unittest.mock import patch, MagicMock +import consul_client + +@patch("requests.get") +def test_get_cluster_services(mock_get): + """Test fetching healthy services from Consul.""" + # Mock responses for navidrome and replica-navidrome + mock_navidrome = [ + { + "Node": {"Node": "node1", "Address": "192.168.1.101"}, + "Service": {"Service": "navidrome", "Port": 4533, "ID": "navidrome-1"}, + "Checks": [{"Status": "passing"}] + } + ] + mock_replicas = [ + { + "Node": {"Node": "node2", "Address": "192.168.1.102"}, + "Service": {"Service": "replica-navidrome", "Port": 4533, "ID": "replica-1"}, + "Checks": [{"Status": "passing"}] + }, + { + "Node": {"Node": "node3", "Address": "192.168.1.103"}, + "Service": {"Service": "replica-navidrome", "Port": 4533, "ID": "replica-2"}, + "Checks": [{"Status": "critical"}] # One failing check + } + ] + + def side_effect(url, params=None, timeout=None): + if "health/service/navidrome" in url: + m = MagicMock() + m.json.return_value = mock_navidrome + m.raise_for_status.return_value = None + return m + elif "health/service/replica-navidrome" in url: + m = MagicMock() + m.json.return_value = mock_replicas + m.raise_for_status.return_value = None + return m + return MagicMock() + + mock_get.side_effect = side_effect + + consul_url = "http://consul:8500" + services = consul_client.get_cluster_services(consul_url) + + # Should find 3 nodes total (node1 primary, node2 healthy replica, node3 critical replica) + assert len(services) == 3 + + # Check node1 (primary) + node1 = next(s for s in services if s["node"] == "node1") + assert node1["role"] == "primary" + assert node1["status"] == "passing" + assert node1["address"] == "192.168.1.101" + + # Check node2 (healthy replica) + node2 = next(s for s in services if s["node"] == "node2") + assert node2["role"] == "replica" + assert node2["status"] == "passing" + + # Check node3 (critical replica) + node3 = next(s for s in services if s["node"] == "node3") + assert node3["role"] == "replica" + assert node3["status"] == "critical" diff --git a/scripts/cluster_status/tests/test_litefs_client.py b/scripts/cluster_status/tests/test_litefs_client.py new file mode 100644 index 0000000..cc228f3 --- /dev/null +++ b/scripts/cluster_status/tests/test_litefs_client.py @@ -0,0 +1,58 @@ +import pytest +from unittest.mock import patch, MagicMock +import litefs_client + +@patch("requests.get") +def test_get_node_status_primary(mock_get): + """Test fetching LiteFS status for a primary node via /status.""" + mock_status = { + "clusterID": "cid1", + "primary": True, + "uptime": 3600, + "advertiseURL": "http://192.168.1.101:20202" + } + + m = MagicMock() + m.status_code = 200 + m.json.return_value = mock_status + mock_get.return_value = m + + status = litefs_client.get_node_status("192.168.1.101") + + assert status["is_primary"] is True + assert status["uptime"] == 3600 + assert status["advertise_url"] == "http://192.168.1.101:20202" + +@patch("requests.get") +def test_get_node_status_fallback(mock_get): + """Test fetching LiteFS status via /debug/vars fallback.""" + def side_effect(url, **kwargs): + m = MagicMock() + if "/status" in url: + m.status_code = 404 + return m + elif "/debug/vars" in url: + m.status_code = 200 + m.json.return_value = { + "store": {"isPrimary": True} + } + return m + return m + + mock_get.side_effect = side_effect + + status = litefs_client.get_node_status("192.168.1.101") + + assert status["is_primary"] is True + assert status["uptime"] == "N/A" + assert status["advertise_url"] == "http://192.168.1.101:20202" + +@patch("requests.get") +def test_get_node_status_error(mock_get): + """Test fetching LiteFS status with a connection error.""" + mock_get.side_effect = Exception("Connection failed") + + status = litefs_client.get_node_status("192.168.1.101") + + assert "error" in status + assert status["is_primary"] is False \ No newline at end of file