conductor(checkpoint): Checkpoint end of Phase 2
This commit is contained in:
49
scripts/cluster_status/consul_client.py
Normal file
49
scripts/cluster_status/consul_client.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
import requests
|
||||||
|
|
||||||
|
def get_cluster_services(consul_url):
|
||||||
|
"""
|
||||||
|
Queries Consul health API for navidrome and replica-navidrome services.
|
||||||
|
Returns a list of dictionaries with node info.
|
||||||
|
"""
|
||||||
|
services = []
|
||||||
|
|
||||||
|
# Define roles to fetch
|
||||||
|
role_map = {
|
||||||
|
"navidrome": "primary",
|
||||||
|
"replica-navidrome": "replica"
|
||||||
|
}
|
||||||
|
|
||||||
|
for service_name, role in role_map.items():
|
||||||
|
url = f"{consul_url}/v1/health/service/{service_name}"
|
||||||
|
try:
|
||||||
|
response = requests.get(url, timeout=5)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
for item in data:
|
||||||
|
node_name = item["Node"]["Node"]
|
||||||
|
address = item["Node"]["Address"]
|
||||||
|
port = item["Service"]["Port"]
|
||||||
|
|
||||||
|
# Determine overall status from checks
|
||||||
|
checks = item.get("Checks", [])
|
||||||
|
status = "passing"
|
||||||
|
for check in checks:
|
||||||
|
if check["Status"] != "passing":
|
||||||
|
status = check["Status"]
|
||||||
|
break
|
||||||
|
|
||||||
|
services.append({
|
||||||
|
"node": node_name,
|
||||||
|
"address": address,
|
||||||
|
"port": port,
|
||||||
|
"role": role,
|
||||||
|
"status": status,
|
||||||
|
"service_id": item["Service"]["ID"]
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
# For now, we just don't add the service if it fails to fetch
|
||||||
|
# In a real script we might want to report the error
|
||||||
|
print(f"Error fetching {service_name}: {e}")
|
||||||
|
|
||||||
|
return services
|
||||||
51
scripts/cluster_status/litefs_client.py
Normal file
51
scripts/cluster_status/litefs_client.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
import requests
|
||||||
|
|
||||||
|
def get_node_status(node_address, port=20202):
|
||||||
|
"""
|
||||||
|
Queries the LiteFS HTTP API on a specific node for its status.
|
||||||
|
Tries /status first, then falls back to /debug/vars.
|
||||||
|
"""
|
||||||
|
# Try /status first
|
||||||
|
url = f"http://{node_address}:{port}/status"
|
||||||
|
try:
|
||||||
|
response = requests.get(url, timeout=3)
|
||||||
|
if response.status_code == 200:
|
||||||
|
data = response.json()
|
||||||
|
status = {
|
||||||
|
"is_primary": data.get("primary", False),
|
||||||
|
"uptime": data.get("uptime", 0),
|
||||||
|
"advertise_url": data.get("advertiseURL", "")
|
||||||
|
}
|
||||||
|
if "replicationLag" in data:
|
||||||
|
status["replication_lag"] = data["replicationLag"]
|
||||||
|
if "primaryURL" in data:
|
||||||
|
status["primary_url"] = data["primaryURL"]
|
||||||
|
return status
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Fallback to /debug/vars
|
||||||
|
url = f"http://{node_address}:{port}/debug/vars"
|
||||||
|
try:
|
||||||
|
response = requests.get(url, timeout=3)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
store = data.get("store", {})
|
||||||
|
|
||||||
|
status = {
|
||||||
|
"is_primary": store.get("isPrimary", False),
|
||||||
|
"uptime": "N/A", # Not available in /debug/vars
|
||||||
|
"advertise_url": f"http://{node_address}:{port}" # Best guess
|
||||||
|
}
|
||||||
|
|
||||||
|
# Look for lag in dbs or store if it exists in other versions
|
||||||
|
if "replicationLag" in store:
|
||||||
|
status["replication_lag"] = store["replicationLag"]
|
||||||
|
|
||||||
|
return status
|
||||||
|
except Exception as e:
|
||||||
|
return {
|
||||||
|
"error": str(e),
|
||||||
|
"is_primary": False,
|
||||||
|
"uptime": "N/A"
|
||||||
|
}
|
||||||
64
scripts/cluster_status/tests/test_consul_client.py
Normal file
64
scripts/cluster_status/tests/test_consul_client.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
import consul_client
|
||||||
|
|
||||||
|
@patch("requests.get")
|
||||||
|
def test_get_cluster_services(mock_get):
|
||||||
|
"""Test fetching healthy services from Consul."""
|
||||||
|
# Mock responses for navidrome and replica-navidrome
|
||||||
|
mock_navidrome = [
|
||||||
|
{
|
||||||
|
"Node": {"Node": "node1", "Address": "192.168.1.101"},
|
||||||
|
"Service": {"Service": "navidrome", "Port": 4533, "ID": "navidrome-1"},
|
||||||
|
"Checks": [{"Status": "passing"}]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
mock_replicas = [
|
||||||
|
{
|
||||||
|
"Node": {"Node": "node2", "Address": "192.168.1.102"},
|
||||||
|
"Service": {"Service": "replica-navidrome", "Port": 4533, "ID": "replica-1"},
|
||||||
|
"Checks": [{"Status": "passing"}]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Node": {"Node": "node3", "Address": "192.168.1.103"},
|
||||||
|
"Service": {"Service": "replica-navidrome", "Port": 4533, "ID": "replica-2"},
|
||||||
|
"Checks": [{"Status": "critical"}] # One failing check
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def side_effect(url, params=None, timeout=None):
|
||||||
|
if "health/service/navidrome" in url:
|
||||||
|
m = MagicMock()
|
||||||
|
m.json.return_value = mock_navidrome
|
||||||
|
m.raise_for_status.return_value = None
|
||||||
|
return m
|
||||||
|
elif "health/service/replica-navidrome" in url:
|
||||||
|
m = MagicMock()
|
||||||
|
m.json.return_value = mock_replicas
|
||||||
|
m.raise_for_status.return_value = None
|
||||||
|
return m
|
||||||
|
return MagicMock()
|
||||||
|
|
||||||
|
mock_get.side_effect = side_effect
|
||||||
|
|
||||||
|
consul_url = "http://consul:8500"
|
||||||
|
services = consul_client.get_cluster_services(consul_url)
|
||||||
|
|
||||||
|
# Should find 3 nodes total (node1 primary, node2 healthy replica, node3 critical replica)
|
||||||
|
assert len(services) == 3
|
||||||
|
|
||||||
|
# Check node1 (primary)
|
||||||
|
node1 = next(s for s in services if s["node"] == "node1")
|
||||||
|
assert node1["role"] == "primary"
|
||||||
|
assert node1["status"] == "passing"
|
||||||
|
assert node1["address"] == "192.168.1.101"
|
||||||
|
|
||||||
|
# Check node2 (healthy replica)
|
||||||
|
node2 = next(s for s in services if s["node"] == "node2")
|
||||||
|
assert node2["role"] == "replica"
|
||||||
|
assert node2["status"] == "passing"
|
||||||
|
|
||||||
|
# Check node3 (critical replica)
|
||||||
|
node3 = next(s for s in services if s["node"] == "node3")
|
||||||
|
assert node3["role"] == "replica"
|
||||||
|
assert node3["status"] == "critical"
|
||||||
58
scripts/cluster_status/tests/test_litefs_client.py
Normal file
58
scripts/cluster_status/tests/test_litefs_client.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
import litefs_client
|
||||||
|
|
||||||
|
@patch("requests.get")
|
||||||
|
def test_get_node_status_primary(mock_get):
|
||||||
|
"""Test fetching LiteFS status for a primary node via /status."""
|
||||||
|
mock_status = {
|
||||||
|
"clusterID": "cid1",
|
||||||
|
"primary": True,
|
||||||
|
"uptime": 3600,
|
||||||
|
"advertiseURL": "http://192.168.1.101:20202"
|
||||||
|
}
|
||||||
|
|
||||||
|
m = MagicMock()
|
||||||
|
m.status_code = 200
|
||||||
|
m.json.return_value = mock_status
|
||||||
|
mock_get.return_value = m
|
||||||
|
|
||||||
|
status = litefs_client.get_node_status("192.168.1.101")
|
||||||
|
|
||||||
|
assert status["is_primary"] is True
|
||||||
|
assert status["uptime"] == 3600
|
||||||
|
assert status["advertise_url"] == "http://192.168.1.101:20202"
|
||||||
|
|
||||||
|
@patch("requests.get")
|
||||||
|
def test_get_node_status_fallback(mock_get):
|
||||||
|
"""Test fetching LiteFS status via /debug/vars fallback."""
|
||||||
|
def side_effect(url, **kwargs):
|
||||||
|
m = MagicMock()
|
||||||
|
if "/status" in url:
|
||||||
|
m.status_code = 404
|
||||||
|
return m
|
||||||
|
elif "/debug/vars" in url:
|
||||||
|
m.status_code = 200
|
||||||
|
m.json.return_value = {
|
||||||
|
"store": {"isPrimary": True}
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
return m
|
||||||
|
|
||||||
|
mock_get.side_effect = side_effect
|
||||||
|
|
||||||
|
status = litefs_client.get_node_status("192.168.1.101")
|
||||||
|
|
||||||
|
assert status["is_primary"] is True
|
||||||
|
assert status["uptime"] == "N/A"
|
||||||
|
assert status["advertise_url"] == "http://192.168.1.101:20202"
|
||||||
|
|
||||||
|
@patch("requests.get")
|
||||||
|
def test_get_node_status_error(mock_get):
|
||||||
|
"""Test fetching LiteFS status with a connection error."""
|
||||||
|
mock_get.side_effect = Exception("Connection failed")
|
||||||
|
|
||||||
|
status = litefs_client.get_node_status("192.168.1.101")
|
||||||
|
|
||||||
|
assert "error" in status
|
||||||
|
assert status["is_primary"] is False
|
||||||
Reference in New Issue
Block a user