conductor(checkpoint): Checkpoint end of Phase 1

This commit is contained in:
2026-02-08 11:15:55 -08:00
parent 22ec8a5cc0
commit 860000bd04
8 changed files with 204 additions and 50 deletions

View File

@@ -6,8 +6,10 @@ import cluster_aggregator
@patch("litefs_client.get_node_status")
@patch("nomad_client.get_allocation_id")
@patch("nomad_client.get_allocation_logs")
def test_aggregate_cluster_status(mock_nomad_logs, mock_nomad_id, mock_litefs, mock_consul):
@patch("nomad_client.get_node_map")
def test_aggregate_cluster_status(mock_node_map, mock_nomad_logs, mock_nomad_id, mock_litefs, mock_consul):
"""Test aggregating Consul and LiteFS data."""
mock_node_map.return_value = {"id": "name"}
# Mock Consul data
mock_consul.return_value = [
{"node": "node1", "address": "1.1.1.1", "role": "primary", "status": "passing"},
@@ -15,10 +17,10 @@ def test_aggregate_cluster_status(mock_nomad_logs, mock_nomad_id, mock_litefs, m
]
# Mock LiteFS data
def litefs_side_effect(addr):
def litefs_side_effect(addr, **kwargs):
if addr == "1.1.1.1":
return {"is_primary": True, "uptime": 100, "advertise_url": "url1"}
return {"is_primary": False, "uptime": 50, "advertise_url": "url2", "replication_lag": 10}
return {"is_primary": True, "uptime": 100, "advertise_url": "url1", "dbs": {"db1": {}}}
return {"is_primary": False, "uptime": 50, "advertise_url": "url2", "replication_lag": 10, "dbs": {"db1": {}}}
mock_litefs.side_effect = litefs_side_effect
mock_nomad_id.return_value = None
@@ -40,15 +42,17 @@ def test_aggregate_cluster_status(mock_nomad_logs, mock_nomad_id, mock_litefs, m
@patch("litefs_client.get_node_status")
@patch("nomad_client.get_allocation_id")
@patch("nomad_client.get_allocation_logs")
def test_aggregate_cluster_status_unhealthy(mock_nomad_logs, mock_nomad_id, mock_litefs, mock_consul):
@patch("nomad_client.get_node_map")
def test_aggregate_cluster_status_unhealthy(mock_node_map, mock_nomad_logs, mock_nomad_id, mock_litefs, mock_consul):
"""Test health calculation when nodes are critical."""
mock_node_map.return_value = {}
mock_consul.return_value = [
{"node": "node1", "address": "1.1.1.1", "role": "primary", "status": "critical"}
]
mock_litefs.return_value = {"is_primary": True, "uptime": 100}
mock_litefs.return_value = {"is_primary": True, "uptime": 100, "dbs": {"db1": {}}}
mock_nomad_id.return_value = "alloc1"
mock_nomad_logs.return_value = "error logs"
cluster_data = cluster_aggregator.get_cluster_status("http://consul:8500")
assert cluster_data["health"] == "Unhealthy"
assert cluster_data["nodes"][0]["nomad_logs"] == "error logs"
assert cluster_data["nodes"][0]["nomad_logs"] == "error logs"

View File

@@ -6,12 +6,13 @@ def test_format_cluster_summary():
cluster_data = {
"health": "Healthy",
"primary_count": 1,
"nodes": []
"nodes": [],
"nomad_available": False
}
summary = output_formatter.format_summary(cluster_data)
assert "Healthy" in summary
assert "Primaries" in summary
assert "1" in summary
assert "WARNING: Nomad CLI unavailable" in summary
def test_format_node_table():
"""Test the table generation."""

View File

@@ -55,4 +55,30 @@ def test_get_node_status_error(mock_get):
status = litefs_client.get_node_status("192.168.1.101")
assert "error" in status
assert status["is_primary"] is False
assert status["is_primary"] is False
@patch("nomad_client.exec_command")
def test_get_node_status_nomad_exec(mock_exec):
    """Check that LiteFS status can be read from `nomad alloc exec` output.

    ``nomad_client.exec_command`` is patched to return a canned text-format
    status report, and ``requests.get`` is patched to raise on every call.
    Presumably ``get_node_status`` tries HTTP first and falls back to the
    exec path when an ``alloc_id`` is supplied -- confirm against
    ``litefs_client``.
    """
    # Canned LiteFS status output (text format) served by the patched exec.
    mock_exec.return_value = """
Config:
Path: /etc/litefs.yml
...
Status:
Primary: true
Uptime: 1h5m10s
Replication Lag: 0s
"""

    # Any HTTP attempt made while fetching the status raises immediately.
    with patch("requests.get", side_effect=Exception("HTTP failed")):
        node_status = litefs_client.get_node_status("1.1.1.1", alloc_id="abc12345")

    assert node_status["is_primary"] is True
    assert node_status["uptime"] == "1h5m10s"
    # The canned report lists a lag of "0s" even though the node is primary.
    assert node_status["replication_lag"] == "0s"

View File

@@ -55,4 +55,37 @@ def test_restart_allocation(mock_run):
mock_run.assert_called_with(
["nomad", "alloc", "restart", "abc12345"],
capture_output=True, text=True, check=True
)
)
@patch("subprocess.run")
def test_exec_command(mock_run):
    """Test executing a command in an allocation.

    ``subprocess.run`` is patched to return a successful fake result. The
    test checks that ``exec_command`` returns the captured stdout and that
    the nomad CLI was invoked with the expected argument list and options.
    """
    m = MagicMock()
    m.stdout = "Command output"
    # The real CompletedProcess attribute is `returncode`; a misspelled name
    # would leave `m.returncode` as an auto-created (truthy) MagicMock.
    m.returncode = 0
    mock_run.return_value = m

    output = nomad_client.exec_command("abc12345", ["ls", "/data"])

    assert output == "Command output"
    mock_run.assert_called_with(
        ["nomad", "alloc", "exec", "-task", "navidrome", "abc12345", "ls", "/data"],
        capture_output=True, text=True, check=True
    )
@patch("subprocess.run")
def test_exec_command_failure(mock_run):
    """Verify that exec_command reports a failed nomad call instead of raising.

    ``subprocess.run`` is patched to raise ``CalledProcessError`` carrying a
    stderr payload; the returned text must contain the "Nomad Error" marker.
    """
    failure = subprocess.CalledProcessError(1, "nomad", stderr="Nomad error")
    mock_run.side_effect = failure

    result = nomad_client.exec_command("abc12345", ["ls", "/data"])

    assert "Nomad Error" in result
    # str(CalledProcessError) does not embed stderr, so the raw stderr text is
    # expected to be absent -- presumably exec_command formats the exception
    # itself rather than its stderr; confirm against nomad_client.
    assert "Nomad error" not in result
@patch("subprocess.run")
def test_get_node_map_failure(mock_run):
    """Verify get_node_map returns an empty mapping when subprocess.run fails.

    ``subprocess.run`` is patched to raise ``FileNotFoundError``; the call
    must not propagate the exception and must yield ``{}``.
    """
    mock_run.side_effect = FileNotFoundError("No such file")

    # Reaching the assertion at all proves the error was not re-raised.
    assert nomad_client.get_node_map() == {}