conductor(checkpoint): Checkpoint end of Phase 2 - Aggregator Refactor

This commit is contained in:
2026-02-09 06:13:09 -08:00
parent 079498caba
commit 655a9b2571
4 changed files with 128 additions and 169 deletions

View File

@@ -1,29 +1,32 @@
import pytest
from unittest.mock import patch
from unittest.mock import patch, MagicMock
import cluster_aggregator
@patch("consul_client.get_cluster_services")
@patch("litefs_client.get_node_status")
@patch("nomad_client.get_allocation_id")
@patch("nomad_client.get_allocation_logs")
@patch("nomad_client.get_job_allocations")
@patch("nomad_client.get_node_map")
def test_aggregate_cluster_status(mock_node_map, mock_nomad_logs, mock_nomad_id, mock_litefs, mock_consul):
"""Test aggregating Consul and LiteFS data."""
def test_aggregate_cluster_status(mock_node_map, mock_nomad_allocs, mock_litefs, mock_consul):
"""Test aggregating Nomad, Consul and LiteFS data."""
mock_node_map.return_value = {"id": "name"}
# Mock Consul data
# Mock Nomad allocations
mock_nomad_allocs.return_value = [
{"id": "alloc1", "node": "node1", "ip": "1.1.1.1"},
{"id": "alloc2", "node": "node2", "ip": "1.1.1.2"}
]
# Mock Consul data (only node1 is registered as primary)
mock_consul.return_value = [
{"node": "node1", "address": "1.1.1.1", "role": "primary", "status": "passing"},
{"node": "node2", "address": "1.1.1.2", "role": "replica", "status": "passing"}
{"node": "node1", "address": "1.1.1.1", "role": "primary", "status": "passing", "check_output": "OK"}
]
# Mock LiteFS data
def litefs_side_effect(addr, **kwargs):
if addr == "1.1.1.1":
return {"is_primary": True, "uptime": 100, "advertise_url": "url1", "dbs": {"db1": {}}}
return {"is_primary": False, "uptime": 50, "advertise_url": "url2", "replication_lag": 10, "dbs": {"db1": {}}}
return {"is_primary": True, "uptime": 100, "dbs": {"db1": {}}}
return {"is_primary": False, "uptime": 50, "dbs": {"db1": {}}}
mock_litefs.side_effect = litefs_side_effect
mock_nomad_id.return_value = None
cluster_data = cluster_aggregator.get_cluster_status("http://consul:8500")
@@ -32,27 +35,30 @@ def test_aggregate_cluster_status(mock_node_map, mock_nomad_logs, mock_nomad_id,
node1 = next(n for n in cluster_data["nodes"] if n["node"] == "node1")
assert node1["litefs_primary"] is True
assert node1["role"] == "primary"
assert node1["status"] == "passing"
node2 = next(n for n in cluster_data["nodes"] if n["node"] == "node2")
assert node2["litefs_primary"] is False
assert node2["replication_lag"] == 10
assert node2["status"] == "standby" # Not in Consul but replica
@patch("consul_client.get_cluster_services")
@patch("litefs_client.get_node_status")
@patch("nomad_client.get_job_allocations")
@patch("nomad_client.get_allocation_logs")
@patch("nomad_client.get_node_map")
def test_aggregate_cluster_status_unhealthy(mock_node_map, mock_nomad_logs, mock_nomad_allocs, mock_litefs, mock_consul):
    """Test health calculation when the LiteFS primary is unregistered in Consul.

    A node that Nomad schedules and LiteFS reports as primary, but which has
    no Consul service entry, must be reported with status "unregistered",
    drag overall cluster health down to "Unhealthy", and carry its Nomad
    allocation logs for triage.

    Note: @patch decorators map bottom-up onto parameters left-to-right,
    so mock_node_map is get_node_map, mock_nomad_logs is get_allocation_logs,
    mock_nomad_allocs is get_job_allocations, etc.
    """
    mock_node_map.return_value = {"id": "name"}
    # Nomad knows about a single allocation running on node1.
    mock_nomad_allocs.return_value = [
        {"id": "alloc1", "node": "node1", "ip": "1.1.1.1"}
    ]
    # LiteFS claims node1 is the primary...
    mock_litefs.return_value = {"is_primary": True, "uptime": 100, "dbs": {"db1": {}}}
    # ...but Consul has no service registration for it at all.
    mock_consul.return_value = []
    # Log fetch succeeds and returns the allocation's recent output.
    mock_nomad_logs.return_value = "error logs"
    cluster_data = cluster_aggregator.get_cluster_status("http://consul:8500")
    assert cluster_data["health"] == "Unhealthy"
    assert cluster_data["nodes"][0]["status"] == "unregistered"
    assert cluster_data["nodes"][0]["nomad_logs"] == "error logs"