conductor(checkpoint): Checkpoint end of Phase 1
This commit is contained in:
@@ -12,8 +12,15 @@ def get_cluster_status(consul_url, job_id="navidrome-litefs"):
|
||||
is_healthy = True
|
||||
primary_count = 0
|
||||
|
||||
# Check Nomad connectivity
|
||||
node_map = nomad_client.get_node_map()
|
||||
nomad_available = bool(node_map)
|
||||
|
||||
for node in consul_nodes:
|
||||
litefs_status = litefs_client.get_node_status(node["address"])
|
||||
# Fetch allocation ID first to enable nomad exec fallback
|
||||
alloc_id = nomad_client.get_allocation_id(node["node"], job_id)
|
||||
|
||||
litefs_status = litefs_client.get_node_status(node["address"], alloc_id=alloc_id)
|
||||
|
||||
# Merge data
|
||||
node_data = {
|
||||
@@ -23,20 +30,26 @@ def get_cluster_status(consul_url, job_id="navidrome-litefs"):
|
||||
"advertise_url": litefs_status.get("advertise_url", ""),
|
||||
"replication_lag": litefs_status.get("replication_lag", "N/A"),
|
||||
"litefs_error": litefs_status.get("error", None),
|
||||
"nomad_logs": None
|
||||
"nomad_logs": None,
|
||||
"alloc_id": alloc_id
|
||||
}
|
||||
|
||||
if node["status"] != "passing":
|
||||
is_healthy = False
|
||||
# Fetch Nomad logs for critical nodes
|
||||
alloc_id = nomad_client.get_allocation_id(node["node"], job_id)
|
||||
if alloc_id:
|
||||
node_data["alloc_id"] = alloc_id
|
||||
node_data["nomad_logs"] = nomad_client.get_allocation_logs(alloc_id)
|
||||
|
||||
if node_data["litefs_primary"]:
|
||||
primary_count += 1
|
||||
|
||||
# Check for active databases
|
||||
node_dbs = litefs_status.get("dbs", {})
|
||||
if node_dbs:
|
||||
node_data["active_dbs"] = list(node_dbs.keys())
|
||||
else:
|
||||
node_data["active_dbs"] = []
|
||||
|
||||
aggregated_nodes.append(node_data)
|
||||
|
||||
# Final health check
|
||||
@@ -48,8 +61,14 @@ def get_cluster_status(consul_url, job_id="navidrome-litefs"):
|
||||
elif primary_count > 1:
|
||||
health = "Split Brain Detected (Multiple Primaries)"
|
||||
|
||||
# Global warning if no DBs found on any node
|
||||
all_dbs = [db for n in aggregated_nodes for db in n.get("active_dbs", [])]
|
||||
if not all_dbs:
|
||||
health = f"{health} (WARNING: No LiteFS Databases Found)"
|
||||
|
||||
return {
|
||||
"health": health,
|
||||
"nodes": aggregated_nodes,
|
||||
"primary_count": primary_count
|
||||
"primary_count": primary_count,
|
||||
"nomad_available": nomad_available
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user