From 7c0c146d0c1af02cfa20aea0d40d52bd48f2754f Mon Sep 17 00:00:00 2001
From: sstent
Date: Sun, 8 Feb 2026 07:44:22 -0800
Subject: [PATCH] feat(diagnose): Update Consul client to fetch health check
 output and display diagnostics

---
 scripts/cluster_status/cli.py                 |  5 +++
 scripts/cluster_status/consul_client.py       | 11 ++++-
 scripts/cluster_status/output_formatter.py    | 30 +++++++++++++
 .../tests/test_consul_client.py               | 44 +++++++++++++++++++
 .../cluster_status/tests/test_formatter.py    | 29 ++++++++++++
 5 files changed, 117 insertions(+), 2 deletions(-)

diff --git a/scripts/cluster_status/cli.py b/scripts/cluster_status/cli.py
index dcfd09f..55fe40d 100755
--- a/scripts/cluster_status/cli.py
+++ b/scripts/cluster_status/cli.py
@@ -25,6 +25,11 @@ def main():
         print(output_formatter.format_summary(cluster_data, use_color=not args.no_color))
         print("\n" + output_formatter.format_node_table(cluster_data["nodes"], use_color=not args.no_color))
 
+        # Diagnostics
+        diagnostics = output_formatter.format_diagnostics(cluster_data["nodes"], use_color=not args.no_color)
+        if diagnostics:
+            print(diagnostics)
+
     except Exception as e:
         print(f"Error: {e}", file=sys.stderr)
         sys.exit(1)
diff --git a/scripts/cluster_status/consul_client.py b/scripts/cluster_status/consul_client.py
index 21f69d6..b961e58 100644
--- a/scripts/cluster_status/consul_client.py
+++ b/scripts/cluster_status/consul_client.py
@@ -25,13 +25,19 @@ def get_cluster_services(consul_url):
                 address = item["Node"]["Address"]
                 port = item["Service"]["Port"]
 
-                # Determine overall status from checks
+                # Determine overall status from checks and extract output
                 checks = item.get("Checks", [])
                 status = "passing"
+                check_output = ""
                 for check in checks:
                     if check["Status"] != "passing":
                         status = check["Status"]
+                        check_output = check.get("Output", "")
                         break
+                    else:
+                        # Passing so far: keep the first check's output unless a later check fails
+                        if not check_output:
+                            check_output = check.get("Output", "")
 
                 services.append({
                     "node": node_name,
@@ -39,7 +45,8 @@ def get_cluster_services(consul_url):
                     "port": port,
                     "role": role,
                     "status": status,
-                    "service_id": item["Service"]["ID"]
+                    "service_id": item["Service"]["ID"],
+                    "check_output": check_output
                 })
         except Exception as e:
             # For now, we just don't add the service if it fails to fetch
diff --git a/scripts/cluster_status/output_formatter.py b/scripts/cluster_status/output_formatter.py
index 6d7e790..4c9d2b7 100644
--- a/scripts/cluster_status/output_formatter.py
+++ b/scripts/cluster_status/output_formatter.py
@@ -80,3 +80,33 @@ def format_node_table(nodes, use_color=True):
         ])
 
     return tabulate(table_data, headers=headers, tablefmt="simple")
+
+def format_diagnostics(nodes, use_color=True):
+    """
+    Formats detailed diagnostic information for nodes with errors.
+
+    A node is reported when its "status" is not "passing" or it has a truthy
+    "litefs_error". Returns "" when no node qualifies, otherwise the section
+    as one newline-joined string. Labels go through colorize() rather than
+    raw BOLD/RESET so that use_color governs every part of the output.
+    """
+    error_nodes = [n for n in nodes if n["status"] != "passing" or n.get("litefs_error")]
+
+    if not error_nodes:
+        return ""
+
+    output = ["", colorize("DIAGNOSTICS", BOLD, use_color), "=" * 20]
+
+    for node in error_nodes:
+        output.append(f"\n{colorize('Node:', BOLD, use_color)} {colorize(node['node'], RED, use_color)}")
+
+        if node["status"] != "passing":
+            output.append(f" {colorize('Consul Check Status:', BOLD, use_color)} {colorize(node['status'], RED, use_color)}")
+            if node.get("check_output"):
+                output.append(f" {colorize('Consul Check Output:', BOLD, use_color)}\n {node['check_output'].strip()}")
+
+        if node.get("litefs_error"):
+            output.append(f" {colorize('LiteFS API Error:', BOLD, use_color)} {colorize(node['litefs_error'], RED, use_color)}")
+
+    return "\n".join(output)
+
diff --git a/scripts/cluster_status/tests/test_consul_client.py b/scripts/cluster_status/tests/test_consul_client.py
index bcbe19d..cf3cfa1 100644
--- a/scripts/cluster_status/tests/test_consul_client.py
+++ b/scripts/cluster_status/tests/test_consul_client.py
@@ -62,3 +62,47 @@ def test_get_cluster_services(mock_get):
     node3 = next(s for s in services if s["node"] == "node3")
     assert node3["role"] == "replica"
     assert node3["status"] == "critical"
+
+@patch("requests.get")
+def test_get_cluster_services_with_errors(mock_get):
+    """Test fetching services with detailed health check output."""
+    mock_navidrome = [
+        {
+            "Node": {"Node": "node1", "Address": "192.168.1.101"},
+            "Service": {"Service": "navidrome", "Port": 4533, "ID": "navidrome-1"},
+            "Checks": [
+                {"Status": "passing", "Output": "HTTP GET http://192.168.1.101:4533/app: 200 OK"}
+            ]
+        }
+    ]
+    mock_replicas = [
+        {
+            "Node": {"Node": "node3", "Address": "192.168.1.103"},
+            "Service": {"Service": "replica-navidrome", "Port": 4533, "ID": "replica-2"},
+            "Checks": [
+                {"Status": "critical", "Output": "HTTP GET http://192.168.1.103:4533/app: 500 Internal Server Error"}
+            ]
+        }
+    ]
+
+    def side_effect(url, params=None, timeout=None):
+        if "health/service/navidrome" in url:
+            m = MagicMock()
+            m.json.return_value = mock_navidrome
+            m.raise_for_status.return_value = None
+            return m
+        elif "health/service/replica-navidrome" in url:
+            m = MagicMock()
+            m.json.return_value = mock_replicas
+            m.raise_for_status.return_value = None
+            return m
+        return MagicMock()
+
+    mock_get.side_effect = side_effect
+
+    services = consul_client.get_cluster_services("http://consul:8500")
+
+    node3 = next(s for s in services if s["node"] == "node3")
+    assert node3["status"] == "critical"
+    assert "500 Internal Server Error" in node3["check_output"]
+
diff --git a/scripts/cluster_status/tests/test_formatter.py b/scripts/cluster_status/tests/test_formatter.py
index 5a75442..89bb633 100644
--- a/scripts/cluster_status/tests/test_formatter.py
+++ b/scripts/cluster_status/tests/test_formatter.py
@@ -29,3 +29,32 @@ def test_format_node_table():
     assert "node1" in table
     assert "primary" in table
     assert "passing" in table
+
+def test_format_diagnostics():
+    """Test the diagnostics section generation."""
+    nodes = [
+        {
+            "node": "node3",
+            "status": "critical",
+            "check_output": "500 Internal Error",
+            "litefs_error": "Connection Timeout"
+        }
+    ]
+    diagnostics = output_formatter.format_diagnostics(nodes, use_color=False)
+    assert "DIAGNOSTICS" in diagnostics
+    assert "node3" in diagnostics
+    assert "500 Internal Error" in diagnostics
+    assert "Connection Timeout" in diagnostics
+
+def test_format_diagnostics_empty():
+    """Test that diagnostics section is empty when no errors exist."""
+    nodes = [
+        {
+            "node": "node1",
+            "status": "passing",
+            "litefs_error": None
+        }
+    ]
+    diagnostics = output_formatter.format_diagnostics(nodes, use_color=False)
+    assert diagnostics == ""
+