feat(diagnose): Update Consul client to fetch health check output and display diagnostics

2026-02-08 07:44:22 -08:00
parent 3c4c1c4d80
commit 7c0c146d0c
5 changed files with 112 additions and 2 deletions
--- a/scripts/cluster_status/cli.py
+++ b/scripts/cluster_status/cli.py
@@ -25,6 +25,11 @@ def main():
        print(output_formatter.format_summary(cluster_data, use_color=not args.no_color))
        print("\n" + output_formatter.format_node_table(cluster_data["nodes"], use_color=not args.no_color))
        # Diagnostics
        diagnostics = output_formatter.format_diagnostics(cluster_data["nodes"], use_color=not args.no_color)
        if diagnostics:
            print(diagnostics)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
--- a/scripts/cluster_status/consul_client.py
+++ b/scripts/cluster_status/consul_client.py
@@ -25,13 +25,19 @@ def get_cluster_services(consul_url):
                address = item["Node"]["Address"]
                port = item["Service"]["Port"]
-                # Determine overall status from checks
+                # Determine overall status from checks and extract output
                checks = item.get("Checks", [])
                status = "passing"
                check_output = ""
                for check in checks:
                    if check["Status"] != "passing":
                        status = check["Status"]
                        check_output = check.get("Output", "")
                        break
                    else:
                        # Passing check: keep the first non-empty output as a fallback; a later failing check overwrites it
                        if not check_output:
                            check_output = check.get("Output", "")
                services.append({
                    "node": node_name,
@@ -39,7 +45,8 @@ def get_cluster_services(consul_url):
                    "port": port,
                    "role": role,
                    "status": status,
-                    "service_id": item["Service"]["ID"]
+                    "service_id": item["Service"]["ID"],
                    "check_output": check_output
                })
        except Exception as e:
            # For now, we just don't add the service if it fails to fetch
--- a/scripts/cluster_status/output_formatter.py
+++ b/scripts/cluster_status/output_formatter.py
@@ -80,3 +80,28 @@ def format_node_table(nodes, use_color=True):
        ])
    return tabulate(table_data, headers=headers, tablefmt="simple")
 def format_diagnostics(nodes, use_color=True):
    """
    Build the DIAGNOSTICS section for nodes that report a problem.

    A node is included when its "status" is not "passing" or when it has a
    truthy "litefs_error" entry.

    Args:
        nodes: Iterable of node dicts. Reads the "node" and "status" keys and
            the optional "check_output" and "litefs_error" keys.
        use_color: Forwarded to colorize() for every styled fragment, so the
            section contains no raw style codes when color is disabled.

    Returns:
        The section as one newline-joined string, or "" when no node
        qualifies.
    """
    def label(text):
        # Labels go through colorize() instead of interpolating BOLD/RESET
        # directly, otherwise use_color=False would still emit style codes.
        return colorize(text, BOLD, use_color)

    error_nodes = [n for n in nodes if n["status"] != "passing" or n.get("litefs_error")]
    if not error_nodes:
        return ""

    output = ["", colorize("DIAGNOSTICS", BOLD, use_color), "=" * 20]
    for node in error_nodes:
        output.append(f"\n{label('Node:')} {colorize(node['node'], RED, use_color)}")

        if node["status"] != "passing":
            output.append(f"  {label('Consul Check Status:')} {colorize(node['status'], RED, use_color)}")
            # Tolerate a missing/None output and skip whitespace-only text.
            check_output = str(node.get("check_output") or "").strip()
            if check_output:
                output.append(f"  {label('Consul Check Output:')}")
                # Indent every line so multi-line check output stays aligned
                # under its label, not just the first line.
                output.extend(f"    {line}" for line in check_output.splitlines())

        if node.get("litefs_error"):
            output.append(f"  {label('LiteFS API Error:')} {colorize(node['litefs_error'], RED, use_color)}")

    return "\n".join(output)
--- a/scripts/cluster_status/tests/test_consul_client.py
+++ b/scripts/cluster_status/tests/test_consul_client.py
@@ -62,3 +62,47 @@ def test_get_cluster_services(mock_get):
    node3 = next(s for s in services if s["node"] == "node3")
    assert node3["role"] == "replica"
    assert node3["status"] == "critical"
@patch("requests.get")
def test_get_cluster_services_with_errors(mock_get):
    """A failing health check's output text is exposed on the service entry."""
    primary_payload = [
        {
            "Node": {"Node": "node1", "Address": "192.168.1.101"},
            "Service": {"Service": "navidrome", "Port": 4533, "ID": "navidrome-1"},
            "Checks": [
                {"Status": "passing", "Output": "HTTP GET http://192.168.1.101:4533/app: 200 OK"}
            ]
        }
    ]
    replica_payload = [
        {
            "Node": {"Node": "node3", "Address": "192.168.1.103"},
            "Service": {"Service": "replica-navidrome", "Port": 4533, "ID": "replica-2"},
            "Checks": [
                {"Status": "critical", "Output": "HTTP GET http://192.168.1.103:4533/app: 500 Internal Server Error"}
            ]
        }
    ]

    # URL fragment -> JSON payload served for it; checked in insertion order.
    payload_by_endpoint = {
        "health/service/navidrome": primary_payload,
        "health/service/replica-navidrome": replica_payload,
    }

    def fake_get(url, params=None, timeout=None):
        for endpoint, payload in payload_by_endpoint.items():
            if endpoint in url:
                response = MagicMock()
                response.json.return_value = payload
                response.raise_for_status.return_value = None
                return response
        # Any other URL gets an unconfigured mock.
        return MagicMock()

    mock_get.side_effect = fake_get

    services = consul_client.get_cluster_services("http://consul:8500")

    failing = next(filter(lambda svc: svc["node"] == "node3", services))
    assert failing["status"] == "critical"
    assert "500 Internal Server Error" in failing["check_output"]
--- a/scripts/cluster_status/tests/test_formatter.py
+++ b/scripts/cluster_status/tests/test_formatter.py
@@ -29,3 +29,32 @@ def test_format_node_table():
    assert "node1" in table
    assert "primary" in table
    assert "passing" in table
 def test_format_diagnostics():
    """The diagnostics text names the failing node and both of its error details."""
    failing_node = {
        "node": "node3",
        "status": "critical",
        "check_output": "500 Internal Error",
        "litefs_error": "Connection Timeout",
    }

    rendered = output_formatter.format_diagnostics([failing_node], use_color=False)

    expected_fragments = (
        "DIAGNOSTICS",
        "node3",
        "500 Internal Error",
        "Connection Timeout",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
 def test_format_diagnostics_empty():
    """A passing node without a LiteFS error yields an empty diagnostics string."""
    healthy_node = {"node": "node1", "status": "passing", "litefs_error": None}

    rendered = output_formatter.format_diagnostics([healthy_node], use_color=False)

    assert rendered == ""