diff --git a/consul-monitor/app.py b/consul-monitor/app.py index d52b51d..9745f2c 100644 --- a/consul-monitor/app.py +++ b/consul-monitor/app.py @@ -50,6 +50,19 @@ def index(): # Generate URLs for each instance in each service for service in services: + # Create a set of unique ports for this service + unique_ports = set() + for instance in service['instances']: + if instance['port']: + unique_ports.add(instance['port']) + + # Create port-based URLs + service['port_urls'] = [ + f"http://{service['name']}.service.dc1.consul:{port}" + for port in unique_ports + ] + + # Keep instance URLs for other display purposes for instance in service['instances']: if instance['port']: instance['url'] = f"http://{service['name']}.service.dc1.consul:{instance['port']}" @@ -73,12 +86,22 @@ def get_services(): consul_available = consul_client.is_consul_available() # Generate URLs for each instance in each service + # Generate URLs for each service and its instances for service in services: + # Create a set of unique ports for port-based URLs + unique_ports = set() for instance in service['instances']: if instance['port']: + unique_ports.add(instance['port']) instance['url'] = f"http://{service['name']}.service.dc1.consul:{instance['port']}" else: instance['url'] = None + + # Add port-based URLs to service object + service['port_urls'] = [ + f"http://{service['name']}.service.dc1.consul:{port}" + for port in unique_ports + ] response = { 'status': 'success', @@ -276,5 +299,12 @@ def health_check(): 'timestamp': datetime.utcnow().isoformat() }) +# Log 404 errors +@app.after_request +def log_404(response): + if response.status_code == 404: + app.logger.warning(f"404 for {request.path} from {request.remote_addr}") + return response + if __name__ == '__main__': app.run(host='0.0.0.0', port=5000) diff --git a/consul-monitor/consul_client.py b/consul-monitor/consul_client.py index 1944ef7..0121686 100644 --- a/consul-monitor/consul_client.py +++ b/consul-monitor/consul_client.py @@ -1,5 +1,6 @@ import requests import logging +from collections import defaultdict # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -10,16 +11,29 @@ CONSUL_HOST = "consul.service.dc1.consul" CONSUL_PORT = 8500 CONSUL_BASE_URL = f"http://{CONSUL_HOST}:{CONSUL_PORT}" -def get_consul_services(): - """Fetch all registered services from Consul""" - url = f"{CONSUL_BASE_URL}/v1/agent/services" +def get_all_service_names(): + """Fetch all service names from Consul catalog""" + url = f"{CONSUL_BASE_URL}/v1/catalog/services" + try: + response = requests.get(url, timeout=5) + response.raise_for_status() + services = response.json() + # Filter out consul service and return service names + return [name for name in services.keys() if name != 'consul'] + except requests.exceptions.RequestException as e: + logger.error(f"Failed to fetch Consul services: {e}") + return [] + +def get_service_instances(service_name): + """Fetch instances of a service from Consul catalog""" + url = f"{CONSUL_BASE_URL}/v1/catalog/service/{service_name}" try: response = requests.get(url, timeout=5) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: - logger.error(f"Failed to fetch Consul services: {e}") - raise + logger.error(f"Failed to fetch instances for service {service_name}: {e}") + return [] def get_service_health(service_name): """Fetch health checks for a specific service""" @@ -27,20 +41,10 @@ def get_service_health(service_name): try: response = requests.get(url, timeout=5) response.raise_for_status() - data = response.json() - - # Process health checks - health_checks = [] - for entry in data: - for check in entry.get('Checks', []): - health_checks.append({ - 'check_name': check.get('Name', ''), - 'status': check.get('Status', '') - }) - return health_checks + return response.json() except requests.exceptions.RequestException as e: logger.error(f"Failed to fetch health for service {service_name}: {e}") - raise + return [] def is_consul_available(): """Check if Consul is reachable""" @@ -50,67 +54,90 @@ def is_consul_available(): except requests.exceptions.RequestException: return False -def calculate_composite_health(services): - """Calculate overall health status for a group of services""" - status_priority = {'critical': 3, 'warning': 2, 'passing': 1} - worst_status = 'passing' - - for service in services: - for check in service['health_checks']: - if status_priority[check['status']] > status_priority[worst_status]: - worst_status = check['status'] - return worst_status - -def group_services_by_instance(services): - """Group services by their instance address""" - instances = {} - for service in services.values(): - address = service['address'] - if address not in instances: - instances[address] = { - 'address': address, - 'services': [], - 'health_status': 'passing' - } - instances[address]['services'].append(service) - - # Calculate composite health for each instance - for instance in instances.values(): - instance['health_status'] = calculate_composite_health(instance['services']) - - return instances - def fetch_all_service_data(): """Fetch service data and health status for all services, grouped by instance""" try: - services = get_consul_services() + # Get all service names + service_names = get_all_service_names() + if not service_names: + logger.warning("No services found in Consul catalog") + return {} + + logger.info(f"Received {len(service_names)} services from Consul") + + # Initialize data structures service_data = {} + instances = defaultdict(lambda: { + 'address': '', + 'health_status': 'passing', + 'services': [] + }) - for service_id, service_info in services.items(): - service_name = service_info.get('Service', '') - health_checks = [] + # Process each service + for service_name in service_names: + # Get service instances from catalog + catalog_instances = get_service_instances(service_name) + if not catalog_instances: + continue + + # Get health information + health_data = get_service_health(service_name) - try: - health_checks = get_service_health(service_name) - except requests.exceptions.RequestException: - # Log but continue with other services - logger.warning(f"Skipping health checks for service {service_name}") + # Create a mapping of Node+ServiceID to health checks + health_map = {} + for entry in health_data: + node = entry['Node']['Node'] + service_id = entry['Service']['ID'] + health_map[(node, service_id)] = entry['Checks'] - service_data[service_id] = { - 'id': service_id, - 'name': service_info.get('Service', ''), - 'address': service_info.get('Address', ''), - 'port': service_info.get('Port', None), - 'tags': service_info.get('Tags', []), - 'meta': service_info.get('Meta', {}), - 'health_checks': health_checks - } + # Process each instance + for instance in catalog_instances: + node = instance['Node'] + service_id = instance['ServiceID'] + address = instance['ServiceAddress'] or instance['Address'] + port = instance['ServicePort'] + + # Get health checks for this instance + checks = health_map.get((node, service_id), []) + health_checks = [ + {'check_name': c.get('Name', ''), 'status': c.get('Status', '')} + for c in checks + ] + + # Create service object + service_obj = { + 'id': service_id, + 'name': service_name, + 'address': address, + 'port': port, + 'tags': instance.get('ServiceTags', []), + 'meta': instance.get('ServiceMeta', {}), + 'health_checks': health_checks + } + + # Add to service data + service_data[service_id] = service_obj + + # Add to instance grouping + if address not in instances: + instances[address]['address'] = address + instances[address]['services'].append(service_obj) + + # Calculate composite health for each instance + for instance in instances.values(): + status_priority = {'critical': 3, 'warning': 2, 'passing': 1} + worst_status = 'passing' + for service in instance['services']: + for check in service['health_checks']: + if status_priority.get(check['status'], 0) > status_priority.get(worst_status, 0): + worst_status = check['status'] + instance['health_status'] = worst_status - # Return both individual services and grouped instances return { 'services': service_data, - 'instances': group_services_by_instance(service_data) + 'instances': dict(instances) } - except requests.exceptions.RequestException: - logger.error("Failed to fetch service data from Consul") + + except Exception as e: + logger.error(f"Error fetching service data: {e}") return {} diff --git a/consul-monitor/database.py b/consul-monitor/database.py index 1121303..37e6d78 100644 --- a/consul-monitor/database.py +++ b/consul-monitor/database.py @@ -176,17 +176,32 @@ def get_all_services_grouped(conn): services.append(service) return services -def get_service_history(conn, service_name, instance_address, hours=24): +def get_service_history(conn, service_name, instance_address='', hours=24): + """Get service history by service name with optional instance filtering""" cursor = conn.cursor() - cursor.execute(''' - SELECT hc.status, hc.timestamp - FROM health_checks hc - JOIN services s ON hc.service_id = s.id - WHERE s.name = ? - AND s.address = ? - AND hc.timestamp >= datetime('now', ?) - ORDER BY hc.timestamp ASC - ''', (service_name, instance_address, f'-{hours} hours')) + + if instance_address: + # Get history for specific service instance + cursor.execute(''' + SELECT hc.status, hc.timestamp + FROM health_checks hc + JOIN services s ON hc.service_id = s.id + WHERE s.name = ? + AND s.address = ? + AND hc.timestamp >= datetime('now', ?) + ORDER BY hc.timestamp ASC + ''', (service_name, instance_address, f'-{hours} hours')) + else: + # Get history for all instances of the service + cursor.execute(''' + SELECT hc.status, hc.timestamp + FROM health_checks hc + JOIN services s ON hc.service_id = s.id + WHERE s.name = ? + AND hc.timestamp >= datetime('now', ?) + ORDER BY hc.timestamp ASC + ''', (service_name, f'-{hours} hours')) + return cursor.fetchall() def get_service_history_detailed(conn, service_id, hours=24): diff --git a/consul-monitor/templates/index.html b/consul-monitor/templates/index.html index 7adb088..d7444bd 100644 --- a/consul-monitor/templates/index.html +++ b/consul-monitor/templates/index.html @@ -11,14 +11,14 @@
- + +