mirror of
https://github.com/sstent/consul-monitor.git
synced 2025-12-06 08:01:58 +00:00
first post
This commit is contained in:
27
consul-monitor/Dockerfile
Normal file
27
consul-monitor/Dockerfile
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Create non-root user
|
||||||
|
RUN useradd -m appuser && chown -R appuser:appuser /app
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 5000
|
||||||
|
|
||||||
|
# Environment variables
|
||||||
|
ENV FLASK_APP=app.py
|
||||||
|
ENV FLASK_ENV=production
|
||||||
|
|
||||||
|
# Health check
|
||||||
|
# Fail the check on connection errors AND on non-2xx responses: requests.get()
# alone does not raise for an HTTP error status such as a 500 from /health.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import requests; requests.get('http://localhost:5000/health', timeout=5).raise_for_status()" || exit 1
|
||||||
|
|
||||||
|
CMD ["python", "-m", "flask", "run", "--host=0.0.0.0"]
|
||||||
126
consul-monitor/app.py
Normal file
126
consul-monitor/app.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
from flask import Flask, render_template, jsonify, g
|
||||||
|
import sqlite3
|
||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
import database
|
||||||
|
import consul_client
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
def get_db():
    """Return the database connection stored on ``flask.g`` for this context.

    If ``g`` has no ``db_conn`` yet, a connection is opened with
    ``database.init_database()``, stored on ``g`` and passed through
    ``database.create_tables()`` before being returned. Later calls within
    the same application context reuse the stored connection.
    """
    if 'db_conn' in g:
        return g.db_conn

    conn = database.init_database()
    # Store before creating tables so the connection is tracked on ``g``
    # even when table creation raises.
    g.db_conn = conn
    database.create_tables(conn)
    return conn
|
||||||
|
|
||||||
|
@app.teardown_appcontext
def close_db(e=None):
    """Close and forget the connection stored on ``flask.g``, if there is one.

    Registered as an app-context teardown handler; ``e`` is accepted for
    that hook's signature and is not used.
    """
    conn = g.pop('db_conn', None)
    if conn is None:
        # No connection was opened in this context.
        return
    conn.close()
|
||||||
|
|
||||||
|
@app.route('/')
def index():
    """Render ``index.html`` with the stored services and Consul reachability.

    Each service dict gets a ``url`` entry built from its name and port, or
    ``None`` when the port is falsy.
    """
    conn = get_db()

    services = database.get_all_services_with_health(conn)
    consul_available = consul_client.is_consul_available()

    for svc in services:
        svc['url'] = (
            f"http://{svc['name']}.service.dc1.consul:{svc['port']}"
            if svc['port']
            else None
        )

    return render_template('index.html', services=services, consul_available=consul_available)
|
||||||
|
|
||||||
|
@app.route('/api/services')
def get_services():
    """Return all services as JSON, refreshing the database from Consul first.

    When Consul answers, every fetched service is upserted and each of its
    health checks is inserted before the services are read back. If Consul
    is unavailable, or any step of the refresh raises, the services already
    in the database are returned instead, with ``status`` set to ``'error'``
    and the exception text in ``error``.
    """
    conn = get_db()
    service_fields = ('name', 'address', 'port', 'tags', 'meta')
    failure = None  # stays None only when the refresh path completes

    try:
        if not consul_client.is_consul_available():
            raise Exception("Consul unavailable")

        for sid, info in consul_client.fetch_all_service_data().items():
            # Store the service record first, then its health checks.
            database.upsert_service(
                conn,
                {'id': sid, **{field: info[field] for field in service_fields}},
            )
            for chk in info['health_checks']:
                database.insert_health_check(conn, sid, chk['check_name'], chk['status'])

        services = database.get_all_services_with_health(conn)
    except Exception as exc:
        # Fall back to whatever is already stored.
        services = database.get_all_services_with_health(conn)
        failure = str(exc)

    for svc in services:
        svc['url'] = (
            f"http://{svc['name']}.service.dc1.consul:{svc['port']}"
            if svc['port']
            else None
        )

    if failure is None:
        payload = {
            'status': 'success',
            'consul_available': True,
            'services': services
        }
    else:
        payload = {
            'status': 'error',
            'consul_available': False,
            'services': services,
            'error': failure
        }

    return jsonify(payload)
|
||||||
|
|
||||||
|
@app.route('/health')
def health_check():
    """Report database and Consul availability as JSON.

    ``status`` is ``'healthy'`` only when both probes succeed; the payload
    also carries a per-dependency state and a UTC ISO-8601 timestamp.
    """
    conn = get_db()

    db_ok = database.is_database_available(conn)
    consul_ok = consul_client.is_consul_available()

    payload = {
        'status': 'healthy' if (db_ok and consul_ok) else 'unhealthy',
        'consul': 'connected' if consul_ok else 'disconnected',
        'database': 'available' if db_ok else 'unavailable',
        'timestamp': datetime.utcnow().isoformat()
    }
    return jsonify(payload)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(host='0.0.0.0', port=5000)
|
||||||
82
consul-monitor/consul_client.py
Normal file
82
consul-monitor/consul_client.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
import requests
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Consul configuration
|
||||||
|
CONSUL_HOST = "consul.service.dc1.consul"
|
||||||
|
CONSUL_PORT = 8500
|
||||||
|
CONSUL_BASE_URL = f"http://{CONSUL_HOST}:{CONSUL_PORT}"
|
||||||
|
|
||||||
|
def get_consul_services():
    """Return the decoded JSON body of Consul's ``/v1/agent/services``.

    Raises:
        requests.exceptions.RequestException: on any request failure or
            HTTP error status; the error is logged and then re-raised.
    """
    endpoint = f"{CONSUL_BASE_URL}/v1/agent/services"
    try:
        resp = requests.get(endpoint, timeout=5)
        resp.raise_for_status()
        payload = resp.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to fetch Consul services: {e}")
        raise
    return payload
|
||||||
|
|
||||||
|
def get_service_health(service_name):
    """Return the health checks Consul reports for ``service_name``.

    Queries ``/v1/health/service/<service_name>`` and flattens the
    ``Checks`` list of every returned entry into a list of
    ``{'check_name': ..., 'status': ...}`` dicts. Missing ``Name`` or
    ``Status`` keys become empty strings.

    Raises:
        requests.exceptions.RequestException: on any request failure or
            HTTP error status; the error is logged and then re-raised.
    """
    endpoint = f"{CONSUL_BASE_URL}/v1/health/service/{service_name}"
    try:
        resp = requests.get(endpoint, timeout=5)
        resp.raise_for_status()
        entries = resp.json()

        return [
            {
                'check_name': chk.get('Name', ''),
                'status': chk.get('Status', '')
            }
            for entry in entries
            for chk in entry.get('Checks', [])
        ]
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to fetch health for service {service_name}: {e}")
        raise
|
||||||
|
|
||||||
|
def is_consul_available():
    """Return True when ``/v1/agent/self`` answers with HTTP 200.

    Any ``requests`` exception (the call uses a 2-second timeout) is treated
    as "not available" and yields False.
    """
    probe_url = f"{CONSUL_BASE_URL}/v1/agent/self"
    try:
        reply = requests.get(probe_url, timeout=2)
    except requests.exceptions.RequestException:
        return False
    return reply.status_code == 200
|
||||||
|
|
||||||
|
def fetch_all_service_data():
    """Collect every registered service together with its health checks.

    Returns a dict keyed by service ID. Each value holds ``id``, ``name``,
    ``address``, ``port``, ``tags``, ``meta`` and ``health_checks``. A
    service whose health lookup fails keeps an empty ``health_checks`` list
    and a warning is logged. If a ``requests`` error escapes (for example
    the service listing itself fails), an error is logged and an empty dict
    is returned.
    """
    try:
        registered = get_consul_services()
        collected = {}

        for sid, info in registered.items():
            name = info.get('Service', '')

            try:
                checks = get_service_health(name)
            except requests.exceptions.RequestException:
                # One failing service must not abort the whole collection.
                logger.warning(f"Skipping health checks for service {name}")
                checks = []

            collected[sid] = {
                'id': sid,
                'name': name,
                'address': info.get('Address', ''),
                'port': info.get('Port', None),
                'tags': info.get('Tags', []),
                'meta': info.get('Meta', {}),
                'health_checks': checks
            }

        return collected
    except requests.exceptions.RequestException:
        logger.error("Failed to fetch service data from Consul")
        return {}
|
||||||
121
consul-monitor/database.py
Normal file
121
consul-monitor/database.py
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
import sqlite3
|
||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
def create_tables(conn):
    """Create the ``services`` and ``health_checks`` tables and their indexes.

    Every statement uses ``IF NOT EXISTS``, so running this again on a
    database that already has the schema changes nothing. The work is
    committed once at the end.

    Args:
        conn: an open ``sqlite3`` connection.
    """
    schema_statements = (
        # Services table
        '''
        CREATE TABLE IF NOT EXISTS services (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            address TEXT,
            port INTEGER,
            tags TEXT,
            meta TEXT,
            first_seen DATETIME DEFAULT CURRENT_TIMESTAMP,
            last_seen DATETIME DEFAULT CURRENT_TIMESTAMP
        )
        ''',
        # Health checks table
        '''
        CREATE TABLE IF NOT EXISTS health_checks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            service_id TEXT NOT NULL,
            check_name TEXT,
            status TEXT NOT NULL,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (service_id) REFERENCES services (id)
        )
        ''',
        # Indexes
        '''
        CREATE INDEX IF NOT EXISTS idx_health_checks_service_timestamp
        ON health_checks (service_id, timestamp)
        ''',
        '''
        CREATE INDEX IF NOT EXISTS idx_health_checks_timestamp
        ON health_checks (timestamp)
        ''',
    )

    cursor = conn.cursor()
    for statement in schema_statements:
        cursor.execute(statement)

    conn.commit()
|
||||||
|
|
||||||
|
def init_database():
    """Open and return a new connection to the shared in-memory database.

    Every call returns a fresh connection to the same named, shared-cache,
    in-memory SQLite database; the caller is responsible for closing it.

    SQLite deletes an in-memory database as soon as its last connection
    closes. To keep the data alive between callers that open and close
    their own connections, the first call also opens one extra connection
    that is kept on the function object and never closed.
    """
    uri = 'file:consul-monitor.db?mode=memory&cache=shared'

    if getattr(init_database, '_keepalive', None) is None:
        # Never used for queries; it only pins the database in memory.
        # check_same_thread=False lets any thread dispose of it at exit.
        init_database._keepalive = sqlite3.connect(
            uri, uri=True, check_same_thread=False
        )

    return sqlite3.connect(uri, uri=True)
|
||||||
|
|
||||||
|
def upsert_service(conn, service_data):
    """Insert a service row, or update the existing row with the same ``id``.

    ``service_data`` must contain ``id`` and ``name``; ``address`` and
    ``port`` default to NULL, ``tags`` to ``[]`` and ``meta`` to ``{}``.
    Tags and meta are stored as JSON text. On an update ``last_seen`` is set
    to the current timestamp. The change is committed before returning.
    """
    statement = '''
        INSERT INTO services (id, name, address, port, tags, meta)
        VALUES (?, ?, ?, ?, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
            name = excluded.name,
            address = excluded.address,
            port = excluded.port,
            tags = excluded.tags,
            meta = excluded.meta,
            last_seen = CURRENT_TIMESTAMP
    '''
    values = (
        service_data['id'],
        service_data['name'],
        service_data.get('address'),
        service_data.get('port'),
        json.dumps(service_data.get('tags', [])),
        json.dumps(service_data.get('meta', {})),
    )
    conn.cursor().execute(statement, values)
    conn.commit()
|
||||||
|
|
||||||
|
def insert_health_check(conn, service_id, check_name, status):
    """Append one health-check row for ``service_id`` and commit it.

    The row's ``timestamp`` column is not supplied here, so the table's
    column default applies.
    """
    record = (service_id, check_name, status)
    conn.cursor().execute(
        '''
        INSERT INTO health_checks (service_id, check_name, status)
        VALUES (?, ?, ?)
        ''',
        record,
    )
    conn.commit()
|
||||||
|
|
||||||
|
def get_all_services_with_health(conn):
    """Return every service with its current aggregated health status.

    For each service only the health-check rows carrying that service's
    most recent timestamp are considered. ``current_status`` is the worst
    status among those rows (``critical`` > ``warning`` > ``passing`` > any
    other value), so a single failing check is never hidden by a passing
    one recorded at the same moment. A service without any health-check
    rows gets ``'unknown'`` and a ``last_check`` of ``None``.

    Args:
        conn: an open ``sqlite3`` connection with the application schema.

    Returns:
        A list of dicts ordered by service ID with the keys ``id``,
        ``name``, ``address``, ``port``, ``tags`` (list), ``meta`` (dict),
        ``current_status`` and ``last_check``.
    """
    severity = {'passing': 1, 'warning': 2, 'critical': 3}

    cursor = conn.cursor()
    cursor.execute('''
        SELECT s.id, s.name, s.address, s.port, s.tags, s.meta,
               h.status, h.timestamp
        FROM services s
        LEFT JOIN health_checks h
               ON h.service_id = s.id
              AND h.timestamp = (
                  SELECT MAX(timestamp)
                  FROM health_checks
                  WHERE service_id = s.id
              )
        ORDER BY s.id, h.id
    ''')

    services_by_id = {}
    for sid, name, address, port, tags, meta, status, stamp in cursor.fetchall():
        service = services_by_id.get(sid)
        if service is None:
            # First (possibly only) row for this service.
            services_by_id[sid] = {
                'id': sid,
                'name': name,
                'address': address,
                'port': port,
                'tags': json.loads(tags) if tags else [],
                'meta': json.loads(meta) if meta else {},
                'current_status': status or 'unknown',
                'last_check': stamp
            }
        elif severity.get(status, 0) > severity.get(service['current_status'], 0):
            # Another check from the same moment is in a worse state.
            service['current_status'] = status

    return list(services_by_id.values())
|
||||||
|
|
||||||
|
def get_service_history(conn, service_id, hours=24):
    """Return ``(status, timestamp)`` rows for a service, oldest first.

    Only rows whose timestamp is at or after "now minus ``hours`` hours"
    (as computed by SQLite's ``datetime('now', ...)``) are included.
    """
    window = f'-{hours} hours'
    query = '''
        SELECT status, timestamp
        FROM health_checks
        WHERE service_id = ?
        AND timestamp >= datetime('now', ?)
        ORDER BY timestamp
    '''
    return conn.cursor().execute(query, (service_id, window)).fetchall()
|
||||||
|
|
||||||
|
def is_database_available(conn):
    """Return True if ``SELECT 1`` runs on ``conn``, False on ``sqlite3.Error``."""
    try:
        conn.execute('SELECT 1')
    except sqlite3.Error:
        return False
    return True
|
||||||
2
consul-monitor/requirements.txt
Normal file
2
consul-monitor/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
Flask==2.3.3
|
||||||
|
requests==2.31.0
|
||||||
113
consul-monitor/static/css/style.css
Normal file
113
consul-monitor/static/css/style.css
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
/* Basic reset and layout */
|
||||||
|
* {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
body {
|
||||||
|
font-family: Arial, sans-serif;
|
||||||
|
background: #f5f5f5;
|
||||||
|
padding: 0;
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Header */
|
||||||
|
.header {
|
||||||
|
background: white;
|
||||||
|
padding: 1rem 2rem;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
position: sticky;
|
||||||
|
top: 0;
|
||||||
|
z-index: 100;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Alert banners */
|
||||||
|
.error-banner, .warning-banner {
|
||||||
|
padding: 0.75rem 2rem;
|
||||||
|
margin: 0;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
.error-banner {
|
||||||
|
background: #fee;
|
||||||
|
color: #c33;
|
||||||
|
}
|
||||||
|
.warning-banner {
|
||||||
|
background: #fff3cd;
|
||||||
|
color: #856404;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Services table */
|
||||||
|
.services-container {
|
||||||
|
padding: 2rem;
|
||||||
|
}
|
||||||
|
.services-table {
|
||||||
|
width: 100%;
|
||||||
|
background: white;
|
||||||
|
border-radius: 8px;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
border-collapse: collapse;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
.services-table th, .services-table td {
|
||||||
|
padding: 1rem;
|
||||||
|
text-align: left;
|
||||||
|
border-bottom: 1px solid #eee;
|
||||||
|
}
|
||||||
|
.services-table th {
|
||||||
|
background: #f8f9fa;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Status indicators */
|
||||||
|
.status-icon {
|
||||||
|
font-size: 1.2rem;
|
||||||
|
display: inline-block;
|
||||||
|
width: 24px;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
.status-passing { color: #28a745; }
|
||||||
|
.status-warning { color: #ffc107; }
|
||||||
|
.status-critical { color: #dc3545; }
|
||||||
|
.status-unknown { color: #6c757d; }
|
||||||
|
|
||||||
|
/* Tags */
|
||||||
|
.tag {
|
||||||
|
display: inline-block;
|
||||||
|
background: #e9ecef;
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
margin-right: 0.5rem;
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Buttons */
|
||||||
|
button {
|
||||||
|
background: #007bff;
|
||||||
|
color: white;
|
||||||
|
border: none;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 1rem;
|
||||||
|
}
|
||||||
|
button:hover { background: #0056b3; }
|
||||||
|
button:disabled {
|
||||||
|
background: #6c757d;
|
||||||
|
cursor: not-allowed;
|
||||||
|
opacity: 0.7;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No services message */
|
||||||
|
.no-services {
|
||||||
|
padding: 2rem;
|
||||||
|
text-align: center;
|
||||||
|
background: white;
|
||||||
|
border-radius: 8px;
|
||||||
|
margin-top: 1rem;
|
||||||
|
font-style: italic;
|
||||||
|
color: #6c757d;
|
||||||
|
}
|
||||||
74
consul-monitor/static/js/app.js
Normal file
74
consul-monitor/static/js/app.js
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
console.log('app.js loading...');
|
||||||
|
|
||||||
|
// Define the serviceMonitor component
|
||||||
|
// Factory for the dashboard's Alpine component: holds the service list and
// the loading/error flags, and knows how to refresh them from the backend.
function serviceMonitor() {
    console.log('serviceMonitor function called');

    // Status -> emoji lookup; any status not listed maps to the white circle.
    const emojiByStatus = new Map([
        ['passing', '🟢'],
        ['warning', '🟡'],
        ['critical', '🔴']
    ]);

    return {
        services: [],
        loading: false,
        error: null,
        consulAvailable: true,

        // Loads the service list once when the component is initialised.
        init() {
            console.log('Initializing serviceMonitor component');
            this.refreshServices();
        },

        // Fetches /api/services and copies the result into component state.
        // On a non-success payload the returned services are still shown
        // and the error text is set; a thrown fetch/parse error clears the list.
        async refreshServices() {
            console.log('Refreshing services');
            this.loading = true;
            this.error = null;

            try {
                const reply = await fetch('/api/services');
                const payload = await reply.json();
                const succeeded = payload.status === 'success';

                if (!succeeded) {
                    this.error = payload.error || 'Failed to fetch services';
                }
                this.services = succeeded ? payload.services : (payload.services || []);
                this.consulAvailable = payload.consul_available;
            } catch (err) {
                this.error = 'Network error: ' + err.message;
                this.services = [];
                this.consulAvailable = false;
            } finally {
                this.loading = false;
            }
        },

        // Returns a class-name -> boolean map for Alpine's :class binding.
        getStatusClass(status) {
            const classes = {};
            for (const level of ['passing', 'warning', 'critical']) {
                classes[`status-${level}`] = status === level;
            }
            classes['status-unknown'] = !status || status === 'unknown';
            return classes;
        },

        // Returns the emoji shown for a status value.
        getStatusEmoji(status) {
            return emojiByStatus.has(status) ? emojiByStatus.get(status) : '⚪';
        }
    };
}
|
||||||
|
|
||||||
|
// Try to register with Alpine.js with fallback to window
|
||||||
|
// Make the factory reachable as a plain global so `x-data="serviceMonitor"`
// can resolve it even without an Alpine.data registration.
window.serviceMonitor = serviceMonitor;

if (window.Alpine) {
    // Alpine's script has already executed: register right away.
    console.log('Registering with Alpine.js');
    window.Alpine.data('serviceMonitor', serviceMonitor);
} else {
    // Alpine has not loaded yet: register from its documented init hook,
    // which fires before Alpine walks the DOM.
    document.addEventListener('alpine:init', () => {
        console.log('Registering with Alpine.js');
        window.Alpine.data('serviceMonitor', serviceMonitor);
    });
}
|
||||||
|
|
||||||
|
console.log('app.js loaded');
|
||||||
63
consul-monitor/templates/index.html
Normal file
63
consul-monitor/templates/index.html
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
    <title>Consul Service Monitor</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
    <!-- Deferred scripts run in document order. app.js must run before Alpine,
         because Alpine's CDN build starts itself right after it executes and
         needs `serviceMonitor` to exist when it processes x-data. -->
    <script src="{{ url_for('static', filename='js/app.js') }}" defer></script>
    <script src="https://unpkg.com/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
</head>
|
||||||
|
<body x-data="serviceMonitor">
|
||||||
|
<div class="header">
|
||||||
|
<h1>Consul Service Monitor</h1>
|
||||||
|
<div class="controls">
|
||||||
|
<button @click="refreshServices" :disabled="loading">
|
||||||
|
<span x-show="!loading">🔄 Refresh</span>
|
||||||
|
<span x-show="loading">Loading...</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div x-show="error" class="error-banner" x-text="error"></div>
|
||||||
|
<div x-show="!consulAvailable" class="warning-banner">
|
||||||
|
⚠️ Consul connection failed - showing cached data
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="services-container">
|
||||||
|
<table class="services-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Service Name</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>URL</th>
|
||||||
|
<th>Tags</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<template x-for="service in services" :key="service.id">
|
||||||
|
<tr>
|
||||||
|
<td x-text="service.name"></td>
|
||||||
|
<td>
|
||||||
|
<span class="status-icon"
|
||||||
|
:class="getStatusClass(service.current_status)"
|
||||||
|
x-text="getStatusEmoji(service.current_status)">
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<a :href="service.url" target="_blank" x-text="service.url"></a>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<template x-for="tag in service.tags">
|
||||||
|
<span class="tag" x-text="tag"></span>
|
||||||
|
</template>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</template>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<div x-show="services.length === 0 && !loading" class="no-services">
|
||||||
|
No services found
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
318
design.md
Normal file
318
design.md
Normal file
@@ -0,0 +1,318 @@
|
|||||||
|
# Consul Service Monitor - Design Document
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
A web-based dashboard application that monitors and visualizes the health status of services registered in HashiCorp Consul. The application provides real-time monitoring with historical health tracking capabilities.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### High-Level Components
|
||||||
|
|
||||||
|
1. **Web Frontend** - Interactive dashboard displaying service status
|
||||||
|
2. **Backend API** - REST API for data retrieval and configuration
|
||||||
|
3. **Data Collection Service** - Background service polling Consul for health data
|
||||||
|
4. **SQLite Database** - Historical health check data storage
|
||||||
|
5. **Consul Integration** - Service discovery and health check monitoring
|
||||||
|
|
||||||
|
### Technology Stack
|
||||||
|
|
||||||
|
- **Frontend**: HTML5, CSS3, JavaScript (with Chart.js for visualizations)
|
||||||
|
- **Backend**: Python 3.9+ with Flask
|
||||||
|
- **Database**: SQLite (ephemeral storage)
|
||||||
|
- **Service Discovery**: HashiCorp Consul (consul.service.dc1.consul)
|
||||||
|
- **Updates**: Periodic polling (no WebSockets needed)
|
||||||
|
|
||||||
|
## Functional Requirements
|
||||||
|
|
||||||
|
### Core Features
|
||||||
|
|
||||||
|
#### 1. Service List Display
|
||||||
|
- Display all services registered in Consul
|
||||||
|
- Show service name, ID, and tags
|
||||||
|
- Provide clickable links to service URLs
|
||||||
|
- Support sorting and filtering
|
||||||
|
|
||||||
|
#### 2. Health Status Visualization
|
||||||
|
- **Current Status Indicator**
|
||||||
|
- Green icon: All health checks passing
|
||||||
|
- Red icon: One or more health checks failing
|
||||||
|
- Yellow icon: Warning state (if supported)
|
||||||
|
- **Historical Status Chart**
|
||||||
|
- Mini bar chart showing 24-hour health history
|
||||||
|
- Time-based visualization (hourly aggregation)
|
||||||
|
- Color-coded status representation
|
||||||
|
|
||||||
|
#### 3. Auto-refresh Functionality
|
||||||
|
- Toggle switch to enable/disable auto-refresh
|
||||||
|
- Configurable refresh interval (30s, 1m, 2m, 5m, 10m)
|
||||||
|
- Visual indicator when auto-refresh is active
|
||||||
|
- Manual refresh button
|
||||||
|
|
||||||
|
#### 4. Configuration Management
|
||||||
|
- Session-based storage of user preferences (no persistence needed)
|
||||||
|
- Configurable history granularity (5m, 15m, 30m, 1h) - default: 15 minutes
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
### Tables
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Services table
|
||||||
|
CREATE TABLE services (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
address TEXT,
|
||||||
|
port INTEGER,
|
||||||
|
tags TEXT, -- JSON array
|
||||||
|
meta TEXT, -- JSON object
|
||||||
|
first_seen DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
last_seen DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Health checks table
|
||||||
|
CREATE TABLE health_checks (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
service_id TEXT NOT NULL,
|
||||||
|
check_id TEXT NOT NULL,
|
||||||
|
check_name TEXT,
|
||||||
|
status TEXT NOT NULL, -- 'passing', 'warning', 'critical'
|
||||||
|
output TEXT,
|
||||||
|
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
FOREIGN KEY (service_id) REFERENCES services (id)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Configuration table (session-based, optional for defaults)
|
||||||
|
CREATE TABLE config (
|
||||||
|
key TEXT PRIMARY KEY,
|
||||||
|
value TEXT NOT NULL,
|
||||||
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Service URLs are generated using pattern: http://{service_name}.service.dc1.consul:{port}
|
||||||
|
|
||||||
|
-- Indexes for performance
|
||||||
|
CREATE INDEX idx_health_checks_service_timestamp ON health_checks (service_id, timestamp);
|
||||||
|
CREATE INDEX idx_health_checks_timestamp ON health_checks (timestamp);
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Design
|
||||||
|
|
||||||
|
### REST Endpoints
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Flask routes
|
||||||
|
GET /
|
||||||
|
- Serves main dashboard HTML page
|
||||||
|
|
||||||
|
GET /api/services
|
||||||
|
- Returns list of all services with current health status
|
||||||
|
- Generated URLs: http://{service_name}.service.dc1.consul:{port}
|
||||||
|
- Response: Array of service objects with health summary
|
||||||
|
|
||||||
|
GET /api/services/<service_id>/history
|
||||||
|
- Returns historical health data for charts
|
||||||
|
- Query params: ?granularity=15 (minutes: 5,15,30,60)
|
||||||
|
- Response: Time-series data for Chart.js
|
||||||
|
|
||||||
|
POST /api/config
|
||||||
|
- Updates session configuration
|
||||||
|
- Body: { "autoRefresh": true, "refreshInterval": 60, "historyGranularity": 15 }
|
||||||
|
|
||||||
|
GET /api/config
|
||||||
|
- Returns current session configuration
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Collection Service
|
||||||
|
|
||||||
|
### Polling Strategy
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
Consul Polling:
|
||||||
|
- Interval: 60 seconds
|
||||||
|
- Consul Address: consul.service.dc1.consul:8500
|
||||||
|
- Endpoints:
|
||||||
|
- /v1/agent/services (service discovery)
|
||||||
|
- /v1/health/service/{service} (health checks)
|
||||||
|
- No authentication required
|
||||||
|
- Error handling: Log errors, continue polling
|
||||||
|
- Expected services: 30-40 services
|
||||||
|
|
||||||
|
Data Retention:
|
||||||
|
- Keep detailed data for 24 hours only (ephemeral storage)
|
||||||
|
- No long-term aggregation needed
|
||||||
|
- Database recreated on container restart
|
||||||
|
```
|
||||||
|
|
||||||
|
### Health Check Processing
|
||||||
|
|
||||||
|
1. **Data Collection**
|
||||||
|
- Poll Consul API for service list
|
||||||
|
- For each service, fetch health check status
|
||||||
|
- Store raw health check data with timestamps
|
||||||
|
|
||||||
|
2. **Status Aggregation**
|
||||||
|
- Service-level status: Worst status among all checks
|
||||||
|
- Historical aggregation: Count of passing/warning/critical per time window
|
||||||
|
|
||||||
|
3. **Change Detection**
|
||||||
|
- Compare current status with previous poll
|
||||||
|
- Trigger notifications/updates on status changes
|
||||||
|
- Maintain service registration/deregistration events
|
||||||
|
|
||||||
|
## Frontend Design
|
||||||
|
|
||||||
|
### Main Dashboard Layout
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────┐
|
||||||
|
│ Consul Service Monitor [⚙️] [🔄] │
|
||||||
|
├─────────────────────────────────────────────────┤
|
||||||
|
│ Auto-refresh: [ON/OFF] Interval: [1m ▼] │
|
||||||
|
│ History granularity: [15m ▼] │
|
||||||
|
├─────────────────────────────────────────────────┤
|
||||||
|
│ Service Name │ Status │ URL │ History │
|
||||||
|
├─────────────────┼────────┼──────────┼───────────┤
|
||||||
|
│ web-api │ 🟢 │ [link] │ ▆▆█▆█▆▆ │
|
||||||
|
│ database │ 🔴 │ [link] │ █▆▆▄▂▂▄ │
|
||||||
|
│ cache-service │ 🟢 │ [link] │ ████████ │
|
||||||
|
└─────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Interactive Elements
|
||||||
|
|
||||||
|
- **Status Icons**: Visual indicators only (no detailed popup needed)
|
||||||
|
- **History Charts**: Chart.js mini bar charts with 24-hour data
|
||||||
|
- **Service Links**: URLs generated as http://{service_name}.service.dc1.consul:{port}
|
||||||
|
- **Desktop-optimized**: No mobile responsive design required
|
||||||
|
|
||||||
|
### Updates
|
||||||
|
|
||||||
|
- Periodic AJAX polling for updates
|
||||||
|
- Configurable refresh intervals (30s, 1m, 2m, 5m, 10m)
|
||||||
|
- Visual loading indicators during refresh
|
||||||
|
|
||||||
|
## Configuration Management
|
||||||
|
|
||||||
|
### User Settings (Session-based)
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"autoRefresh": {
|
||||||
|
"enabled": false,
|
||||||
|
"interval": 60,
|
||||||
|
"options": [30, 60, 120, 300, 600]
|
||||||
|
},
|
||||||
|
"display": {
|
||||||
|
"historyGranularity": 15,
|
||||||
|
"granularityOptions": [5, 15, 30, 60]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### System Configuration
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Flask configuration
|
||||||
|
CONSUL_HOST = "consul.service.dc1.consul"
|
||||||
|
CONSUL_PORT = 8500
|
||||||
|
DATABASE_PATH = ":memory:" # Ephemeral SQLite
|
||||||
|
POLL_INTERVAL = 60 # seconds
|
||||||
|
MAX_SERVICES = 50 # Safety limit
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment Considerations
|
||||||
|
|
||||||
|
### Docker Deployment
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 5000
|
||||||
|
|
||||||
|
# Set environment variables
|
||||||
|
ENV FLASK_APP=app.py
|
||||||
|
ENV FLASK_ENV=production
|
||||||
|
ENV CONSUL_HOST=consul.service.dc1.consul
|
||||||
|
ENV CONSUL_PORT=8500
|
||||||
|
|
||||||
|
# Health check
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||||
|
    CMD python -c "import requests; requests.get('http://localhost:5000/health', timeout=5).raise_for_status()" || exit 1
|
||||||
|
|
||||||
|
CMD ["python", "-m", "flask", "run", "--host=0.0.0.0"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Python Dependencies (requirements.txt)
|
||||||
|
|
||||||
|
```
|
||||||
|
Flask==2.3.3
|
||||||
|
requests==2.31.0
|
||||||
|
# sqlite3 is part of the Python standard library and is not installed via pip
|
||||||
|
APScheduler==3.10.4 # For background polling
|
||||||
|
```
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
- `CONSUL_HOST`: Consul server hostname (default: consul.service.dc1.consul)
|
||||||
|
- `CONSUL_PORT`: Consul server port (default: 8500)
|
||||||
|
- `FLASK_PORT`: Web server port (default: 5000)
|
||||||
|
- `POLL_INTERVAL`: Health check polling interval in seconds (default: 60)
|
||||||
|
|
||||||
|
### Health Checks
|
||||||
|
|
||||||
|
The application should expose its own health endpoint:
|
||||||
|
- `GET /health`: Returns application health status
|
||||||
|
- `GET /metrics`: Prometheus-style metrics (optional)
|
||||||
|
|
||||||
|
## Security Considerations
|
||||||
|
|
||||||
|
1. **Consul Access**: No authentication is required in the target deployment
|
||||||
|
2. **Database**: Ephemeral SQLite in container memory
|
||||||
|
3. **Web Interface**: Open dashboard, no authentication needed
|
||||||
|
4. **Input Validation**: Sanitize service names and configuration inputs
|
||||||
|
5. **Container Security**: Run as non-root user in container
|
||||||
|
|
||||||
|
## Future Enhancements
|
||||||
|
|
||||||
|
- **Alerting**: Email/Slack notifications on service failures (mentioned as future feature)
|
||||||
|
- **Service Filtering**: Search and filter capabilities for larger service lists
|
||||||
|
- **Service Details**: Detailed health check information popup/modal
|
||||||
|
- **Themes**: Dark/light mode toggle
|
||||||
|
- **Export**: Export health data as CSV/JSON
|
||||||
|
- **Custom Time Ranges**: Configurable history periods beyond 24 hours
|
||||||
|
|
||||||
|
## Development Phases
|
||||||
|
|
||||||
|
### Phase 1: Core Functionality
|
||||||
|
- Basic Consul integration
|
||||||
|
- SQLite database setup
|
||||||
|
- Simple web interface
|
||||||
|
- Manual refresh capability
|
||||||
|
|
||||||
|
### Phase 2: Real-time Features
|
||||||
|
- Auto-refresh functionality
|
||||||
|
- Periodic polling updates (no WebSockets, as stated under Technology Stack)
|
||||||
|
- Historical data visualization
|
||||||
|
- Configuration persistence
|
||||||
|
|
||||||
|
### Phase 3: Enhanced UX
|
||||||
|
- Responsive design
|
||||||
|
- Advanced filtering
|
||||||
|
- Performance optimizations
|
||||||
|
- Error handling improvements
|
||||||
|
|
||||||
|
### Phase 4: Production Ready
|
||||||
|
- Docker deployment
|
||||||
|
- Security hardening
|
||||||
|
- Monitoring and logging
|
||||||
|
- Documentation and testing
|
||||||
477
plan_phase1.md
Normal file
477
plan_phase1.md
Normal file
@@ -0,0 +1,477 @@
|
|||||||
|
# Phase 1 Implementation Plan - Consul Service Monitor
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
Implement the core functionality for a Flask-based Consul service monitoring dashboard. This phase focuses on basic Consul integration, SQLite database setup, and a simple web interface with manual refresh capability.
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
Create the following directory structure:
|
||||||
|
```
|
||||||
|
consul-monitor/
|
||||||
|
├── app.py # Main Flask application
|
||||||
|
├── consul_client.py # Consul API integration
|
||||||
|
├── database.py # SQLite database operations
|
||||||
|
├── requirements.txt # Python dependencies
|
||||||
|
├── templates/
|
||||||
|
│ └── index.html # Main dashboard template
|
||||||
|
├── static/
|
||||||
|
│ ├── css/
|
||||||
|
│ │ └── style.css # Dashboard styles
|
||||||
|
│ └── js/
|
||||||
|
│ └── app.js # Frontend JavaScript with Alpine.js
|
||||||
|
└── Dockerfile # Container configuration
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dependencies (requirements.txt)
|
||||||
|
```
|
||||||
|
Flask==2.3.3
|
||||||
|
requests==2.31.0
|
||||||
|
```
|
||||||
|
|
||||||
|
## Database Implementation (database.py)
|
||||||
|
|
||||||
|
### Database Schema
|
||||||
|
Implement exactly these SQLite tables:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Services table
|
||||||
|
CREATE TABLE IF NOT EXISTS services (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
address TEXT,
|
||||||
|
port INTEGER,
|
||||||
|
tags TEXT, -- Store as JSON string
|
||||||
|
meta TEXT, -- Store as JSON string
|
||||||
|
first_seen DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
last_seen DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Health checks table
|
||||||
|
CREATE TABLE IF NOT EXISTS health_checks (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
service_id TEXT NOT NULL,
|
||||||
|
check_name TEXT,
|
||||||
|
status TEXT NOT NULL, -- 'passing', 'warning', 'critical'
|
||||||
|
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
FOREIGN KEY (service_id) REFERENCES services (id)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Indexes for performance
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_health_checks_service_timestamp
|
||||||
|
ON health_checks (service_id, timestamp);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Functions
|
||||||
|
Create these specific functions in database.py:
|
||||||
|
|
||||||
|
1. **`init_database()`**: Initialize SQLite database with the above schema
|
||||||
|
2. **`upsert_service(service_data)`**: Insert or update service record
|
||||||
|
- Parameters: dictionary with id, name, address, port, tags (as JSON string), meta (as JSON string)
|
||||||
|
- Update last_seen timestamp on existing records
|
||||||
|
3. **`insert_health_check(service_id, check_name, status)`**: Insert health check record
|
||||||
|
4. **`get_all_services_with_health()`**: Return all services with their latest health status
|
||||||
|
- Join services table with latest health_checks record per service
|
||||||
|
- Return list of dictionaries with service details + current health status
|
||||||
|
5. **`get_service_history(service_id, hours=24)`**: Get health history for specific service
|
||||||
|
6. **`is_database_available()`**: Test database connectivity
|
||||||
|
|
||||||
|
## Consul Client Implementation (consul_client.py)
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
Set these constants:
|
||||||
|
```python
|
||||||
|
CONSUL_HOST = "consul.service.dc1.consul"
|
||||||
|
CONSUL_PORT = 8500
|
||||||
|
CONSUL_BASE_URL = f"http://{CONSUL_HOST}:{CONSUL_PORT}"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Consul Functions
|
||||||
|
Implement these specific functions:
|
||||||
|
|
||||||
|
1. **`get_consul_services()`**:
|
||||||
|
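- Call the `/v1/agent/services` endpoint (note: this returns only the services registered with the agent being queried, which may differ from the cluster-wide catalog)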
|
||||||
|
- Return dictionary of services or raise exception on failure
|
||||||
|
- Handle HTTP errors and connection timeouts
|
||||||
|
|
||||||
|
2. **`get_service_health(service_name)`**:
|
||||||
|
- Call `/v1/health/service/{service_name}` endpoint
|
||||||
|
- Parse health check results
|
||||||
|
- Return list of health checks with check_name and status
|
||||||
|
- Handle cases where a service has no health checks
|
||||||
|
|
||||||
|
3. **`is_consul_available()`**:
|
||||||
|
- Test connection to Consul
|
||||||
|
- Return True/False boolean
|
||||||
|
|
||||||
|
4. **`fetch_all_service_data()`**:
|
||||||
|
- Orchestrate calls to get_consul_services() and get_service_health()
|
||||||
|
- Return combined service and health data
|
||||||
|
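- Handle partial failures gracefully (e.g. if the health lookup for one service fails, still return the remaining services and report that service's status as `unknown`)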
|
||||||
|
|
||||||
|
## Flask Application (app.py)
|
||||||
|
|
||||||
|
### Application Configuration
|
||||||
|
```python
|
||||||
|
from flask import Flask, render_template, jsonify
|
||||||
|
import sqlite3
|
||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
```
|
||||||
|
|
||||||
|
### Flask Routes
|
||||||
|
Implement exactly these routes:
|
||||||
|
|
||||||
|
1. **`GET /`**:
|
||||||
|
- Render main dashboard using index.html template
|
||||||
|
- Pass initial service data to template
|
||||||
|
- Handle database/consul errors gracefully
|
||||||
|
|
||||||
|
2. **`GET /api/services`**:
|
||||||
|
- Return JSON array of all services with current health status
|
||||||
|
- Include generated URLs using pattern: `http://{service_name}.service.dc1.consul:{port}`
|
||||||
|
- Response format:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "success|error",
|
||||||
|
"consul_available": true|false,
|
||||||
|
"services": [
|
||||||
|
{
|
||||||
|
"id": "service-id",
|
||||||
|
"name": "service-name",
|
||||||
|
"address": "10.0.0.1",
|
||||||
|
"port": 8080,
|
||||||
|
"url": "http://service-name.service.dc1.consul:8080",
|
||||||
|
"tags": ["tag1", "tag2"],
|
||||||
|
"current_status": "passing|warning|critical|unknown",
|
||||||
|
"last_check": "2024-01-01T12:00:00"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"error": "error message if any"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **`GET /health`**:
|
||||||
|
- Return application health status
|
||||||
|
- Test both database and Consul connectivity
|
||||||
|
- Response format:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "healthy|unhealthy",
|
||||||
|
"consul": "connected|disconnected",
|
||||||
|
"database": "available|unavailable",
|
||||||
|
"timestamp": "2024-01-01T12:00:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data Flow Logic
|
||||||
|
Implement this exact flow in the `/api/services` endpoint:
|
||||||
|
|
||||||
|
1. Try to fetch fresh data from Consul using `fetch_all_service_data()`
|
||||||
|
2. If successful:
|
||||||
|
- Update database with new service and health data
|
||||||
|
- Return fresh data with `consul_available: true`
|
||||||
|
3. If Consul fails:
|
||||||
|
- Retrieve cached data from database using `get_all_services_with_health()`
|
||||||
|
- Return cached data with `consul_available: false` and error message
|
||||||
|
4. If both fail:
|
||||||
|
- Return error response with empty services array
|
||||||
|
|
||||||
|
## Frontend Implementation
|
||||||
|
|
||||||
|
### HTML Template (templates/index.html)
|
||||||
|
Create dashboard with this structure:
|
||||||
|
```html
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Consul Service Monitor</title>
|
||||||
|
<link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
|
||||||
|
<script src="https://unpkg.com/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||||
|
</head>
|
||||||
|
<body x-data="serviceMonitor()">
|
||||||
|
<div class="header">
|
||||||
|
<h1>Consul Service Monitor</h1>
|
||||||
|
<div class="controls">
|
||||||
|
<button @click="refreshServices()" :disabled="loading">
|
||||||
|
<span x-show="!loading">🔄 Refresh</span>
|
||||||
|
<span x-show="loading">Loading...</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div x-show="error" class="error-banner" x-text="error"></div>
|
||||||
|
<div x-show="!consulAvailable" class="warning-banner">
|
||||||
|
⚠️ Consul connection failed - showing cached data
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="services-container">
|
||||||
|
<table class="services-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Service Name</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>URL</th>
|
||||||
|
<th>Tags</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<template x-for="service in services" :key="service.id">
|
||||||
|
<tr>
|
||||||
|
<td x-text="service.name"></td>
|
||||||
|
<td>
|
||||||
|
<span class="status-icon"
|
||||||
|
:class="getStatusClass(service.current_status)"
|
||||||
|
x-text="getStatusEmoji(service.current_status)">
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<a :href="service.url" target="_blank" x-text="service.url"></a>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<template x-for="tag in service.tags">
|
||||||
|
<span class="tag" x-text="tag"></span>
|
||||||
|
</template>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</template>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<div x-show="services.length === 0 && !loading" class="no-services">
|
||||||
|
No services found
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Alpine.js JavaScript (static/js/app.js)
|
||||||
|
```javascript
|
||||||
|
function serviceMonitor() {
|
||||||
|
return {
|
||||||
|
services: [],
|
||||||
|
loading: false,
|
||||||
|
error: null,
|
||||||
|
consulAvailable: true,
|
||||||
|
|
||||||
|
init() {
|
||||||
|
this.refreshServices();
|
||||||
|
},
|
||||||
|
|
||||||
|
async refreshServices() {
|
||||||
|
this.loading = true;
|
||||||
|
this.error = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await fetch('/api/services');
|
||||||
|
const data = await response.json();
|
||||||
|
|
||||||
|
if (data.status === 'success') {
|
||||||
|
this.services = data.services;
|
||||||
|
this.consulAvailable = data.consul_available;
|
||||||
|
} else {
|
||||||
|
this.error = data.error || 'Failed to fetch services';
|
||||||
|
this.services = data.services || [];
|
||||||
|
this.consulAvailable = data.consul_available;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
this.error = 'Network error: ' + err.message;
|
||||||
|
this.services = [];
|
||||||
|
this.consulAvailable = false;
|
||||||
|
} finally {
|
||||||
|
this.loading = false;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
getStatusClass(status) {
|
||||||
|
return {
|
||||||
|
'status-passing': status === 'passing',
|
||||||
|
'status-warning': status === 'warning',
|
||||||
|
'status-critical': status === 'critical',
|
||||||
|
'status-unknown': !status || status === 'unknown'
|
||||||
|
};
|
||||||
|
},
|
||||||
|
|
||||||
|
getStatusEmoji(status) {
|
||||||
|
switch(status) {
|
||||||
|
case 'passing': return '🟢';
|
||||||
|
case 'warning': return '🟡';
|
||||||
|
case 'critical': return '🔴';
|
||||||
|
default: return '⚪';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### CSS Styling (static/css/style.css)
|
||||||
|
Implement these specific styles:
|
||||||
|
```css
|
||||||
|
/* Basic reset and layout */
|
||||||
|
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||||
|
body { font-family: Arial, sans-serif; background: #f5f5f5; }
|
||||||
|
|
||||||
|
/* Header */
|
||||||
|
.header {
|
||||||
|
background: white;
|
||||||
|
padding: 1rem 2rem;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Alert banners */
|
||||||
|
.error-banner, .warning-banner {
|
||||||
|
padding: 0.75rem 2rem;
|
||||||
|
margin: 0;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
.error-banner { background: #fee; color: #c33; }
|
||||||
|
.warning-banner { background: #fff3cd; color: #856404; }
|
||||||
|
|
||||||
|
/* Services table */
|
||||||
|
.services-container { padding: 2rem; }
|
||||||
|
.services-table {
|
||||||
|
width: 100%;
|
||||||
|
background: white;
|
||||||
|
border-radius: 8px;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
border-collapse: collapse;
|
||||||
|
}
|
||||||
|
.services-table th, .services-table td {
|
||||||
|
padding: 1rem;
|
||||||
|
text-align: left;
|
||||||
|
border-bottom: 1px solid #eee;
|
||||||
|
}
|
||||||
|
.services-table th { background: #f8f9fa; font-weight: bold; }
|
||||||
|
|
||||||
|
/* Status indicators */
|
||||||
|
.status-icon { font-size: 1.2rem; }
|
||||||
|
.status-passing { color: #28a745; }
|
||||||
|
.status-warning { color: #ffc107; }
|
||||||
|
.status-critical { color: #dc3545; }
|
||||||
|
.status-unknown { color: #6c757d; }
|
||||||
|
|
||||||
|
/* Tags */
|
||||||
|
.tag {
|
||||||
|
display: inline-block;
|
||||||
|
background: #e9ecef;
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
margin-right: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Buttons */
|
||||||
|
button {
|
||||||
|
background: #007bff;
|
||||||
|
color: white;
|
||||||
|
border: none;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
button:hover { background: #0056b3; }
|
||||||
|
button:disabled { background: #6c757d; cursor: not-allowed; }
|
||||||
|
```
|
||||||
|
|
||||||
|
## Error Handling Requirements
|
||||||
|
|
||||||
|
### Consul Connection Errors
|
||||||
|
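- Catch `requests.exceptions.ConnectionError`, `requests.exceptions.Timeout`, and `requests.exceptions.HTTPError` (the latter is raised only when `response.raise_for_status()` is called on a non-2xx response)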
|
||||||
|
- Log errors but continue serving cached data
|
||||||
|
- Display connection status in UI
|
||||||
|
|
||||||
|
### Database Errors
|
||||||
|
- Handle SQLite database lock errors
|
||||||
|
- Graceful degradation when database is unavailable
|
||||||
|
- Return appropriate HTTP status codes (e.g. `503 Service Unavailable` when the database cannot be reached)
|
||||||
|
|
||||||
|
### Data Validation
|
||||||
|
- Validate service data structure from Consul API
|
||||||
|
- Handle missing or malformed service records
|
||||||
|
- Default to 'unknown' status for services without health checks
|
||||||
|
|
||||||
|
## Testing Checklist
|
||||||
|
Before considering Phase 1 complete, verify:
|
||||||
|
|
||||||
|
1. **Database Operations**:
|
||||||
|
- [ ] Database tables created correctly
|
||||||
|
- [ ] Services can be inserted/updated
|
||||||
|
- [ ] Health checks are stored with timestamps
|
||||||
|
- [ ] Queries return expected data structure
|
||||||
|
|
||||||
|
2. **Consul Integration**:
|
||||||
|
- [ ] Can fetch service list from Consul
|
||||||
|
- [ ] Can fetch health status for each service
|
||||||
|
- [ ] Handles Consul connection failures gracefully
|
||||||
|
- [ ] Service URLs generated correctly
|
||||||
|
|
||||||
|
3. **Web Interface**:
|
||||||
|
- [ ] Dashboard loads without errors
|
||||||
|
- [ ] Services displayed in table format
|
||||||
|
- [ ] Status icons show correct colors
|
||||||
|
- [ ] Refresh button updates data via AJAX
|
||||||
|
- [ ] Error messages display when appropriate
|
||||||
|
|
||||||
|
4. **Error Scenarios**:
|
||||||
|
- [ ] App starts when Consul is unavailable
|
||||||
|
- [ ] Shows cached data when Consul fails
|
||||||
|
- [ ] Displays appropriate error messages
|
||||||
|
- [ ] Recovers when Consul comes back online
|
||||||
|
|
||||||
|
## Docker Configuration (Dockerfile)
|
||||||
|
```dockerfile
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Create non-root user
|
||||||
|
RUN useradd -m appuser && chown -R appuser:appuser /app
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 5000
|
||||||
|
|
||||||
|
# Environment variables
|
||||||
|
ENV FLASK_APP=app.py
|
||||||
|
ENV FLASK_ENV=production
|
||||||
|
|
||||||
|
# Health check
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||||
|
CMD python -c "import requests; requests.get('http://localhost:5000/health', timeout=5)" || exit 1
|
||||||
|
|
||||||
|
CMD ["python", "-m", "flask", "run", "--host=0.0.0.0"]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implementation Order
|
||||||
|
Follow this exact sequence:
|
||||||
|
|
||||||
|
1. Create project structure and requirements.txt
|
||||||
|
2. Implement database.py with all functions and test database operations
|
||||||
|
3. Implement consul_client.py and test Consul connectivity
|
||||||
|
4. Create basic Flask app.py with health endpoint
|
||||||
|
5. Add /api/services endpoint with full error handling
|
||||||
|
6. Create HTML template with Alpine.js integration
|
||||||
|
7. Add CSS styling for professional appearance
|
||||||
|
8. Test complete workflow: Consul → Database → API → Frontend
|
||||||
|
9. Create Dockerfile and test containerized deployment
|
||||||
|
10. Verify all error scenarios work as expected
|
||||||
|
|
||||||
|
## Success Criteria
|
||||||
|
Phase 1 is complete when:
|
||||||
|
- Application starts successfully in Docker container
|
||||||
|
- Dashboard displays list of services from Consul
|
||||||
|
- Manual refresh button updates service data
|
||||||
|
- Application gracefully handles Consul outages
|
||||||
|
- All services show correct health status with colored indicators
|
||||||
|
- Generated service URLs follow the specified pattern
|
||||||
|
- Error messages display appropriately in the UI
|
||||||
Reference in New Issue
Block a user