first post

This commit is contained in:
2025-08-09 19:27:29 -07:00
commit 20bda0cee0
10 changed files with 1403 additions and 0 deletions

27
consul-monitor/Dockerfile Normal file
View File

@@ -0,0 +1,27 @@
# Container image for the Consul Service Monitor Flask application.
FROM python:3.11-slim
WORKDIR /app
# Install dependencies first so this layer stays cached until requirements.txt changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY . .
# Create non-root user and hand the application directory over to it
RUN useradd -m appuser && chown -R appuser:appuser /app
USER appuser
# Expose port
EXPOSE 5000
# Environment variables
ENV FLASK_APP=app.py
# NOTE(review): Flask 2.3 (the version pinned in requirements.txt) appears to
# ignore FLASK_ENV; kept for compatibility with older Flask releases - confirm.
ENV FLASK_ENV=production
# Health check: raise_for_status() makes a 4xx/5xx answer from /health fail the
# probe. Without it only connection errors were detected, and a crashing
# endpoint (HTTP 500) was still reported as healthy.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import requests; requests.get('http://localhost:5000/health', timeout=5).raise_for_status()" || exit 1
CMD ["python", "-m", "flask", "run", "--host=0.0.0.0"]

126
consul-monitor/app.py Normal file
View File

@@ -0,0 +1,126 @@
from flask import Flask, render_template, jsonify, g
import sqlite3
import json
from datetime import datetime
import database
import consul_client
# Flask application object; the route and teardown handlers in this module register on it.
app = Flask(__name__)
def get_db():
    """Return the database connection cached on ``flask.g``.

    The first call within an application context opens a connection through
    ``database.init_database()``, stores it on ``g`` and makes sure the schema
    exists; later calls in the same context reuse that connection.
    """
    if 'db_conn' in g:
        return g.db_conn

    conn = database.init_database()
    # Store the connection before creating tables so the teardown handler can
    # still close it if schema creation fails.
    g.db_conn = conn
    database.create_tables(conn)
    return conn
@app.teardown_appcontext
def close_db(e=None):
    """Close the connection stored on ``flask.g``, if one was opened.

    Registered as an application-context teardown handler; ``e`` is the
    argument Flask passes to such handlers and is not used here.
    """
    conn = g.pop('db_conn', None)
    if conn is None:
        # get_db() was never called in this context; nothing to release.
        return
    conn.close()
@app.route('/')
def index():
    """Render the main dashboard page.

    Loads the services stored in the database, probes Consul for
    availability, attaches a generated URL to every service and passes the
    result to the ``index.html`` template.
    """
    services = database.get_all_services_with_health(get_db())
    consul_available = consul_client.is_consul_available()

    for svc in services:
        port = svc['port']
        # Services without a port get no link at all.
        svc['url'] = (
            f"http://{svc['name']}.service.dc1.consul:{port}" if port else None
        )

    return render_template(
        'index.html',
        services=services,
        consul_available=consul_available,
    )
@app.route('/api/services')
def get_services():
    """API endpoint returning all services with their current health status.

    Flow:
      1. If Consul is reachable, fetch fresh service and health data, store it
         in the database and return the stored view with ``status: success``.
      2. If anything in step 1 fails, log the failure and fall back to the
         data already in the database, returning ``status: error`` with the
         error message and ``consul_available: false``.
      3. If the fallback read fails too, return an empty ``services`` list
         instead of letting the request end in an unhandled error.

    Returns:
        A JSON response with the keys ``status``, ``consul_available`` and
        ``services``, plus ``error`` when the fresh fetch failed.
    """
    db_conn = get_db()
    error_message = None

    try:
        if not consul_client.is_consul_available():
            raise ConnectionError("Consul unavailable")

        service_data = consul_client.fetch_all_service_data()
        # Persist the fresh snapshot: one service row plus one health row per check.
        for service_id, data in service_data.items():
            database.upsert_service(db_conn, {
                'id': service_id,
                'name': data['name'],
                'address': data['address'],
                'port': data['port'],
                'tags': data['tags'],
                'meta': data['meta'],
            })
            for check in data['health_checks']:
                database.insert_health_check(
                    db_conn, service_id, check['check_name'], check['status']
                )

        services = database.get_all_services_with_health(db_conn)
        consul_available = True
    except Exception as e:
        # Deliberately broad: any failure while refreshing must degrade to the
        # cached view rather than break the dashboard. Log it so the cause is
        # not lost.
        app.logger.warning("Falling back to cached service data: %s", e)
        consul_available = False
        error_message = str(e)
        try:
            services = database.get_all_services_with_health(db_conn)
        except sqlite3.Error as db_error:
            # Both Consul and the database failed: answer with an empty list.
            app.logger.error("Cached service data unavailable: %s", db_error)
            services = []
            error_message = f"{error_message}; database unavailable: {db_error}"

    # Generate URLs for services; services without a port get no link.
    for service in services:
        if service['port']:
            service['url'] = f"http://{service['name']}.service.dc1.consul:{service['port']}"
        else:
            service['url'] = None

    if consul_available:
        response = {
            'status': 'success',
            'consul_available': True,
            'services': services,
        }
    else:
        response = {
            'status': 'error',
            'consul_available': False,
            'services': services,
            'error': error_message,
        }
    return jsonify(response)
@app.route('/health')
def health_check():
    """Report application health as JSON.

    The application counts as healthy only when both the database and Consul
    respond. The payload also reports each dependency separately together
    with a UTC timestamp in ISO format.
    """
    db_ok = database.is_database_available(get_db())
    consul_ok = consul_client.is_consul_available()

    payload = {
        'status': 'healthy' if (db_ok and consul_ok) else 'unhealthy',
        'consul': 'connected' if consul_ok else 'disconnected',
        'database': 'available' if db_ok else 'unavailable',
        'timestamp': datetime.utcnow().isoformat(),
    }
    return jsonify(payload)
if __name__ == '__main__':
    # Direct execution (``python app.py``): serve on all interfaces, port 5000.
    app.run(port=5000, host='0.0.0.0')

View File

@@ -0,0 +1,82 @@
import logging
import os

import requests
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Consul configuration.
# Host and port can be overridden through the CONSUL_HOST / CONSUL_PORT
# environment variables; unset or empty variables fall back to the defaults.
_DEFAULT_CONSUL_HOST = "consul.service.dc1.consul"
_DEFAULT_CONSUL_PORT = 8500

CONSUL_HOST = os.environ.get("CONSUL_HOST") or _DEFAULT_CONSUL_HOST
try:
    CONSUL_PORT = int(os.environ.get("CONSUL_PORT") or _DEFAULT_CONSUL_PORT)
except ValueError:
    # A non-numeric value must not prevent the module from importing.
    logger.warning(
        "Ignoring non-numeric CONSUL_PORT=%r; using %d",
        os.environ.get("CONSUL_PORT"),
        _DEFAULT_CONSUL_PORT,
    )
    CONSUL_PORT = _DEFAULT_CONSUL_PORT
CONSUL_BASE_URL = f"http://{CONSUL_HOST}:{CONSUL_PORT}"
def get_consul_services():
    """Return the services reported by Consul's ``/v1/agent/services`` endpoint.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: if the request fails or returns
            an error status; the failure is logged before being re-raised.
    """
    endpoint = f"{CONSUL_BASE_URL}/v1/agent/services"
    try:
        resp = requests.get(endpoint, timeout=5)
        resp.raise_for_status()
        services = resp.json()
    except requests.exceptions.RequestException as exc:
        logger.error(f"Failed to fetch Consul services: {exc}")
        raise
    return services
def get_service_health(service_name):
    """Return the health checks Consul reports for ``service_name``.

    Queries ``/v1/health/service/{service_name}`` and flattens the ``Checks``
    list of every returned entry.

    Returns:
        A list of dicts with the keys ``check_name`` and ``status``; empty
        when no entry carries any checks.

    Raises:
        requests.exceptions.RequestException: if the request fails or returns
            an error status; the failure is logged before being re-raised.
    """
    endpoint = f"{CONSUL_BASE_URL}/v1/health/service/{service_name}"
    try:
        resp = requests.get(endpoint, timeout=5)
        resp.raise_for_status()
        entries = resp.json()
    except requests.exceptions.RequestException as exc:
        logger.error(f"Failed to fetch health for service {service_name}: {exc}")
        raise

    # One output record per check, across all returned entries.
    return [
        {
            'check_name': check.get('Name', ''),
            'status': check.get('Status', ''),
        }
        for entry in entries
        for check in entry.get('Checks', [])
    ]
def is_consul_available():
    """Return True when Consul's ``/v1/agent/self`` endpoint answers with HTTP 200.

    Any request failure (connection error, timeout, ...) counts as unavailable.
    """
    probe_url = f"{CONSUL_BASE_URL}/v1/agent/self"
    try:
        status_code = requests.get(probe_url, timeout=2).status_code
    except requests.exceptions.RequestException:
        return False
    return status_code == 200
def fetch_all_service_data():
    """Collect service details and health checks for every Consul service.

    Returns:
        A dict keyed by service ID. Each value holds ``id``, ``name``,
        ``address``, ``port``, ``tags``, ``meta`` and ``health_checks``.
        An empty dict is returned when the service list cannot be fetched.
        A service whose health lookup fails is still included, with an empty
        ``health_checks`` list.
    """
    try:
        registered = get_consul_services()
    except requests.exceptions.RequestException:
        logger.error("Failed to fetch service data from Consul")
        return {}

    combined = {}
    for service_id, info in registered.items():
        name = info.get('Service', '')
        try:
            checks = get_service_health(name)
        except requests.exceptions.RequestException:
            # Log but continue with other services
            logger.warning(f"Skipping health checks for service {name}")
            checks = []

        combined[service_id] = {
            'id': service_id,
            'name': name,
            'address': info.get('Address', ''),
            'port': info.get('Port', None),
            'tags': info.get('Tags', []),
            'meta': info.get('Meta', {}),
            'health_checks': checks,
        }
    return combined

121
consul-monitor/database.py Normal file
View File

@@ -0,0 +1,121 @@
import sqlite3
import json
from datetime import datetime
def create_tables(conn):
    """Create the ``services`` and ``health_checks`` tables and their indexes.

    Every statement uses ``IF NOT EXISTS``, so the function is safe to call
    repeatedly on the same database.

    Args:
        conn: an open ``sqlite3`` connection.
    """
    cursor = conn.cursor()

    # Create services table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS services (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            address TEXT,
            port INTEGER,
            tags TEXT,
            meta TEXT,
            first_seen DATETIME DEFAULT CURRENT_TIMESTAMP,
            last_seen DATETIME DEFAULT CURRENT_TIMESTAMP
        )
    ''')

    # Create health checks table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS health_checks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            service_id TEXT NOT NULL,
            check_name TEXT,
            status TEXT NOT NULL,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (service_id) REFERENCES services (id)
        )
    ''')

    # Create indexes
    cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_health_checks_service_timestamp
        ON health_checks (service_id, timestamp)
    ''')
    cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_health_checks_timestamp
        ON health_checks (timestamp)
    ''')

    # A single commit is enough; the original issued it twice.
    conn.commit()
# URI of the shared-cache in-memory database used by every connection.
_DB_URI = 'file:consul-monitor.db?mode=memory&cache=shared'

# Connection held for the lifetime of the process. SQLite deletes a shared
# in-memory database as soon as its last connection closes, so without this
# anchor all stored data disappears whenever a caller closes the connection
# it got from init_database().
_keepalive_conn = None


def init_database():
    """Initialize database and return connection.

    Returns:
        A new ``sqlite3`` connection to the shared in-memory database. The
        caller owns it and may close it; the stored data survives because a
        module-level keep-alive connection stays open.
    """
    global _keepalive_conn
    if _keepalive_conn is None:
        _keepalive_conn = sqlite3.connect(_DB_URI, uri=True)
    return sqlite3.connect(_DB_URI, uri=True)
def upsert_service(conn, service_data):
    """Insert a service row, or update it when the ID already exists.

    Args:
        conn: an open ``sqlite3`` connection.
        service_data: dict with the required keys ``id`` and ``name`` and the
            optional keys ``address``, ``port``, ``tags`` and ``meta``. Tags
            and meta are stored as JSON text (defaults: ``[]`` and ``{}``).

    On conflict every column except ``first_seen`` is overwritten and
    ``last_seen`` is set to the current timestamp. The change is committed.
    """
    row = (
        service_data['id'],
        service_data['name'],
        service_data.get('address'),
        service_data.get('port'),
        json.dumps(service_data.get('tags', [])),
        json.dumps(service_data.get('meta', {})),
    )
    conn.execute('''
        INSERT INTO services (id, name, address, port, tags, meta)
        VALUES (?, ?, ?, ?, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
        name = excluded.name,
        address = excluded.address,
        port = excluded.port,
        tags = excluded.tags,
        meta = excluded.meta,
        last_seen = CURRENT_TIMESTAMP
    ''', row)
    conn.commit()
def insert_health_check(conn, service_id, check_name, status):
    """Append one health check record and commit it.

    Args:
        conn: an open ``sqlite3`` connection.
        service_id: ID of the service the check belongs to.
        check_name: name of the check.
        status: status string to store.

    The ``timestamp`` column is filled by its column default.
    """
    record = (service_id, check_name, status)
    conn.execute('''
        INSERT INTO health_checks (service_id, check_name, status)
        VALUES (?, ?, ?)
    ''', record)
    conn.commit()
# Ranking used to pick the worst status among checks recorded at the same
# time; statuses not listed here rank below all of them.
_STATUS_SEVERITY = {'passing': 0, 'warning': 1, 'critical': 2}


def get_all_services_with_health(conn):
    """Return every service together with its current health status.

    The current status is the worst status (critical > warning > passing)
    among the checks recorded at the service's most recent timestamp. The
    previous query selected a bare ``h.status`` next to ``MAX(h.timestamp)``;
    timestamps have one-second resolution, so when several checks were
    written in the same second an arbitrary one won and a critical check
    could be hidden behind a passing one.

    Args:
        conn: an open ``sqlite3`` connection.

    Returns:
        A list of dicts ordered by service ID, each with the keys ``id``,
        ``name``, ``address``, ``port``, ``tags`` (list), ``meta`` (dict),
        ``current_status`` (``'unknown'`` when no check exists) and
        ``last_check`` (timestamp of the latest check, or ``None``).
    """
    cursor = conn.cursor()
    # One row per check in the latest batch of each service; services without
    # any check still produce a single row with NULL status/timestamp.
    cursor.execute('''
        SELECT s.id, s.name, s.address, s.port, s.tags, s.meta,
               h.status, h.timestamp
        FROM services s
        LEFT JOIN health_checks h
               ON h.service_id = s.id
              AND h.timestamp = (
                      SELECT MAX(latest.timestamp)
                      FROM health_checks latest
                      WHERE latest.service_id = s.id
                  )
        ORDER BY s.id
    ''')

    services = {}
    for service_id, name, address, port, tags, meta, status, last_check in cursor.fetchall():
        status = status or 'unknown'
        service = services.get(service_id)
        if service is None:
            services[service_id] = {
                'id': service_id,
                'name': name,
                'address': address,
                'port': port,
                'tags': json.loads(tags) if tags else [],
                'meta': json.loads(meta) if meta else {},
                'current_status': status,
                'last_check': last_check,
            }
        elif (_STATUS_SEVERITY.get(status, -1)
                > _STATUS_SEVERITY.get(service['current_status'], -1)):
            # A later row of the same batch carries a worse status.
            service['current_status'] = status
    return list(services.values())
def get_service_history(conn, service_id, hours=24):
    """Return the recent health check history of one service.

    Args:
        conn: an open ``sqlite3`` connection.
        service_id: ID of the service to look up.
        hours: size of the look-back window, counted back from now.

    Returns:
        A list of ``(status, timestamp)`` rows ordered by timestamp.
    """
    # SQLite date modifier, e.g. '-24 hours'.
    window = f'-{hours} hours'
    result = conn.execute('''
        SELECT status, timestamp
        FROM health_checks
        WHERE service_id = ?
        AND timestamp >= datetime('now', ?)
        ORDER BY timestamp
    ''', (service_id, window))
    return result.fetchall()
def is_database_available(conn):
    """Return True when a trivial query succeeds on ``conn``.

    Any ``sqlite3.Error`` raised by the probe query counts as unavailable.
    """
    try:
        conn.execute('SELECT 1')
    except sqlite3.Error:
        return False
    else:
        return True

View File

@@ -0,0 +1,2 @@
Flask==2.3.3
requests==2.31.0

View File

@@ -0,0 +1,113 @@
/* Basic reset and layout */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

body {
    font-family: Arial, sans-serif;
    background: #f5f5f5;
    padding: 0;
    margin: 0;
}

/* Header: stays pinned to the top of the viewport while the table scrolls */
.header {
    background: white;
    padding: 1rem 2rem;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    display: flex;
    justify-content: space-between;
    align-items: center;
    position: sticky;
    top: 0;
    z-index: 100;
}

/* Alert banners */
.error-banner, .warning-banner {
    padding: 0.75rem 2rem;
    margin: 0;
    font-weight: bold;
}

.error-banner {
    background: #fee;
    color: #c33;
}

.warning-banner {
    background: #fff3cd;
    color: #856404;
}

/* Services table */
.services-container {
    padding: 2rem;
}

.services-table {
    width: 100%;
    background: white;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    /* border-radius does not apply to a table whose borders are collapsed.
       Separate borders with zero spacing look the same here (cells only
       draw a bottom border) and let the rounded corners take effect. */
    border-collapse: separate;
    border-spacing: 0;
    overflow: hidden;
}

.services-table th, .services-table td {
    padding: 1rem;
    text-align: left;
    border-bottom: 1px solid #eee;
}

.services-table th {
    background: #f8f9fa;
    font-weight: bold;
}

/* Status indicators */
.status-icon {
    font-size: 1.2rem;
    display: inline-block;
    width: 24px;
    text-align: center;
}

.status-passing { color: #28a745; }
.status-warning { color: #ffc107; }
.status-critical { color: #dc3545; }
.status-unknown { color: #6c757d; }

/* Tags */
.tag {
    display: inline-block;
    background: #e9ecef;
    padding: 0.25rem 0.5rem;
    border-radius: 4px;
    font-size: 0.875rem;
    margin-right: 0.5rem;
    margin-bottom: 0.25rem;
}

/* Buttons */
button {
    background: #007bff;
    color: white;
    border: none;
    padding: 0.5rem 1rem;
    border-radius: 4px;
    cursor: pointer;
    font-size: 1rem;
}

button:hover { background: #0056b3; }

button:disabled {
    background: #6c757d;
    cursor: not-allowed;
    opacity: 0.7;
}

/* No services message */
.no-services {
    padding: 2rem;
    text-align: center;
    background: white;
    border-radius: 8px;
    margin-top: 1rem;
    font-style: italic;
    color: #6c757d;
}

View File

@@ -0,0 +1,74 @@
console.log('app.js loading...');

/**
 * Factory for the `serviceMonitor` component state.
 *
 * The returned object holds the service list plus the loading/error flags,
 * and exposes helpers that map a health status to a CSS class and an emoji.
 */
function serviceMonitor() {
    console.log('serviceMonitor function called');

    // Status -> emoji lookup; any other status falls back to the white circle.
    const statusEmoji = new Map([
        ['passing', '🟢'],
        ['warning', '🟡'],
        ['critical', '🔴'],
    ]);

    return {
        services: [],
        loading: false,
        error: null,
        consulAvailable: true,

        // Load the service list as soon as the component is initialized.
        init() {
            console.log('Initializing serviceMonitor component');
            this.refreshServices();
        },

        // Fetch /api/services and copy the result into the component state.
        async refreshServices() {
            console.log('Refreshing services');
            this.loading = true;
            this.error = null;
            try {
                const response = await fetch('/api/services');
                const payload = await response.json();
                const succeeded = payload.status === 'success';
                if (!succeeded) {
                    this.error = payload.error || 'Failed to fetch services';
                }
                // On error the API may still deliver cached services.
                this.services = succeeded ? payload.services : (payload.services || []);
                this.consulAvailable = payload.consul_available;
            } catch (err) {
                this.error = 'Network error: ' + err.message;
                this.services = [];
                this.consulAvailable = false;
            } finally {
                this.loading = false;
            }
        },

        // Class map for :class bindings; exactly the matching key is true.
        getStatusClass(status) {
            const isUnknown = !status || status === 'unknown';
            return {
                'status-passing': status === 'passing',
                'status-warning': status === 'warning',
                'status-critical': status === 'critical',
                'status-unknown': isUnknown
            };
        },

        getStatusEmoji(status) {
            return statusEmoji.has(status) ? statusEmoji.get(status) : '⚪';
        }
    };
}
// Register the component with Alpine.js.
// - The factory is always exposed on window, so `x-data="serviceMonitor"`
//   resolves even if the Alpine registration below never happens.
// - If Alpine is already loaded, register immediately.
// - Otherwise register from the `alpine:init` event, which Alpine dispatches
//   before it initializes the page. The previous code called Alpine.data()
//   unconditionally and relied on the resulting exception when Alpine was
//   not loaded yet.
window.serviceMonitor = serviceMonitor;

function registerServiceMonitor() {
    console.log('Registering with Alpine.js');
    Alpine.data('serviceMonitor', serviceMonitor);
    console.log('Alpine registration successful');
}

if (window.Alpine) {
    try {
        registerServiceMonitor();
    } catch (error) {
        console.error('Alpine registration failed:', error);
        console.log('Fallback to window registration');
    }
} else {
    document.addEventListener('alpine:init', registerServiceMonitor);
    console.log('Fallback to window registration');
}
console.log('app.js loaded');

View File

@@ -0,0 +1,63 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Consul Service Monitor</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
    {# app.js must run BEFORE Alpine. Deferred scripts execute in document
       order and Alpine's CDN build starts itself as soon as it has loaded,
       so with Alpine first the body's x-data="serviceMonitor" was evaluated
       before the component existed. #}
    <script src="{{ url_for('static', filename='js/app.js') }}" defer></script>
    <script src="https://unpkg.com/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
</head>
<body x-data="serviceMonitor">
    <div class="header">
        <h1>Consul Service Monitor</h1>
        <div class="controls">
            <button @click="refreshServices" :disabled="loading">
                <span x-show="!loading">🔄 Refresh</span>
                <span x-show="loading">Loading...</span>
            </button>
        </div>
    </div>
    <div x-show="error" class="error-banner" x-text="error"></div>
    <div x-show="!consulAvailable" class="warning-banner">
        ⚠️ Consul connection failed - showing cached data
    </div>
    <div class="services-container">
        <table class="services-table">
            <thead>
                <tr>
                    <th>Service Name</th>
                    <th>Status</th>
                    <th>URL</th>
                    <th>Tags</th>
                </tr>
            </thead>
            <tbody>
                <template x-for="service in services" :key="service.id">
                    <tr>
                        <td x-text="service.name"></td>
                        <td>
                            <span class="status-icon"
                                  :class="getStatusClass(service.current_status)"
                                  x-text="getStatusEmoji(service.current_status)">
                            </span>
                        </td>
                        <td>
                            {# rel="noopener noreferrer": the opened service page must not get a handle on this window #}
                            <a :href="service.url" target="_blank" rel="noopener noreferrer" x-text="service.url"></a>
                        </td>
                        <td>
                            <template x-for="tag in service.tags">
                                <span class="tag" x-text="tag"></span>
                            </template>
                        </td>
                    </tr>
                </template>
            </tbody>
        </table>
        <div x-show="services.length === 0 && !loading" class="no-services">
            No services found
        </div>
    </div>
</body>
</html>

318
design.md Normal file
View File

@@ -0,0 +1,318 @@
# Consul Service Monitor - Design Document
## Overview
A web-based dashboard application that monitors and visualizes the health status of services registered in HashiCorp Consul. The application provides real-time monitoring with historical health tracking capabilities.
## Architecture
### High-Level Components
1. **Web Frontend** - Interactive dashboard displaying service status
2. **Backend API** - REST API for data retrieval and configuration
3. **Data Collection Service** - Background service polling Consul for health data
4. **SQLite Database** - Historical health check data storage
5. **Consul Integration** - Service discovery and health check monitoring
### Technology Stack
- **Frontend**: HTML5, CSS3, JavaScript (with Chart.js for visualizations)
- **Backend**: Python 3.9+ with Flask
- **Database**: SQLite (ephemeral storage)
- **Service Discovery**: HashiCorp Consul (consul.service.dc1.consul)
- **Updates**: Periodic polling (no WebSockets needed)
## Functional Requirements
### Core Features
#### 1. Service List Display
- Display all services registered in Consul
- Show service name, ID, and tags
- Provide clickable links to service URLs
- Support sorting and filtering
#### 2. Health Status Visualization
- **Current Status Indicator**
- Green icon: All health checks passing
- Red icon: One or more health checks failing
- Yellow icon: Warning state (if supported)
- **Historical Status Chart**
- Mini bar chart showing 24-hour health history
- Time-based visualization (hourly aggregation)
- Color-coded status representation
#### 3. Auto-refresh Functionality
- Toggle switch to enable/disable auto-refresh
- Configurable refresh interval (30s, 1m, 2m, 5m, 10m)
- Visual indicator when auto-refresh is active
- Manual refresh button
#### 4. Configuration Management
- Session-based storage of user preferences (no persistence needed)
- Configurable history granularity (5m, 15m, 30m, 1h) - default: 15 minutes
## Database Schema
### Tables
```sql
-- Services table
CREATE TABLE services (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
address TEXT,
port INTEGER,
tags TEXT, -- JSON array
meta TEXT, -- JSON object
first_seen DATETIME DEFAULT CURRENT_TIMESTAMP,
last_seen DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Health checks table
CREATE TABLE health_checks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
service_id TEXT NOT NULL,
check_id TEXT NOT NULL,
check_name TEXT,
status TEXT NOT NULL, -- 'passing', 'warning', 'critical'
output TEXT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (service_id) REFERENCES services (id)
);
-- Configuration table (session-based, optional for defaults)
CREATE TABLE config (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Service URLs are generated using pattern: http://{service_name}.service.dc1.consul:{port}
-- Indexes for performance
CREATE INDEX idx_health_checks_service_timestamp ON health_checks (service_id, timestamp);
CREATE INDEX idx_health_checks_timestamp ON health_checks (timestamp);
```
## API Design
### REST Endpoints
```python
# Flask routes
GET /
- Serves main dashboard HTML page
GET /api/services
- Returns list of all services with current health status
- Generated URLs: http://{service_name}.service.dc1.consul:{port}
- Response: Array of service objects with health summary
GET /api/services/<service_id>/history
- Returns historical health data for charts
- Query params: ?granularity=15 (minutes: 5,15,30,60)
- Response: Time-series data for Chart.js
POST /api/config
- Updates session configuration
- Body: { "autoRefresh": true, "refreshInterval": 60, "historyGranularity": 15 }
GET /api/config
- Returns current session configuration
```
## Data Collection Service
### Polling Strategy
```yaml
Consul Polling:
- Interval: 60 seconds
- Consul Address: consul.service.dc1.consul:8500
- Endpoints:
- /v1/agent/services (service discovery)
- /v1/health/service/{service} (health checks)
- No authentication required
- Error handling: Log errors, continue polling
- Expected services: 30-40 services
Data Retention:
- Keep detailed data for 24 hours only (ephemeral storage)
- No long-term aggregation needed
- Database recreated on container restart
```
### Health Check Processing
1. **Data Collection**
- Poll Consul API for service list
- For each service, fetch health check status
- Store raw health check data with timestamps
2. **Status Aggregation**
- Service-level status: Worst status among all checks
- Historical aggregation: Count of passing/warning/critical per time window
3. **Change Detection**
- Compare current status with previous poll
- Trigger notifications/updates on status changes
- Maintain service registration/deregistration events
## Frontend Design
### Main Dashboard Layout
```
┌─────────────────────────────────────────────────┐
│ Consul Service Monitor [⚙️] [🔄] │
├─────────────────────────────────────────────────┤
│ Auto-refresh: [ON/OFF] Interval: [1m ▼] │
│ History granularity: [15m ▼] │
├─────────────────────────────────────────────────┤
│ Service Name │ Status │ URL │ History │
├─────────────────┼────────┼──────────┼───────────┤
│ web-api │ 🟢 │ [link] │ ▆▆█▆█▆▆ │
│ database │ 🔴 │ [link] │ █▆▆▄▂▂▄ │
│ cache-service │ 🟢 │ [link] │ ████████ │
└─────────────────────────────────────────────────┘
```
### Interactive Elements
- **Status Icons**: Visual indicators only (no detailed popup needed)
- **History Charts**: Chart.js mini bar charts with 24-hour data
- **Service Links**: URLs generated as http://{service_name}.service.dc1.consul:{port}
- **Desktop-optimized**: No mobile responsive design required
### Updates
- Periodic AJAX polling for updates
- Configurable refresh intervals (30s, 1m, 2m, 5m, 10m)
- Visual loading indicators during refresh
## Configuration Management
### User Settings (Session-based)
```json
{
"autoRefresh": {
"enabled": false,
"interval": 60,
"options": [30, 60, 120, 300, 600]
},
"display": {
"historyGranularity": 15,
"granularityOptions": [5, 15, 30, 60]
}
}
```
### System Configuration
```python
# Flask configuration
CONSUL_HOST = "consul.service.dc1.consul"
CONSUL_PORT = 8500
DATABASE_PATH = ":memory:" # Ephemeral SQLite
POLL_INTERVAL = 60 # seconds
MAX_SERVICES = 50 # Safety limit
```
## Deployment Considerations
### Docker Deployment
```dockerfile
FROM python:3.11-slim
WORKDIR /app
# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY . .
# Expose port
EXPOSE 5000
# Set environment variables
ENV FLASK_APP=app.py
ENV FLASK_ENV=production
ENV CONSUL_HOST=consul.service.dc1.consul
ENV CONSUL_PORT=8500
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:5000/health || exit 1
CMD ["python", "-m", "flask", "run", "--host=0.0.0.0"]
```
### Python Dependencies (requirements.txt)
```
Flask==2.3.3
requests==2.31.0
# sqlite3 is part of the Python standard library - it must not be listed as a pip requirement
APScheduler==3.10.4 # For background polling
```
### Environment Variables
- `CONSUL_HOST`: Consul server hostname (default: consul.service.dc1.consul)
- `CONSUL_PORT`: Consul server port (default: 8500)
- `FLASK_PORT`: Web server port (default: 5000)
- `POLL_INTERVAL`: Health check polling interval in seconds (default: 60)
### Health Checks
The application should expose its own health endpoint:
- `GET /health`: Returns application health status
- `GET /metrics`: Prometheus-style metrics (optional)
## Security Considerations
1. **Consul Access**: No authentication required for your setup
2. **Database**: Ephemeral SQLite in container memory
3. **Web Interface**: Open dashboard, no authentication needed
4. **Input Validation**: Sanitize service names and configuration inputs
5. **Container Security**: Run as non-root user in container
## Future Enhancements
- **Alerting**: Email/Slack notifications on service failures (mentioned as future feature)
- **Service Filtering**: Search and filter capabilities for larger service lists
- **Service Details**: Detailed health check information popup/modal
- **Themes**: Dark/light mode toggle
- **Export**: Export health data as CSV/JSON
- **Custom Time Ranges**: Configurable history periods beyond 24 hours
## Development Phases
### Phase 1: Core Functionality
- Basic Consul integration
- SQLite database setup
- Simple web interface
- Manual refresh capability
### Phase 2: Real-time Features
- Auto-refresh functionality
- Periodic AJAX polling for live updates (no WebSockets, as decided in the Technology Stack)
- Historical data visualization
- Session-based configuration of user preferences (no persistence across sessions)
### Phase 3: Enhanced UX
- Responsive design
- Advanced filtering
- Performance optimizations
- Error handling improvements
### Phase 4: Production Ready
- Docker deployment
- Security hardening
- Monitoring and logging
- Documentation and testing

477
plan_phase1.md Normal file
View File

@@ -0,0 +1,477 @@
# Phase 1 Implementation Plan - Consul Service Monitor
## Overview
Implement the core functionality for a Flask-based Consul service monitoring dashboard. This phase focuses on basic Consul integration, SQLite database setup, and a simple web interface with manual refresh capability.
## Project Structure
Create the following directory structure:
```
consul-monitor/
├── app.py # Main Flask application
├── consul_client.py # Consul API integration
├── database.py # SQLite database operations
├── requirements.txt # Python dependencies
├── templates/
│ └── index.html # Main dashboard template
├── static/
│ ├── css/
│ │ └── style.css # Dashboard styles
│ └── js/
│ └── app.js # Frontend JavaScript with Alpine.js
└── Dockerfile # Container configuration
```
## Dependencies (requirements.txt)
```
Flask==2.3.3
requests==2.31.0
```
## Database Implementation (database.py)
### Database Schema
Implement exactly these SQLite tables:
```sql
-- Services table
CREATE TABLE IF NOT EXISTS services (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
address TEXT,
port INTEGER,
tags TEXT, -- Store as JSON string
meta TEXT, -- Store as JSON string
first_seen DATETIME DEFAULT CURRENT_TIMESTAMP,
last_seen DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Health checks table
CREATE TABLE IF NOT EXISTS health_checks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
service_id TEXT NOT NULL,
check_name TEXT,
status TEXT NOT NULL, -- 'passing', 'warning', 'critical'
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (service_id) REFERENCES services (id)
);
-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_health_checks_service_timestamp
ON health_checks (service_id, timestamp);
```
### Database Functions
Create these specific functions in database.py:
1. **`init_database()`**: Open and return a connection to the SQLite database; **`create_tables(conn)`** then creates the above schema on that connection
2. **`upsert_service(conn, service_data)`**: Insert or update service record
- Parameters: an open connection and a dictionary with id, name, address, port, tags (list) and meta (dict); tags and meta are serialized to JSON strings for storage
- Update last_seen timestamp on existing records
3. **`insert_health_check(conn, service_id, check_name, status)`**: Insert health check record
4. **`get_all_services_with_health(conn)`**: Return all services with their latest health status
- Join services table with latest health_checks record per service
- Return list of dictionaries with service details + current health status
5. **`get_service_history(conn, service_id, hours=24)`**: Get health history for specific service
6. **`is_database_available(conn)`**: Test database connectivity
## Consul Client Implementation (consul_client.py)
### Configuration
Set these constants:
```python
CONSUL_HOST = "consul.service.dc1.consul"
CONSUL_PORT = 8500
CONSUL_BASE_URL = f"http://{CONSUL_HOST}:{CONSUL_PORT}"
```
### Consul Functions
Implement these specific functions:
1. **`get_consul_services()`**:
- Call `/v1/agent/services` endpoint
- Return dictionary of services or raise exception on failure
- Handle HTTP errors and connection timeouts
2. **`get_service_health(service_name)`**:
- Call `/v1/health/service/{service_name}` endpoint
- Parse health check results
- Return list of health checks with check_name and status
- Handle cases where service has no health checks
3. **`is_consul_available()`**:
- Test connection to Consul
- Return True/False boolean
4. **`fetch_all_service_data()`**:
- Orchestrate calls to get_consul_services() and get_service_health()
- Return combined service and health data
- Handle partial failures gracefully
## Flask Application (app.py)
### Application Configuration
```python
from flask import Flask, render_template, jsonify
import sqlite3
import json
from datetime import datetime
```
### Flask Routes
Implement exactly these routes:
1. **`GET /`**:
- Render main dashboard using index.html template
- Pass initial service data to template
- Handle database/consul errors gracefully
2. **`GET /api/services`**:
- Return a JSON object containing all services with their current health status
- Include generated URLs using pattern: `http://{service_name}.service.dc1.consul:{port}`
- Response format:
```json
{
"status": "success|error",
"consul_available": true|false,
"services": [
{
"id": "service-id",
"name": "service-name",
"address": "10.0.0.1",
"port": 8080,
"url": "http://service-name.service.dc1.consul:8080",
"tags": ["tag1", "tag2"],
"current_status": "passing|warning|critical|unknown",
"last_check": "2024-01-01T12:00:00"
}
],
"error": "error message if any"
}
```
3. **`GET /health`**:
- Return application health status
- Test both database and Consul connectivity
- Response format:
```json
{
"status": "healthy|unhealthy",
"consul": "connected|disconnected",
"database": "available|unavailable",
"timestamp": "2024-01-01T12:00:00"
}
```
### Data Flow Logic
Implement this exact flow in the `/api/services` endpoint:
1. Try to fetch fresh data from Consul using `fetch_all_service_data()`
2. If successful:
- Update database with new service and health data
- Return fresh data with `consul_available: true`
3. If Consul fails:
- Retrieve cached data from database using `get_all_services_with_health()`
- Return cached data with `consul_available: false` and error message
4. If both fail:
- Return error response with empty services array
## Frontend Implementation
### HTML Template (templates/index.html)
Create dashboard with this structure:
```html
<!DOCTYPE html>
<html>
<head>
<title>Consul Service Monitor</title>
<link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
<script src="https://unpkg.com/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body x-data="serviceMonitor()">
<div class="header">
<h1>Consul Service Monitor</h1>
<div class="controls">
<button @click="refreshServices()" :disabled="loading">
<span x-show="!loading">🔄 Refresh</span>
<span x-show="loading">Loading...</span>
</button>
</div>
</div>
<div x-show="error" class="error-banner" x-text="error"></div>
<div x-show="!consulAvailable" class="warning-banner">
⚠️ Consul connection failed - showing cached data
</div>
<div class="services-container">
<table class="services-table">
<thead>
<tr>
<th>Service Name</th>
<th>Status</th>
<th>URL</th>
<th>Tags</th>
</tr>
</thead>
<tbody>
<template x-for="service in services" :key="service.id">
<tr>
<td x-text="service.name"></td>
<td>
<span class="status-icon"
:class="getStatusClass(service.current_status)"
x-text="getStatusEmoji(service.current_status)">
</span>
</td>
<td>
<a :href="service.url" target="_blank" x-text="service.url"></a>
</td>
<td>
<template x-for="tag in service.tags">
<span class="tag" x-text="tag"></span>
</template>
</td>
</tr>
</template>
</tbody>
</table>
<div x-show="services.length === 0 && !loading" class="no-services">
No services found
</div>
</div>
</body>
</html>
```
### Alpine.js JavaScript (static/js/app.js)
```javascript
function serviceMonitor() {
return {
services: [],
loading: false,
error: null,
consulAvailable: true,
init() {
this.refreshServices();
},
async refreshServices() {
this.loading = true;
this.error = null;
try {
const response = await fetch('/api/services');
const data = await response.json();
if (data.status === 'success') {
this.services = data.services;
this.consulAvailable = data.consul_available;
} else {
this.error = data.error || 'Failed to fetch services';
this.services = data.services || [];
this.consulAvailable = data.consul_available;
}
} catch (err) {
this.error = 'Network error: ' + err.message;
this.services = [];
this.consulAvailable = false;
} finally {
this.loading = false;
}
},
getStatusClass(status) {
return {
'status-passing': status === 'passing',
'status-warning': status === 'warning',
'status-critical': status === 'critical',
'status-unknown': !status || status === 'unknown'
};
},
getStatusEmoji(status) {
switch(status) {
case 'passing': return '🟢';
case 'warning': return '🟡';
case 'critical': return '🔴';
default: return '⚪';
}
}
}
}
```
### CSS Styling (static/css/style.css)
Implement these specific styles:
```css
/* Basic reset and layout */
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: Arial, sans-serif; background: #f5f5f5; }
/* Header */
.header {
background: white;
padding: 1rem 2rem;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
display: flex;
justify-content: space-between;
align-items: center;
}
/* Alert banners */
.error-banner, .warning-banner {
padding: 0.75rem 2rem;
margin: 0;
font-weight: bold;
}
.error-banner { background: #fee; color: #c33; }
.warning-banner { background: #fff3cd; color: #856404; }
/* Services table */
.services-container { padding: 2rem; }
.services-table {
width: 100%;
background: white;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
border-collapse: collapse;
}
.services-table th, .services-table td {
padding: 1rem;
text-align: left;
border-bottom: 1px solid #eee;
}
.services-table th { background: #f8f9fa; font-weight: bold; }
/* Status indicators */
.status-icon { font-size: 1.2rem; }
.status-passing { color: #28a745; }
.status-warning { color: #ffc107; }
.status-critical { color: #dc3545; }
.status-unknown { color: #6c757d; }
/* Tags */
.tag {
display: inline-block;
background: #e9ecef;
padding: 0.25rem 0.5rem;
border-radius: 4px;
font-size: 0.875rem;
margin-right: 0.5rem;
}
/* Buttons */
button {
background: #007bff;
color: white;
border: none;
padding: 0.5rem 1rem;
border-radius: 4px;
cursor: pointer;
}
button:hover { background: #0056b3; }
button:disabled { background: #6c757d; cursor: not-allowed; }
```
## Error Handling Requirements
### Consul Connection Errors
- Catch `requests.exceptions.ConnectionError` and `requests.exceptions.Timeout`
- Log errors but continue serving cached data
- Display connection status in UI
### Database Errors
- Handle SQLite database lock errors
- Graceful degradation when database is unavailable
- Return appropriate HTTP status codes
### Data Validation
- Validate service data structure from Consul API
- Handle missing or malformed service records
- Default to 'unknown' status for services without health checks
## Testing Checklist
Before considering Phase 1 complete, verify:
1. **Database Operations**:
- [ ] Database tables created correctly
- [ ] Services can be inserted/updated
- [ ] Health checks are stored with timestamps
- [ ] Queries return expected data structure
2. **Consul Integration**:
- [ ] Can fetch service list from Consul
- [ ] Can fetch health status for each service
- [ ] Handles Consul connection failures gracefully
- [ ] Service URLs generated correctly
3. **Web Interface**:
- [ ] Dashboard loads without errors
- [ ] Services displayed in table format
- [ ] Status icons show correct colors
- [ ] Refresh button updates data via AJAX
- [ ] Error messages display when appropriate
4. **Error Scenarios**:
- [ ] App starts when Consul is unavailable
- [ ] Shows cached data when Consul fails
- [ ] Displays appropriate error messages
- [ ] Recovers when Consul comes back online
## Docker Configuration (Dockerfile)
```dockerfile
FROM python:3.11-slim
WORKDIR /app
# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY . .
# Create non-root user
RUN useradd -m appuser && chown -R appuser:appuser /app
USER appuser
# Expose port
EXPOSE 5000
# Environment variables
ENV FLASK_APP=app.py
ENV FLASK_ENV=production
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import requests; requests.get('http://localhost:5000/health', timeout=5)" || exit 1
CMD ["python", "-m", "flask", "run", "--host=0.0.0.0"]
```
## Implementation Order
Follow this exact sequence:
1. Create project structure and requirements.txt
2. Implement database.py with all functions and test database operations
3. Implement consul_client.py and test Consul connectivity
4. Create basic Flask app.py with health endpoint
5. Add /api/services endpoint with full error handling
6. Create HTML template with Alpine.js integration
7. Add CSS styling for professional appearance
8. Test complete workflow: Consul → Database → API → Frontend
9. Create Dockerfile and test containerized deployment
10. Verify all error scenarios work as expected
## Success Criteria
Phase 1 is complete when:
- Application starts successfully in Docker container
- Dashboard displays list of services from Consul
- Manual refresh button updates service data
- Application gracefully handles Consul outages
- All services show correct health status with colored indicators
- Generated service URLs follow the specified pattern
- Error messages display appropriately in the UI