first commit
This commit is contained in:
80
REMEDIATION_BUG_FIX.md
Normal file
80
REMEDIATION_BUG_FIX.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# Remediation System Bug Fix - Connection Detection Logic
|
||||
|
||||
## Problem Summary
|
||||
|
||||
The remediation system had a critical logic inversion bug that prevented it from detecting stable connections during the `waiting_for_stability` state, causing unnecessary timeouts.
|
||||
|
||||
### Symptoms
|
||||
- Connection showed as "connected (DHT: 357 nodes)" in logs but system treated it as unstable
|
||||
- Remediation timed out after 1+ hours despite connection being stable
|
||||
- System accumulated failures even when connection was working
|
||||
|
||||
### Root Cause
|
||||
**File**: [`monitoring/connection_monitor.py`](monitoring/connection_monitor.py)
|
||||
**Lines**: 96-100 (before fix)
|
||||
|
||||
The bug was in the `_determine_connection_state` method:
|
||||
|
||||
```python
|
||||
# BUGGY CODE (before fix):
|
||||
if self.state_manager.remediation_state != 'waiting_for_stability':
|
||||
return 'stable'
|
||||
else:
|
||||
# In remediation, maintain current state until 1-hour requirement is met
|
||||
return self.state_manager.connection_state # ❌ WRONG
|
||||
```
|
||||
|
||||
This logic returned the **previous connection state** instead of the actual current state when in remediation, creating a feedback loop where the system could never detect stability.
|
||||
|
||||
## The Fix
|
||||
|
||||
**Changed to**:
|
||||
```python
|
||||
# FIXED CODE:
|
||||
if is_connected:
|
||||
self.state_manager.consecutive_stable_checks += 1
|
||||
# Always return 'stable' when connection is good, regardless of remediation state
|
||||
# The 1-hour stability requirement is handled in the stability tracking logic, not here
|
||||
return 'stable'
|
||||
```
|
||||
|
||||
## Impact
|
||||
|
||||
### Before Fix
|
||||
- System could not detect stable connections during remediation
|
||||
- Remediation always timed out after 62 minutes (3720 seconds)
|
||||
- Connection quality metrics were inaccurate
|
||||
|
||||
### After Fix
|
||||
- System correctly detects stable connections immediately
|
||||
- Remediation can complete successfully when connection stabilizes
|
||||
- Stability timer starts properly when connection becomes stable
|
||||
|
||||
## Testing
|
||||
|
||||
A verification test was created ([`test_fix_verification.py`](test_fix_verification.py)) that simulates the exact problematic scenario:
|
||||
|
||||
```bash
|
||||
python test_fix_verification.py
|
||||
```
|
||||
|
||||
The test confirms that:
|
||||
1. Good connections return 'stable' during remediation
|
||||
2. Bad connections return 'unstable'
|
||||
3. Error conditions are handled properly
|
||||
4. The specific log scenario now works correctly
|
||||
|
||||
## Files Modified
|
||||
|
||||
1. **`monitoring/connection_monitor.py`** - Fixed logic inversion bug in `_determine_connection_state`
|
||||
2. **`test_fix_verification.py`** - Added verification test for the fix
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
While this fixes the critical bug, additional improvements could be made:
|
||||
|
||||
1. **Sliding window detection** - Track connection quality over multiple checks
|
||||
2. **Graceful transitions** - Require multiple consecutive state changes
|
||||
3. **Enhanced logging** - Better connection quality metrics
|
||||
|
||||
These can be implemented as separate enhancements now that the core detection logic is fixed.
|
||||
BIN
__pycache__/main.cpython-313.pyc
Normal file
BIN
__pycache__/main.cpython-313.pyc
Normal file
Binary file not shown.
8
api/__init__.py
Normal file
8
api/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Package exposing API clients for the external services this tool talks to."""

from .qbittorrent_client import QBittorrentClient
from .nomad_client import NomadClient

# Public API of the package.
__all__ = ['QBittorrentClient', 'NomadClient']
|
||||
BIN
api/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
api/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
api/__pycache__/nomad_client.cpython-313.pyc
Normal file
BIN
api/__pycache__/nomad_client.cpython-313.pyc
Normal file
Binary file not shown.
BIN
api/__pycache__/qbittorrent_client.cpython-313.pyc
Normal file
BIN
api/__pycache__/qbittorrent_client.cpython-313.pyc
Normal file
Binary file not shown.
BIN
api/__pycache__/vpn_client.cpython-313.pyc
Normal file
BIN
api/__pycache__/vpn_client.cpython-313.pyc
Normal file
Binary file not shown.
276
api/nomad_client.py
Normal file
276
api/nomad_client.py
Normal file
@@ -0,0 +1,276 @@
|
||||
import requests
|
||||
import time
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
class NomadClient:
    """Client for interacting with the Nomad HTTP API.

    All methods are best-effort: network or API failures are logged and
    reported through the return value (False/None/partial dict) rather than
    raised to the caller.
    """

    def __init__(self, base_url: str = 'http://127.0.0.1:4646',
                 logger: logging.Logger = None):
        """
        Initialize Nomad client

        Args:
            base_url: Base URL for Nomad API
            logger: Optional logger instance (uses module logger if not provided)
        """
        self.base_url = base_url.rstrip('/')
        self.logger = logger or logging.getLogger(__name__)

    @staticmethod
    def _auth_headers(token: Optional[str]) -> Dict[str, str]:
        """Build request headers, attaching the Nomad ACL token when provided."""
        return {'X-Nomad-Token': token} if token else {}

    def restart_task_via_allocation(self, job_id: str, task_name: str,
                                    namespace: str = "default", wait_time: int = 60,
                                    token: Optional[str] = None) -> bool:
        """
        Restart a specific task in a Nomad job by restarting just that task.

        Args:
            job_id: The ID of the job containing the task
            task_name: The name of the task to restart
            namespace: The namespace of the job (default: 'default')
            wait_time: Seconds to wait after restart (default: 60)
            token: Optional Nomad ACL token

        Returns:
            bool: True if restart succeeded, False otherwise
        """
        headers = self._auth_headers(token)

        try:
            # Get allocations for the job so we can find one running the task.
            allocs_url = f"{self.base_url}/v1/job/{job_id}/allocations"
            params = {'namespace': namespace}

            self.logger.info(f"Fetching allocations for job '{job_id}'...")
            response = requests.get(allocs_url, headers=headers, params=params, timeout=10)
            response.raise_for_status()
            allocations = response.json()

            # Find a running allocation that contains the task.
            # .get() is used (not direct indexing) so a malformed allocation
            # record cannot raise KeyError and abort the whole restart.
            target_alloc = None
            for alloc in allocations:
                if alloc.get('ClientStatus') == 'running':
                    task_states = alloc.get('TaskStates', {})
                    if task_name in task_states:
                        target_alloc = alloc
                        break

            if not target_alloc:
                self.logger.error(f"No running allocation found for task '{task_name}' in job '{job_id}'")
                return False

            # Restart just the specific task via the client allocation endpoint.
            alloc_id = target_alloc['ID']
            restart_url = f"{self.base_url}/v1/client/allocation/{alloc_id}/restart"
            payload = {"TaskName": task_name}

            self.logger.info(f"Restarting task '{task_name}' in job '{job_id}'...")
            response = requests.post(restart_url, headers=headers, params=params, json=payload, timeout=10)

            if response.status_code in [200, 204]:
                self.logger.info(f"Successfully restarted task '{task_name}' in job '{job_id}'")
                # Give the task time to come back up before the caller re-checks it.
                time.sleep(wait_time)
                return True
            else:
                self.logger.error(f"Failed: {response.status_code} - {response.text}")
                return False

        except Exception as e:
            self.logger.error(f"Request failed: {e}")
            return False

    def check_job_status(self, job_id: str, namespace: str = "default",
                         token: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """
        Check the status of a Nomad job

        Args:
            job_id: The ID of the job to check
            namespace: The namespace of the job
            token: Optional Nomad ACL token

        Returns:
            Job status information or None if failed
        """
        headers = self._auth_headers(token)

        try:
            job_url = f"{self.base_url}/v1/job/{job_id}"
            params = {'namespace': namespace}

            response = requests.get(job_url, headers=headers, params=params, timeout=10)
            response.raise_for_status()
            return response.json()

        except Exception as e:
            self.logger.error(f"Failed to get job status: {e}")
            return None

    def get_allocations(self, job_id: str, namespace: str = "default",
                        token: Optional[str] = None) -> Optional[list]:
        """
        Get allocations for a specific job

        Args:
            job_id: The ID of the job
            namespace: The namespace of the job
            token: Optional Nomad ACL token

        Returns:
            List of allocations or None if failed
        """
        headers = self._auth_headers(token)

        try:
            allocs_url = f"{self.base_url}/v1/job/{job_id}/allocations"
            params = {'namespace': namespace}

            response = requests.get(allocs_url, headers=headers, params=params, timeout=10)
            response.raise_for_status()
            return response.json()

        except Exception as e:
            self.logger.error(f"Failed to get allocations: {e}")
            return None

    def get_container_info(self, job_id: str, task_names: list, namespace: str = "default",
                           token: Optional[str] = None) -> Dict[str, Any]:
        """
        Get container information including uptime and restart details for specified tasks

        Args:
            job_id: The ID of the job containing the tasks
            task_names: List of task names to get information for
            namespace: The namespace of the job
            token: Optional Nomad ACL token

        Returns:
            Dictionary with container information for each task; tasks that
            cannot be found keep their placeholder values ('Unknown'/'Never').
        """
        container_info = {}

        try:
            # get_allocations() handles authentication and namespacing itself.
            allocations = self.get_allocations(job_id, namespace, token)
            if not allocations:
                self.logger.warning(f"No allocations found for job '{job_id}'")
                return container_info

            current_time = time.time()

            for task_name in task_names:
                # Defaults used when the task is not found in any running allocation.
                task_info = {
                    'uptime': 'Unknown',
                    'last_restart_time': 'Never',
                    'last_restart_reason': 'None',
                    'restart_count': 0
                }

                for alloc in allocations:
                    if alloc.get('ClientStatus') == 'running':
                        task_states = alloc.get('TaskStates', {})
                        if task_name in task_states:
                            self._summarize_task_state(task_states[task_name], task_info, current_time)
                            break  # Found the task, move on to the next one

                container_info[task_name] = task_info

            return container_info

        except Exception as e:
            self.logger.error(f"Failed to get container info: {e}")
            return container_info

    def _summarize_task_state(self, task_state: Dict[str, Any], task_info: Dict[str, Any],
                              current_time: float) -> None:
        """Fill ``task_info`` in place with uptime, restart timing/count and reason.

        '0001-01-01T00:00:00Z' is Nomad's zero timestamp and is treated as unset.
        """
        from datetime import datetime
        # Project-local helper; imported lazily to avoid a hard dependency at import time.
        from utils.time_utils import format_human_readable_time

        # Uptime derived from StartedAt.
        started_at = task_state.get('StartedAt')
        if started_at and started_at != '0001-01-01T00:00:00Z':
            try:
                start_dt = datetime.fromisoformat(started_at.replace('Z', '+00:00'))
                task_info['uptime'] = format_human_readable_time(current_time - start_dt.timestamp())
            except (ValueError, TypeError):
                task_info['uptime'] = 'Invalid timestamp'

        # How long ago the last restart happened.
        last_restart = task_state.get('LastRestart')
        if last_restart and last_restart != '0001-01-01T00:00:00Z':
            try:
                restart_dt = datetime.fromisoformat(last_restart.replace('Z', '+00:00'))
                task_info['last_restart_time'] = f"{format_human_readable_time(current_time - restart_dt.timestamp())} ago"
            except (ValueError, TypeError):
                task_info['last_restart_time'] = 'Invalid timestamp'

        task_info['restart_count'] = task_state.get('Restarts', 0)
        task_info['last_restart_reason'] = self._find_restart_reason(task_state.get('Events', []))

    @staticmethod
    def _find_restart_reason(events: list) -> str:
        """Derive the most recent restart reason from task events (newest first).

        Priority: Terminated (non-zero exit) > Killed > Restart > Restart Signaled,
        then a fallback scan for any abnormal termination.
        """
        restart_reasons = []

        for event in reversed(events):  # Check most recent events first
            event_type = event.get('Type')

            # Highest priority: termination events with exit codes (actual failure).
            if event_type == 'Terminated':
                exit_code = event.get('ExitCode')
                exit_message = event.get('ExitMessage', '')
                if exit_code is not None:
                    if exit_code != 0:  # Only non-zero exit codes count as failures
                        reason = f"Exit Code: {exit_code}"
                        if exit_message:
                            reason += f", {exit_message}"
                        restart_reasons.append(reason)
                    # NOTE(review): stop scanning once an exit code is seen, even a
                    # clean one — assumed to match the original control flow; confirm.
                    break

            # Second priority: Killed events (external termination).
            elif event_type == 'Killed':
                restart_reasons.append(event.get('KillReason') or 'Task killed')
                break

            # Third priority: explicit restart events.
            elif event_type == 'Restart':
                restart_reasons.append(event.get('RestartReason') or 'Restart initiated')
                break

            # Fourth priority: restart signaling (user or system initiated).
            elif event_type == 'Restart Signaled':
                restart_reasons.append(event.get('DriverMessage') or 'Restart signaled')
                break

        # If no specific restart events were found, look for any abnormal events.
        if not restart_reasons:
            for event in reversed(events):
                if event.get('Type') in ['Terminated', 'Killed']:
                    exit_code = event.get('ExitCode')
                    if exit_code and exit_code != 0:  # Non-zero exit code
                        exit_message = event.get('ExitMessage', '')
                        reason = f"Abnormal termination (Exit: {exit_code})"
                        if exit_message:
                            reason += f": {exit_message}"
                        restart_reasons.append(reason)
                        break

        return restart_reasons[0] if restart_reasons else 'Normal operation'
|
||||
196
api/qbittorrent_client.py
Normal file
196
api/qbittorrent_client.py
Normal file
@@ -0,0 +1,196 @@
|
||||
import requests
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, List
|
||||
from requests import Session
|
||||
|
||||
class QBittorrentClient:
    """Client for interacting with the qBittorrent Web API.

    Failures are logged and reported via return values (None/False/error dict)
    rather than raised to the caller.
    """

    def __init__(self, base_url: str = 'http://127.0.0.1:8080',
                 username: str = 'admin',
                 password: str = 'adminpass',
                 logger: logging.Logger = None):
        """
        Initialize qBittorrent client

        Args:
            base_url: Base URL for qBittorrent API
            username: qBittorrent username
            password: qBittorrent password
            logger: Optional logger instance (uses module logger if not provided)
        """
        self.base_url = base_url.rstrip('/')
        self.username = username
        self.password = password
        self.logger = logger or logging.getLogger(__name__)

        # API endpoints
        self.api_url = f'{self.base_url}/api/v2/transfer/info'
        self.login_url = f'{self.base_url}/api/v2/auth/login'
        self.torrents_url = f'{self.base_url}/api/v2/torrents/info'
        self.stop_url = f'{self.base_url}/api/v2/torrents/stop'
        self.start_url = f'{self.base_url}/api/v2/torrents/start'

        # API request retry configuration (exponential backoff).
        self.api_retry_attempts = 3
        self.api_retry_delay = 2
        self.api_retry_backoff = 2

    def login(self) -> Optional["Session"]:
        """
        Authenticate with qBittorrent

        Returns:
            Authenticated session or None if login failed
        """
        try:
            session = requests.Session()
            login_data = {
                'username': self.username,
                'password': self.password
            }
            # timeout added so a hung server cannot block the monitor forever.
            response = session.post(self.login_url, data=login_data, timeout=10)
            response.raise_for_status()
            self.logger.info("Successfully logged into qBittorrent")
            return session
        except requests.RequestException as e:
            self.logger.error(f"qBittorrent login failed: {e}")
            return None

    def get_connection_status(self, verbose_debug: bool = True) -> Dict[str, Any]:
        """
        Retrieve connection status from qBittorrent API with retry logic

        Args:
            verbose_debug: Whether to log detailed debug information

        Returns:
            Connection status dictionary; on total failure a dict with
            'connection_status': 'error' and diagnostic fields.
        """
        import time

        last_exception = None

        for attempt in range(self.api_retry_attempts):
            try:
                response = requests.get(self.api_url, timeout=10)
                response.raise_for_status()
                status_data = response.json()

                # Log the actual status values for debugging (optional)
                if verbose_debug:
                    connection_status = status_data.get('connection_status', 'unknown')
                    dht_nodes = status_data.get('dht_nodes', 0)
                    self.logger.debug(f"API response - Status: {connection_status}, DHT Nodes: {dht_nodes}")

                return status_data
            except Exception as e:
                last_exception = e
                if attempt < self.api_retry_attempts - 1:  # Not the last attempt
                    # Exponential backoff between attempts.
                    delay = self.api_retry_delay * (self.api_retry_backoff ** attempt)
                    self.logger.warning(f"API request attempt {attempt + 1}/{self.api_retry_attempts} failed: {type(e).__name__}: {e}. Retrying in {delay} seconds...")
                    time.sleep(delay)
                else:
                    # More detailed error logging for final failure
                    error_type = type(last_exception).__name__
                    error_details = str(last_exception)
                    self.logger.error(f"API request failed after {self.api_retry_attempts} attempts: {error_type}: {error_details}")

                    # Return error details for better debugging
                    return {
                        'connection_status': 'error',
                        'dht_nodes': 0,
                        'error_type': error_type,
                        'error_details': error_details,
                        'api_url': self.api_url
                    }

        # Fallback return if all attempts fail (shouldn't normally reach here)
        return {
            'connection_status': 'error',
            'dht_nodes': 0,
            'error_type': 'Unknown',
            'error_details': 'All retry attempts exhausted',
            'api_url': self.api_url
        }

    def stop_tracker_torrents(self, session: "Session", tracker_name: str) -> bool:
        """
        Stop torrents matching specific tracker

        Args:
            session: Authenticated session
            tracker_name: Tracker name to match

        Returns:
            True if successful, False otherwise
        """
        try:
            # Get list of torrents (timeout so a stalled API cannot hang us).
            torrents = session.get(self.torrents_url, timeout=10).json()

            # Find and stop torrents with matching tracker.
            # NOTE(review): substring match against str(torrent) is crude and may
            # over-match (e.g. tracker name inside the torrent title) — confirm.
            tracker_torrents = [
                torrent['hash'] for torrent in torrents
                if tracker_name.lower() in str(torrent).lower()
            ]

            if tracker_torrents:
                hashes_str = '|'.join(tracker_torrents)
                self.logger.debug(f"Stopping torrents: {hashes_str}")
                response = session.post(self.stop_url, data={'hashes': hashes_str}, timeout=10)
                response.raise_for_status()
                self.logger.info(f"Stopped {len(tracker_torrents)} torrents for tracker {tracker_name}")
                return True
            else:
                self.logger.info(f"No torrents found for tracker {tracker_name}")
                return True

        except requests.RequestException as e:
            self.logger.error(f"Failed to stop torrents: {e}")
            return False

        except Exception as e:
            self.logger.error(f"Unexpected error stopping torrents: {e}")
            return False

    def restart_tracker_torrents(self, session: "Session", tracker_name: str) -> bool:
        """
        Restart torrents for specific tracker

        Args:
            session: Authenticated session
            tracker_name: Tracker name to match

        Returns:
            True if successful, False otherwise
        """
        try:
            # Get list of torrents (timeout so a stalled API cannot hang us).
            torrents = session.get(self.torrents_url, timeout=10).json()

            # Find paused torrents with matching tracker.
            tracker_torrents = [
                torrent['hash'] for torrent in torrents
                if (tracker_name.lower() in str(torrent).lower()
                    and torrent.get('state') == 'paused')
            ]

            if tracker_torrents:
                hashes_str = '|'.join(tracker_torrents)
                self.logger.debug(f"Restarting torrents: {hashes_str}")
                # Note: Start endpoint commented out in original code
                # response = session.post(self.start_url, data={'hashes': hashes_str})
                # response.raise_for_status()
                self.logger.info(f"Restarted {len(tracker_torrents)} torrents for tracker {tracker_name}")
                return True
            else:
                self.logger.info(f"No paused torrents found for tracker {tracker_name}")
                return True

        except requests.RequestException as e:
            self.logger.error(f"Failed to restart torrents: {e}")
            return False

        except Exception as e:
            self.logger.error(f"Unexpected error restarting torrents: {e}")
            return False
|
||||
132
api/vpn_client.py
Normal file
132
api/vpn_client.py
Normal file
@@ -0,0 +1,132 @@
|
||||
import requests
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
class VPNClient:
    """Client for monitoring VPN server status and public IP information.

    Both public methods share one retry helper; on total failure they return an
    error dict instead of raising.
    """

    def __init__(self, base_url: str = 'http://192.168.4.36:8000',
                 logger: logging.Logger = None):
        """
        Initialize VPN client

        Args:
            base_url: Base URL for VPN monitoring API
            logger: Optional logger instance (uses module logger if not provided)
        """
        self.base_url = base_url.rstrip('/')
        self.logger = logger or logging.getLogger(__name__)

        # API endpoints
        self.vpn_status_url = f'{self.base_url}/v1/vpn/status'
        self.public_ip_url = f'{self.base_url}/v1/publicip/ip'

        # API request retry configuration (exponential backoff).
        self.api_retry_attempts = 3
        self.api_retry_delay = 2
        self.api_retry_backoff = 2

    def _get_json_with_retry(self, url: str, label: str, value_key: str,
                             error_key: str, verbose_debug: bool) -> Dict[str, Any]:
        """Fetch JSON from ``url`` with retry/backoff; shared by both endpoints.

        Args:
            url: Endpoint to GET.
            label: Human-readable name used in log messages (e.g. 'VPN status').
            value_key: JSON key whose value is debug-logged on success.
            error_key: Key set to 'error' in the failure dict ('status'/'public_ip').
            verbose_debug: Whether to log the fetched value at debug level.

        Returns:
            Parsed JSON dict on success, otherwise an error dict with
            ``error_key``, 'error_type', 'error_details' and 'api_url'.
        """
        last_exception = None

        for attempt in range(self.api_retry_attempts):
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()
                data = response.json()

                # Log the fetched value for debugging (optional).
                if verbose_debug:
                    self.logger.debug(f"{label}: {data.get(value_key, 'unknown')}")

                return data
            except Exception as e:
                last_exception = e
                if attempt < self.api_retry_attempts - 1:  # Not the last attempt
                    delay = self.api_retry_delay * (self.api_retry_backoff ** attempt)
                    self.logger.warning(f"{label} request attempt {attempt + 1}/{self.api_retry_attempts} failed: {type(e).__name__}: {e}. Retrying in {delay} seconds...")
                    time.sleep(delay)
                else:
                    # More detailed error logging for final failure.
                    error_type = type(last_exception).__name__
                    error_details = str(last_exception)
                    self.logger.error(f"{label} request failed after {self.api_retry_attempts} attempts: {error_type}: {error_details}")

                    # Return error details for better debugging.
                    return {
                        error_key: 'error',
                        'error_type': error_type,
                        'error_details': error_details,
                        'api_url': url
                    }

        # Fallback return if all attempts fail (shouldn't normally reach here).
        return {
            error_key: 'error',
            'error_type': 'Unknown',
            'error_details': 'All retry attempts exhausted',
            'api_url': url
        }

    def get_vpn_status(self, verbose_debug: bool = True) -> Dict[str, Any]:
        """
        Retrieve VPN status from VPN monitoring API with retry logic

        Args:
            verbose_debug: Whether to log detailed debug information

        Returns:
            VPN status dictionary with 'status' field
        """
        return self._get_json_with_retry(self.vpn_status_url, 'VPN status',
                                         'status', 'status', verbose_debug)

    def get_public_ip_info(self, verbose_debug: bool = True) -> Dict[str, Any]:
        """
        Retrieve public IP information from VPN monitoring API with retry logic

        Args:
            verbose_debug: Whether to log detailed debug information

        Returns:
            Public IP information dictionary with 'public_ip' and geographic details
        """
        return self._get_json_with_retry(self.public_ip_url, 'Public IP',
                                         'public_ip', 'public_ip', verbose_debug)
|
||||
819
checker_backup_original.py
Normal file
819
checker_backup_original.py
Normal file
@@ -0,0 +1,819 @@
|
||||
import requests
|
||||
import time
|
||||
import logging
|
||||
import sys
|
||||
import json
|
||||
from typing import Dict, Any, Optional
|
||||
from pprint import pprint
|
||||
from time import sleep
|
||||
|
||||
try:
|
||||
import consul
|
||||
CONSUL_AVAILABLE = True
|
||||
except ImportError:
|
||||
consul = None
|
||||
CONSUL_AVAILABLE = False
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.warning("python-consul package not available. State persistence disabled.")
|
||||
|
||||
# Configure logging with colors
|
||||
class ColoredFormatter(logging.Formatter):
    """Log formatter that wraps each rendered message in an ANSI color per level."""

    # ANSI color codes
    GREY = '\033[90m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD_RED = '\033[1;91m'
    RESET = '\033[0m'

    def format(self, record):
        """Render the record normally, then colorize it by severity."""
        palette = {
            logging.DEBUG: self.GREY,
            logging.INFO: self.GREEN,
            logging.WARNING: self.YELLOW,
            logging.ERROR: self.RED,
            logging.CRITICAL: self.RED,
        }
        # Unknown/custom levels fall back to no color (RESET).
        color = palette.get(record.levelno, self.RESET)
        return f"{color}{super().format(record)}{self.RESET}"
|
||||
|
||||
# --- Module-level logger configuration ---------------------------------------
# Colored output to stdout, plain output to connection_monitor.log.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Drop any handlers left over from a previous configuration of this logger.
logger.handlers = []

_LOG_FORMAT = '%(asctime)s - %(levelname)s: %(message)s'
_LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'

# Console handler with ANSI colors.
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(ColoredFormatter(_LOG_FORMAT, datefmt=_LOG_DATEFMT))

# File handler without colors (ANSI codes would pollute the log file).
file_handler = logging.FileHandler('connection_monitor.log')
file_handler.setFormatter(logging.Formatter(_LOG_FORMAT, datefmt=_LOG_DATEFMT))

logger.addHandler(console_handler)
logger.addHandler(file_handler)
|
||||
|
||||
def format_human_readable_time(seconds: float) -> str:
    """Render a duration in seconds as text, e.g. '1 day 2 hours 5 seconds'.

    Seconds are always included when non-zero, or when the duration is under
    one second so the result is never empty.
    """
    from datetime import timedelta

    delta = timedelta(seconds=seconds)
    hrs, rem = divmod(delta.seconds, 3600)
    mins, secs = divmod(rem, 60)

    # Assemble non-zero units largest-first, pluralizing as needed.
    units = [(delta.days, "day"), (hrs, "hour"), (mins, "minute")]
    chunks = [
        f"{count} {name}" if count == 1 else f"{count} {name}s"
        for count, name in units
        if count > 0
    ]
    if secs > 0 or not chunks:
        chunks.append(f"{secs} second" if secs == 1 else f"{secs} seconds")

    return " ".join(chunks)
|
||||
|
||||
class ConnectionMonitor:
|
||||
def __init__(self,
             qbittorrent_url: str = 'http://127.0.0.1:8080',
             nomad_url: str = 'http://127.0.0.1:4646',
             tracker_name: str = 'myanon',
             consul_url: str = 'http://consul.service.dc1.consul:8500'):
    """Set up monitor configuration, tracking state and optional Consul persistence."""
    self.api_url = f'{qbittorrent_url}/api/v2/transfer/info'
    self.qbittorrent_base_url = qbittorrent_url
    self.nomad_url = nomad_url
    self.tracker_name = tracker_name
    self.consul_url = consul_url

    # Consul client is optional; state persistence is skipped when unavailable.
    self.consul_client = None
    if CONSUL_AVAILABLE:
        try:
            # Parse host and port out of a 'scheme://host:port' URL.
            consul_host = consul_url.split('://')[1].split(':')[0]
            consul_port = int(consul_url.split(':')[2])
            self.consul_client = consul.Consul(host=consul_host, port=consul_port)
            logger.info(f"Consul client initialized for {consul_url}")
        except Exception as e:
            logger.error(f"Failed to initialize Consul client: {e}")
            self.consul_client = None

    # Failure tracking
    self.consecutive_failures = 0
    self.max_consecutive_failures = 20  # 10 minutes (30s * 20)
    self.stability_wait_time = 1800  # 30 minutes
    self.check_interval = 30  # seconds

    # API request retry configuration
    self.api_retry_attempts = 3  # Number of retry attempts
    self.api_retry_delay = 2  # Initial delay in seconds
    self.api_retry_backoff = 2  # Exponential backoff multiplier

    # Connection state tracking
    self.connection_state = 'stable'  # 'stable' or 'unstable'
    self.last_state_change_time = time.time()
    self.consecutive_stable_checks = 0
    self.last_failure_time = None  # Track when last failure occurred

    # Remediation state machine
    self.remediation_state = None  # None, 'stopping_torrents', 'restarting_nomad', 'waiting_for_stability', 'restarting_torrents'
    self.remediation_start_time = None
    self.stabilization_checks = 0
    self.remediation_session = None

    # Stability tracking for the 1-hour requirement
    self.stability_start_time = None  # When stable connectivity begins
    self.stability_duration_required = 3600  # 1 hour in seconds

    # Authentication (update with your credentials)
    self.qbittorrent_username = 'admin'
    self.qbittorrent_password = 'adminpass'

    # Restore any persisted state from Consul.
    self._load_state_from_consul()
|
||||
|
||||
def _save_state_to_consul(self):
|
||||
"""
|
||||
Save current state to Consul KV store
|
||||
"""
|
||||
if not self.consul_client:
|
||||
return False
|
||||
|
||||
try:
|
||||
base_key = "qbitcheck/connection_monitor/"
|
||||
|
||||
# Connection state
|
||||
state_data = {
|
||||
'connection_state': self.connection_state,
|
||||
'last_state_change_time': self.last_state_change_time,
|
||||
'consecutive_failures': self.consecutive_failures,
|
||||
'consecutive_stable_checks': self.consecutive_stable_checks,
|
||||
'last_failure_time': self.last_failure_time
|
||||
}
|
||||
|
||||
# Remediation state
|
||||
remediation_data = {
|
||||
'state': self.remediation_state,
|
||||
'start_time': self.remediation_start_time,
|
||||
'stabilization_checks': self.stabilization_checks
|
||||
}
|
||||
|
||||
# Stability tracking
|
||||
stability_data = {
|
||||
'start_time': self.stability_start_time
|
||||
}
|
||||
|
||||
# Save each section to Consul
|
||||
self.consul_client.kv.put(f"{base_key}state", json.dumps(state_data))
|
||||
self.consul_client.kv.put(f"{base_key}remediation", json.dumps(remediation_data))
|
||||
self.consul_client.kv.put(f"{base_key}stability", json.dumps(stability_data))
|
||||
|
||||
logger.debug("State successfully saved to Consul")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save state to Consul: {e}")
|
||||
return False
|
||||
|
||||
def _load_state_from_consul(self):
|
||||
"""
|
||||
Load state from Consul KV store
|
||||
"""
|
||||
if not self.consul_client:
|
||||
return False
|
||||
|
||||
try:
|
||||
base_key = "qbitcheck/connection_monitor/"
|
||||
|
||||
# Load connection state
|
||||
_, state_data = self.consul_client.kv.get(f"{base_key}state")
|
||||
if state_data:
|
||||
state = json.loads(state_data['Value'].decode('utf-8'))
|
||||
self.connection_state = state.get('connection_state', 'stable')
|
||||
self.last_state_change_time = state.get('last_state_change_time', time.time())
|
||||
self.consecutive_failures = state.get('consecutive_failures', 0)
|
||||
self.consecutive_stable_checks = state.get('consecutive_stable_checks', 0)
|
||||
self.last_failure_time = state.get('last_failure_time')
|
||||
|
||||
# Load remediation state
|
||||
_, remediation_data = self.consul_client.kv.get(f"{base_key}remediation")
|
||||
if remediation_data:
|
||||
remediation = json.loads(remediation_data['Value'].decode('utf-8'))
|
||||
self.remediation_state = remediation.get('state')
|
||||
self.remediation_start_time = remediation.get('start_time')
|
||||
self.stabilization_checks = remediation.get('stabilization_checks', 0)
|
||||
|
||||
# Load stability tracking
|
||||
_, stability_data = self.consul_client.kv.get(f"{base_key}stability")
|
||||
if stability_data:
|
||||
stability = json.loads(stability_data['Value'].decode('utf-8'))
|
||||
self.stability_start_time = stability.get('start_time')
|
||||
|
||||
logger.info("State successfully loaded from Consul")
|
||||
logger.debug(f"Loaded state: connection={self.connection_state}, "
|
||||
f"remediation={self.remediation_state}, "
|
||||
f"failures={self.consecutive_failures}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load state from Consul: {e}")
|
||||
return False
|
||||
|
||||
def qbittorrent_login(self) -> requests.Session:
    """Authenticate against the qBittorrent Web API.

    Returns:
        An authenticated requests.Session on success, None when the
        login request fails.
    """
    try:
        session = requests.Session()
        credentials = {
            'username': self.qbittorrent_username,
            'password': self.qbittorrent_password,
        }
        reply = session.post(
            f'{self.qbittorrent_base_url}/api/v2/auth/login',
            data=credentials,
        )
        reply.raise_for_status()
        logger.info("Successfully logged into qBittorrent")
        return session
    except requests.RequestException as e:
        logger.error(f"qBittorrent login failed: {e}")
        return None
|
||||
|
||||
def get_connection_status(self) -> Dict[str, Any]:
    """Fetch transfer/connection status from the qBittorrent API.

    Retries up to ``api_retry_attempts`` times with exponential
    backoff. On persistent failure a synthetic status dict is returned
    with ``connection_status == 'error'`` plus error details, so the
    caller never has to handle exceptions.
    """
    last_error = None

    for attempt in range(1, self.api_retry_attempts + 1):
        try:
            reply = requests.get(self.api_url, timeout=10)
            reply.raise_for_status()
            payload = reply.json()
        except Exception as e:
            last_error = e
            if attempt < self.api_retry_attempts:
                # Exponential backoff: delay * backoff^(attempt-1)
                pause = self.api_retry_delay * (self.api_retry_backoff ** (attempt - 1))
                logger.warning(f"API request attempt {attempt}/{self.api_retry_attempts} failed: {type(e).__name__}: {e}. Retrying in {pause} seconds...")
                time.sleep(pause)
                continue
            # Final attempt failed — log full detail and return an
            # error-shaped status dict for the caller to classify.
            err_name = type(last_error).__name__
            err_text = str(last_error)
            logger.error(f"API request failed after {self.api_retry_attempts} attempts: {err_name}: {err_text}")
            return {
                'connection_status': 'error',
                'dht_nodes': 0,
                'error_type': err_name,
                'error_details': err_text,
                'api_url': self.api_url,
            }
        # Success: surface the raw values for debugging, then return.
        logger.debug(f"API response - Status: {payload.get('connection_status', 'unknown')}, DHT Nodes: {payload.get('dht_nodes', 0)}")
        return payload

    # Defensive fallback; the loop always returns before reaching here.
    return {
        'connection_status': 'error',
        'dht_nodes': 0,
        'error_type': 'Unknown',
        'error_details': 'All retry attempts exhausted',
        'api_url': self.api_url,
    }
|
||||
|
||||
def stop_tracker_torrents(self, session: requests.Session):
    """Stop every torrent whose info mentions the monitored tracker.

    NOTE(review): matching is a crude substring search over the whole
    torrent dict repr; it may over-match if the tracker string appears
    in unrelated fields — confirm against real torrent payloads.

    Args:
        session: Authenticated qBittorrent session.

    Returns:
        True when the stop request succeeded or nothing matched,
        False on any error.
    """
    try:
        info_url = f'{self.qbittorrent_base_url}/api/v2/torrents/info'
        all_torrents = session.get(info_url).json()

        needle = self.tracker_name.lower()
        matches = [t['hash'] for t in all_torrents if needle in str(t).lower()]

        if not matches:
            logger.info(f"No torrents found for tracker {self.tracker_name}")
            return True

        joined = '|'.join(matches)
        logger.debug(f"Stopping torrents: {joined}")
        reply = session.post(
            f'{self.qbittorrent_base_url}/api/v2/torrents/stop',
            data={'hashes': joined},
        )
        reply.raise_for_status()
        logger.info(f"Stopped {len(matches)} torrents for tracker {self.tracker_name}")
        return True

    except requests.RequestException as e:
        logger.error(f"Failed to stop torrents: {e}")
        return False

    except Exception as e:
        logger.error(f"Unexpected error stopping torrents: {e}")
        return False
|
||||
|
||||
|
||||
def restart_nomad_task_via_allocation(self, job_id: str, task_name: str, namespace: str = "default", wait_time: int = 60) -> bool:
    """
    Restart a specific task in a Nomad job by restarting just that task.

    Looks up the job's allocations, picks the first *running* allocation
    whose TaskStates contain the task, then POSTs to the client
    allocation restart endpoint for that single task.

    Args:
        job_id: The ID of the job containing the task
        task_name: The name of the task to restart
        namespace: The namespace of the job (default: 'default')
        wait_time: Seconds to wait after restart (default: 60)

    Returns:
        bool: True if restart succeeded, False otherwise
    """
    headers = {}
    # Attach an ACL token only when one is configured on this instance.
    if hasattr(self, 'token') and self.token:
        headers['X-Nomad-Token'] = self.token

    try:
        # Get allocations for the job
        allocs_url = f"{self.nomad_url}/v1/job/{job_id}/allocations"
        params = {'namespace': namespace}

        logger.info(f"Fetching allocations for job '{job_id}'...")
        response = requests.get(allocs_url, headers=headers, params=params, timeout=10)
        response.raise_for_status()
        allocations = response.json()

        # Find a running allocation whose task states include the target task.
        target_alloc = None
        for alloc in allocations:
            if alloc['ClientStatus'] == 'running':
                task_states = alloc.get('TaskStates', {})
                if task_name in task_states:
                    target_alloc = alloc
                    break

        if not target_alloc:
            logger.error(f"No running allocation found for task '{task_name}' in job '{job_id}'")
            return False

        # Restart just the specific task via the client allocation endpoint.
        alloc_id = target_alloc['ID']
        restart_url = f"{self.nomad_url}/v1/client/allocation/{alloc_id}/restart"
        payload = {"TaskName": task_name}

        logger.info(f"Restarting task '{task_name}' in job '{job_id}'...")
        # The same namespace query params are reused for the restart call.
        response = requests.post(restart_url, headers=headers, params=params, json=payload, timeout=10)

        if response.status_code in [200, 204]:
            logger.info(f"Successfully restarted task '{task_name}' in job '{job_id}'")
            # Give the restarted task time to come up before the caller proceeds.
            time.sleep(wait_time)
            return True
        else:
            logger.error(f"Failed: {response.status_code} - {response.text}")
            return False

    except Exception as e:
        logger.error(f"Request failed: {e}")
        return False
|
||||
|
||||
|
||||
|
||||
def restart_tracker_torrents(self, session: requests.Session):
    """
    Resume torrents for the monitored tracker after the stability period.

    Fixes two defects in the previous version:
    1. The actual start request was commented out, so the method logged
       "Restarted N torrents" without resuming anything.
    2. The paused check compared state to a bare 'paused', which never
       matches qBittorrent's real direction-specific states
       ('pausedDL'/'pausedUP', or 'stoppedDL'/'stoppedUP' on >= 5.0).

    Args:
        session: Authenticated qBittorrent session.
    """
    try:
        # Get the current torrent list
        torrents_url = f'{self.qbittorrent_base_url}/api/v2/torrents/info'
        torrents = session.get(torrents_url).json()

        # Accept all paused/stopped state spellings across qBittorrent versions.
        paused_states = {'paused', 'pausedDL', 'pausedUP', 'stoppedDL', 'stoppedUP'}

        # Find paused torrents whose info mentions the monitored tracker.
        tracker_torrents = [
            torrent['hash'] for torrent in torrents
            if (self.tracker_name.lower() in str(torrent).lower()
                and torrent.get('state') in paused_states)
        ]

        if tracker_torrents:
            hashes_str = '|'.join(tracker_torrents)
            logger.debug(f"Restarting torrents: {hashes_str}")
            # Issue the start request (this endpoint pairs with the
            # /torrents/stop endpoint used in stop_tracker_torrents).
            start_url = f'{self.qbittorrent_base_url}/api/v2/torrents/start'
            response = session.post(start_url, data={'hashes': hashes_str})
            response.raise_for_status()
            logger.info(f"Restarted {len(tracker_torrents)} torrents for tracker {self.tracker_name}")
        else:
            logger.info(f"No paused torrents found for tracker {self.tracker_name}")

    except requests.RequestException as e:
        logger.error(f"Failed to restart torrents: {e}")
|
||||
|
||||
def start_remediation(self):
    """Kick off the remediation state machine (non-blocking).

    Logs in to qBittorrent, records the start time, and moves the
    machine into the 'stopping_torrents' state.

    Returns:
        True when remediation was started, False when the qBittorrent
        login failed (remediation is aborted in that case).
    """
    logger.warning("Connection instability detected. Starting remediation...")
    self.remediation_state = 'stopping_torrents'
    self.remediation_start_time = time.time()
    self.stabilization_checks = 0
    self.remediation_session = self.qbittorrent_login()

    # Without an authenticated session none of the later steps can run.
    if not self.remediation_session:
        logger.error("Could not log in to qBittorrent. Aborting remediation.")
        self.remediation_state = None
        return False

    logger.info(f"Remediation started. State: {self.remediation_state}")
    # Persist so a process restart resumes remediation where it left off.
    self._save_state_to_consul()
    return True
|
||||
|
||||
def process_remediation(self):
    """
    Advance the remediation state machine by one step (non-blocking).

    State sequence: 'stopping_torrents' -> 'restarting_nomad' ->
    'waiting_for_stability' -> 'restarting_torrents' -> None.

    Returns:
        True only when remediation completed successfully this cycle;
        False otherwise (including "still in progress" and all failures).
    """
    # Nothing to do when no remediation is active.
    if self.remediation_state is None:
        return False

    try:
        # Log detailed remediation state information
        remediation_duration = time.time() - self.remediation_start_time
        logger.debug(f"Processing remediation state: {self.remediation_state} (duration: {format_human_readable_time(remediation_duration)})")

        if self.remediation_state == 'stopping_torrents':
            if self.stop_tracker_torrents(self.remediation_session):
                logger.info("Torrents stopped successfully, proceeding to restart Nomad task")
                self.remediation_state = 'restarting_nomad'
                logger.info(f"Remediation state updated: {self.remediation_state}")
                # Log state transition with debug metrics
                self._log_debug_metrics()
                # Save state to Consul
                self._save_state_to_consul()
            else:
                logger.error("Failed to stop torrents - retrying in next cycle")
                # Don't reset state, allow retry in next cycle

        elif self.remediation_state == 'restarting_nomad':
            if self.restart_nomad_task_via_allocation(
                job_id="qbittorrent",
                task_name="qbittorrent"
            ):
                # NOTE(review): this message says "30 minutes" while the
                # stability logic elsewhere enforces a 1-hour window
                # (stability_duration_required = 3600) — confirm which is intended.
                logger.info("Nomad task restarted successfully, waiting for 30 minutes of stable connectivity")
                self.remediation_state = 'waiting_for_stability'
                logger.info(f"Remediation state updated: {self.remediation_state}")
                # Log state transition with debug metrics
                self._log_debug_metrics()
                # Save state to Consul
                self._save_state_to_consul()
            else:
                logger.error("Nomad task restart failed - retrying in next cycle")
                # Don't reset state, allow retry in next cycle

        elif self.remediation_state == 'waiting_for_stability':
            # This state just waits - stability checking is handled in main loop.
            # NOTE(review): stability_wait_time is 1800 s (30 min) but the
            # 1-hour stability requirement is 3600 s, so this timeout can
            # fire before the stability window can ever complete — verify.
            elapsed_time = time.time() - self.remediation_start_time
            if elapsed_time > self.stability_wait_time:
                logger.error(f"Stabilization timeout reached after {format_human_readable_time(elapsed_time)}")
                self.remediation_state = None
                self.stability_start_time = None
                # Log timeout with debug metrics
                self._log_debug_metrics()
                # Save state to Consul (remediation timeout)
                self._save_state_to_consul()
                return False

        elif self.remediation_state == 'restarting_torrents':
            try:
                self.restart_tracker_torrents(self.remediation_session)
                logger.info("Remediation completed successfully")
                self.remediation_state = None
                # Log successful completion with debug metrics
                self._log_debug_metrics()
                # Save state to Consul (remediation completed)
                self._save_state_to_consul()
                return True
            except Exception as e:
                logger.error(f"Failed to restart torrents: {e}")
                self.remediation_state = None
                # Log failure with debug metrics
                self._log_debug_metrics()
                # Save state to Consul (remediation failed)
                self._save_state_to_consul()
                return False

    except Exception as e:
        # Any unexpected error abandons the current remediation attempt.
        logger.error(f"Unexpected error during remediation: {e}")
        logger.error("Resetting remediation state due to unexpected error")
        self.remediation_state = None
        # Log error with debug metrics
        self._log_debug_metrics()
        # Save state to Consul (error condition)
        self._save_state_to_consul()
        return False

    # Remediation is still in progress (or waiting).
    return False
|
||||
|
||||
def on_stable_to_unstable(self):
    """Hook invoked when the connection flips from stable to unstable."""
    logger.warning("STATE TRANSITION: STABLE → UNSTABLE")

    # Record how long the previous stable period lasted.
    stable_for = time.time() - self.last_state_change_time
    logger.debug(
        f"Stable duration: {format_human_readable_time(stable_for)}, "
        f"Failures: {self.consecutive_failures}/{self.max_consecutive_failures}"
    )

    # Extension point for notifications/metrics; persist the transition.
    self._save_state_to_consul()
|
||||
|
||||
def _log_debug_metrics(self):
    """Emit a one-line DEBUG summary of the monitor's full state.

    Covers connection state/duration, failure counters, time since the
    last failure, remediation state, and the 1-hour stability timer,
    then persists everything to Consul.
    """
    now = time.time()

    # Connection-state section
    state_for = now - self.last_state_change_time
    transition_info = f"State: {self.connection_state} (duration: {format_human_readable_time(state_for)})"

    # Failure-counter section
    failure_info = f"Failures: {self.consecutive_failures}/{self.max_consecutive_failures}"

    # Time-since-last-failure section
    if self.last_failure_time:
        since_failure = now - self.last_failure_time
        last_failure_info = f"Last failure: {format_human_readable_time(since_failure)} ago"
    else:
        last_failure_info = "Last failure: Never"

    # Remediation section
    remediation_info = f"Remediation: {self.remediation_state or 'None'}"
    if self.remediation_state:
        running_for = now - self.remediation_start_time
        remediation_info += f" (duration: {format_human_readable_time(running_for)})"

    # Stability-timer section
    if self.stability_start_time:
        elapsed = now - self.stability_start_time
        remaining = max(0, self.stability_duration_required - elapsed)
        stability_info = (
            f"Stability: {format_human_readable_time(elapsed)}/"
            f"{format_human_readable_time(self.stability_duration_required)}, "
            f"Remaining: {format_human_readable_time(remaining)}"
        )
    else:
        stability_info = "Stability timer: Not active"

    logger.debug(f"SYSTEM STATUS - {transition_info}, {failure_info}, {last_failure_info}, {remediation_info}, {stability_info}")
    self._save_state_to_consul()
|
||||
|
||||
|
||||
def on_unstable_to_stable(self):
    """Hook invoked when the connection flips from unstable to stable."""
    logger.info("STATE TRANSITION: UNSTABLE → STABLE")

    unstable_for = time.time() - self.last_state_change_time
    logger.debug(
        f"Unstable duration: {format_human_readable_time(unstable_for)}, "
        f"Total failures: {self.consecutive_failures}"
    )

    # Remediation tracks its own failure accounting, so counters are
    # only cleared when no remediation is in progress.
    if self.remediation_state is None:
        self.consecutive_failures = 0
        self.last_failure_time = None

    # Extension point for notifications/metrics; persist the transition.
    self._save_state_to_consul()
|
||||
|
||||
def _determine_connection_state(self, status) -> str:
|
||||
"""
|
||||
Determine if connection is stable or unstable based on status
|
||||
"""
|
||||
connection_status = status.get('connection_status', 'unknown')
|
||||
dht_nodes = status.get('dht_nodes', 0)
|
||||
|
||||
is_connected = (
|
||||
connection_status == 'connected' and
|
||||
dht_nodes > 0
|
||||
)
|
||||
|
||||
if is_connected:
|
||||
self.consecutive_stable_checks += 1
|
||||
# For normal operation, consider stable immediately when connected
|
||||
# The 1-hour requirement is only for remediation stability checking
|
||||
if self.remediation_state != 'waiting_for_stability':
|
||||
return 'stable'
|
||||
else:
|
||||
# In remediation, maintain current state until 1-hour requirement is met
|
||||
return self.connection_state
|
||||
else:
|
||||
self.consecutive_stable_checks = 0
|
||||
# Log why the connection is considered unstable
|
||||
if connection_status == 'error':
|
||||
logger.warning(f"Connection unstable due to API error: {status.get('error_type', 'Unknown')} - {status.get('error_details', 'No details')}")
|
||||
elif connection_status != 'connected':
|
||||
logger.warning(f"Connection unstable: Status is '{connection_status}' (expected 'connected')")
|
||||
elif dht_nodes <= 0:
|
||||
logger.warning(f"Connection unstable: DHT nodes is {dht_nodes} (expected > 0)")
|
||||
return 'unstable'
|
||||
|
||||
def monitor_connection(self):
    """
    Main connection monitoring loop.

    Every check_interval seconds: fetch status, classify stable/unstable,
    fire the state-transition hooks, track the 1-hour stability window
    during remediation, and start/advance the remediation state machine
    when failures accumulate. Runs until KeyboardInterrupt.
    """
    logger.info("Starting connection monitoring...")
    logger.info(f"Monitoring parameters:")
    logger.info(f"- API URL: {self.api_url}")
    logger.info(f"- Tracker: {self.tracker_name}")
    logger.info(f"- Check Interval: {self.check_interval} seconds")
    logger.info(f"- Max Consecutive Failures: {self.max_consecutive_failures}")

    while True:
        try:
            # Get current connection status
            status = self.get_connection_status()

            # Determine current connection state
            current_state = self._determine_connection_state(status)

            # Handle state transitions (hooks fire before state is recorded)
            if current_state != self.connection_state:
                if current_state == 'unstable':
                    self.on_stable_to_unstable()
                else:
                    self.on_unstable_to_stable()
                self.connection_state = current_state
                self.last_state_change_time = time.time()
                # Log state transition with debug metrics
                self._log_debug_metrics()

            # Log comprehensive debug metrics on each iteration
            self._log_debug_metrics()

            # Handle connection status logging and failure tracking
            if current_state == 'stable':
                logger.debug(f"Connection Stable. Status: {status.get('connection_status', 'unknown')}, DHT Nodes: {status.get('dht_nodes', 0)}")

                # Handle stability tracking for remediation
                if self.remediation_state == 'waiting_for_stability':
                    # Start tracking stability time if not already started
                    if self.stability_start_time is None:
                        self.stability_start_time = time.time()
                        logger.info("Stable connectivity detected, starting 1-hour timer")
                        # Log detailed stability info
                        self._log_debug_metrics()

                    # Calculate elapsed stable time
                    elapsed_stable_time = time.time() - self.stability_start_time
                    remaining_time = max(0, self.stability_duration_required - elapsed_stable_time)
                    logger.info(f"Stable for {format_human_readable_time(elapsed_stable_time)}/{format_human_readable_time(self.stability_duration_required)}, Remaining: {format_human_readable_time(remaining_time)}")

                    # Check if we've reached 1 hour of stability
                    if elapsed_stable_time >= self.stability_duration_required:
                        logger.info("1 hour of stable connectivity achieved, proceeding to restart torrents")
                        self.remediation_state = 'restarting_torrents'
                        self.stability_start_time = None
                        # Save state to Consul
                        self._save_state_to_consul()
                else:
                    # Reset stability timer if not in waiting_for_stability state
                    if self.stability_start_time is not None:
                        logger.debug("Resetting stability timer (not in waiting_for_stability state)")
                        self.stability_start_time = None

                # Reset failure count when stable outside of remediation
                if self.consecutive_failures > 0 and self.remediation_state is None:
                    logger.info(f"Connection restored, resetting failure count from {self.consecutive_failures} to 0")
                    self.consecutive_failures = 0
                    self.last_failure_time = None
            else:
                # Increment failure counter for unstable state
                self.consecutive_failures += 1
                self.last_failure_time = time.time()  # Record when failure occurred
                logger.warning(f"Connection unstable. Failures: {self.consecutive_failures}/{self.max_consecutive_failures}")

                # Reset stability timer if connection is lost
                if self.stability_start_time is not None:
                    logger.warning("Connection lost during stabilization, resetting 1-hour timer")
                    self.stability_start_time = None
                    # Log the reset with debug metrics
                    self._log_debug_metrics()

                # Check if remediation is needed (only if not already in progress)
                if (self.consecutive_failures >= self.max_consecutive_failures and
                        self.remediation_state is None):
                    logger.error(f"Persistent connection failure ({self.consecutive_failures}/{self.max_consecutive_failures}). Initiating remediation.")
                    if self.start_remediation():
                        logger.info("Remediation started successfully")
                        # Log remediation start with debug metrics
                        self._log_debug_metrics()
                    else:
                        logger.error("Failed to start remediation")
                        # Keep the counter pinned so remediation is retried next cycle
                        self.consecutive_failures = self.max_consecutive_failures

            # Process remediation if active (even if connection is down)
            if self.remediation_state is not None:
                remediation_result = self.process_remediation()
                if remediation_result:
                    logger.info("Remediation completed successfully")
                    self.consecutive_failures = 0
                    # Log successful remediation with debug metrics
                    self._log_debug_metrics()
                elif self.remediation_state is None:
                    # process_remediation cleared the state without success
                    logger.warning("Remediation failed or was cancelled")
                    self.consecutive_failures = self.max_consecutive_failures
                    # Log remediation failure with debug metrics
                    self._log_debug_metrics()

            # Wait before next check
            time.sleep(self.check_interval)

        except Exception as e:
            # KeyboardInterrupt is a BaseException, so it is NOT swallowed here
            logger.error(f"Unexpected error in monitoring loop: {e}")
            time.sleep(self.check_interval)

        except KeyboardInterrupt:
            logger.info("Connection monitoring stopped by user.")
            break
|
||||
|
||||
def main():
    """Entry point: build a ConnectionMonitor and run it until interrupted."""
    # NOTE(review): endpoints, the tracker announce URL (which embeds a
    # passkey), and Consul address are hard-coded; consider moving them
    # to configuration.
    monitor = ConnectionMonitor(
        qbittorrent_url='http://sp.service.dc1.consul:8080',
        nomad_url='http://192.168.4.36:4646',
        tracker_name='https://t.myanonamouse.net/tracker.php/VPRYYAL-WpTwnr9G9aIN6044YVZ7x8Ao/announce',
        consul_url='http://consul.service.dc1.consul:8500',
    )

    try:
        monitor.monitor_connection()
    except Exception as e:
        logger.critical(f"Critical error in main execution: {e}")
|
||||
|
||||
def debug_system_info():
    """Log platform, host and internet-reachability diagnostics."""
    import platform
    import socket

    logger.info("System Diagnostic Information:")
    logger.info(f"Python Version: {platform.python_version()}")
    logger.info(f"Operating System: {platform.platform()}")
    logger.info(f"Hostname: {socket.gethostname()}")

    # Best-effort reachability probe; failure is logged, never fatal.
    try:
        import requests
        reply = requests.get('https://www.google.com', timeout=5)
        logger.info(f"Internet Connectivity: OK (Status Code: {reply.status_code})")
    except Exception as e:
        logger.warning(f"Internet Connectivity Check Failed: {e}")
|
||||
|
||||
if __name__ == '__main__':
    # Run system diagnostics before main script
    debug_system_info()

    # Quiet noisy third-party library loggers before the monitor starts
    try:
        # Attempt to set up more detailed logging
        import logging
        logging.getLogger('urllib3').setLevel(logging.WARNING)
        logging.getLogger('requests').setLevel(logging.WARNING)
    except Exception as e:
        logger.error(f"Failed to configure additional logging: {e}")

    # Run the main monitoring script (blocks until interrupted)
    main()
|
||||
7
config/__init__.py
Normal file
7
config/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
Configuration module for qBittorrent connection monitoring
|
||||
"""
|
||||
|
||||
from .logging_config import setup_logging, configure_third_party_logging, ColoredFormatter
|
||||
|
||||
__all__ = ['setup_logging', 'configure_third_party_logging', 'ColoredFormatter']
|
||||
BIN
config/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
config/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
config/__pycache__/logging_config.cpython-313.pyc
Normal file
BIN
config/__pycache__/logging_config.cpython-313.pyc
Normal file
Binary file not shown.
83
config/logging_config.py
Normal file
83
config/logging_config.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import logging
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
class ColoredFormatter(logging.Formatter):
    """Formatter that wraps each record in an ANSI color per log level."""

    # ANSI color codes
    GREY = '\033[90m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD_RED = '\033[1;91m'
    RESET = '\033[0m'

    def format(self, record):
        """Return the base-formatted record wrapped in its level color."""
        palette = {
            logging.DEBUG: self.GREY,
            logging.INFO: self.GREEN,
            logging.WARNING: self.YELLOW,
            logging.ERROR: self.RED,
            logging.CRITICAL: self.RED,
        }
        # Unknown/custom levels fall back to no color (RESET).
        color = palette.get(record.levelno, self.RESET)
        message = super().format(record)
        return f"{color}{message}{self.RESET}"
|
||||
|
||||
def setup_logging(logger_name: str = __name__,
                  log_file: str = 'connection_monitor.log',
                  console_level: int = logging.DEBUG,
                  file_level: int = logging.DEBUG) -> logging.Logger:
    """
    Configure logging with console and file handlers.

    Args:
        logger_name: Name of the logger
        log_file: Path to log file
        console_level: Log level for console output
        file_level: Log level for file output

    Returns:
        Configured logger instance
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)

    # Remove any existing handlers so repeated calls don't duplicate output
    logger.handlers = []

    # Console handler with colors
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(console_level)
    console_handler.setFormatter(ColoredFormatter(
        '%(asctime)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    ))

    # File handler (no colors). Explicit UTF-8 encoding so log content is
    # not garbled on platforms whose default locale encoding is not UTF-8.
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    file_handler.setLevel(file_level)
    file_handler.setFormatter(logging.Formatter(
        '%(asctime)s - %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    ))

    # Add both handlers
    logger.addHandler(console_handler)
    logger.addHandler(file_handler)

    return logger
|
||||
|
||||
def configure_third_party_logging():
    """Quiet noisy third-party library loggers (urllib3, requests)."""
    try:
        for noisy in ('urllib3', 'requests'):
            logging.getLogger(noisy).setLevel(logging.WARNING)
    except Exception as e:
        logging.getLogger(__name__).error(f"Failed to configure additional logging: {e}")
|
||||
70679
connection_monitor.log
70679
connection_monitor.log
File diff suppressed because it is too large
Load Diff
64
main.py
Normal file
64
main.py
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Main entry point for qBittorrent connection monitoring
|
||||
"""
|
||||
|
||||
import platform
|
||||
import socket
|
||||
import logging
|
||||
import requests
|
||||
|
||||
from config.logging_config import setup_logging, configure_third_party_logging
|
||||
from monitoring.connection_monitor import ConnectionMonitor
|
||||
|
||||
def debug_system_info(logger):
    """
    Log system and environment information for troubleshooting.

    Args:
        logger: Configured logger instance
    """
    logger.info("System Diagnostic Information:")
    logger.info(f"Python Version: {platform.python_version()}")
    logger.info(f"Operating System: {platform.platform()}")
    logger.info(f"Hostname: {socket.gethostname()}")

    # Best-effort internet reachability probe; failure is logged, never fatal.
    try:
        reply = requests.get('https://www.google.com', timeout=5)
        logger.info(f"Internet Connectivity: OK (Status Code: {reply.status_code})")
    except Exception as e:
        logger.warning(f"Internet Connectivity Check Failed: {e}")
|
||||
|
||||
def main():
    """
    Main entry point for the connection monitoring script.

    Wires together logging, one-shot system diagnostics, and the
    ConnectionMonitor, then blocks forever in its monitoring loop.
    Any exception escaping the monitor is logged as critical and re-raised
    so the process exits non-zero.
    """
    # Configure logging
    logger = setup_logging(__name__)

    # Run system diagnostics
    debug_system_info(logger)

    # Configure third-party logging
    configure_third_party_logging()

    # Create monitor instance with custom parameters.
    # NOTE(review): the tracker URL below embeds what looks like a personal
    # announce passkey — presumably a secret; confirm and consider moving it
    # (and the endpoint URLs) to configuration/environment instead of source.
    monitor = ConnectionMonitor(
        qbittorrent_url='http://sp.service.dc1.consul:8080', # Customize as needed
        nomad_url='http://192.168.4.36:4646', # Customize as needed
        tracker_name='https://t.myanonamouse.net/tracker.php/VPRYYAL-WpTwnr9G9aIN6044YVZ7x8Ao/announce', # Customize tracker name
        consul_url='http://consul.service.dc1.consul:8500', # Consul server URL
        logger=logger # Pass the configured logger
    )

    try:
        # Start connection monitoring (infinite loop; returns only on Ctrl-C)
        monitor.monitor_connection()

    except Exception as e:
        logger.critical(f"Critical error in main execution: {e}")
        raise

if __name__ == '__main__':
    main()
|
||||
8
monitoring/__init__.py
Normal file
8
monitoring/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""
|
||||
Monitoring components for qBittorrent connection monitoring
|
||||
"""
|
||||
|
||||
from .connection_monitor import ConnectionMonitor
|
||||
from .remediation_manager import RemediationManager
|
||||
|
||||
__all__ = ['ConnectionMonitor', 'RemediationManager']
|
||||
BIN
monitoring/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
monitoring/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
monitoring/__pycache__/connection_monitor.cpython-313.pyc
Normal file
BIN
monitoring/__pycache__/connection_monitor.cpython-313.pyc
Normal file
Binary file not shown.
BIN
monitoring/__pycache__/remediation_manager.cpython-313.pyc
Normal file
BIN
monitoring/__pycache__/remediation_manager.cpython-313.pyc
Normal file
Binary file not shown.
428
monitoring/connection_monitor.py
Normal file
428
monitoring/connection_monitor.py
Normal file
@@ -0,0 +1,428 @@
|
||||
import time
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
|
||||
from api.qbittorrent_client import QBittorrentClient
|
||||
from api.nomad_client import NomadClient
|
||||
from api.vpn_client import VPNClient
|
||||
from persistence.state_manager import StateManager
|
||||
from monitoring.remediation_manager import RemediationManager
|
||||
from utils.time_utils import format_human_readable_time
|
||||
from utils.formatters import format_connection_status
|
||||
|
||||
class ConnectionMonitor:
|
||||
"""Main connection monitoring class"""
|
||||
|
||||
    def __init__(self,
                 qbittorrent_url: str = 'http://127.0.0.1:8080',
                 nomad_url: str = 'http://127.0.0.1:4646',
                 tracker_name: str = 'myanon',
                 consul_url: str = 'http://consul.service.dc1.consul:8500',
                 check_interval: int = 30,
                 max_consecutive_failures: int = 20,
                 stability_wait_time: int = 3720,
                 stability_duration_required: int = 3600,
                 logger: logging.Logger = None):
        """
        Initialize connection monitoring with configurable parameters

        Args:
            qbittorrent_url: qBittorrent API URL
            nomad_url: Nomad API URL
            tracker_name: Tracker name for torrent operations
            consul_url: Consul server URL for persistence
            check_interval: Check interval in seconds
            max_consecutive_failures: Maximum failures before remediation
            stability_wait_time: Time to wait for stability during remediation (seconds)
            stability_duration_required: Required stability duration after remediation (seconds)
            logger: Optional logger instance (uses module logger if not provided)
        """
        self.logger = logger or logging.getLogger(__name__)

        # Initialize clients.
        # NOTE(review): credentials are hard-coded here — confirm whether these
        # should come from configuration or a secrets store instead.
        self.qbittorrent_client = QBittorrentClient(
            base_url=qbittorrent_url,
            username='admin',
            password='adminpass',
            logger=self.logger  # Pass the logger
        )
        self.nomad_client = NomadClient(base_url=nomad_url, logger=self.logger)

        # Initialize VPN client (using same host as qBittorrent but port 8000).
        # Caveat: this substitution only takes effect if qbittorrent_url
        # literally contains ':8080'; otherwise the VPN client reuses the
        # qBittorrent URL unchanged.
        vpn_base_url = qbittorrent_url.replace(':8080', ':8000')
        self.vpn_client = VPNClient(base_url=vpn_base_url, logger=self.logger)

        # Initialize state management (Consul-backed persistence)
        self.state_manager = StateManager(consul_url=consul_url)

        # Initialize remediation manager (shares the same clients/state)
        self.remediation_manager = RemediationManager(
            qbittorrent_client=self.qbittorrent_client,
            nomad_client=self.nomad_client,
            state_manager=self.state_manager,
            tracker_name=tracker_name,
            max_consecutive_failures=max_consecutive_failures,
            stability_wait_time=stability_wait_time,
            stability_duration_required=stability_duration_required,
            logger=self.logger  # Pass the logger
        )

        # Configuration
        self.check_interval = check_interval
        self.tracker_name = tracker_name
|
||||
|
||||
def _determine_connection_state(self, status: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Determine if connection is stable or unstable based on status
|
||||
|
||||
Args:
|
||||
status: Connection status dictionary
|
||||
|
||||
Returns:
|
||||
'stable' or 'unstable'
|
||||
"""
|
||||
connection_status = status.get('connection_status', 'unknown')
|
||||
dht_nodes = status.get('dht_nodes', 0)
|
||||
|
||||
is_connected = (
|
||||
connection_status == 'connected' and
|
||||
dht_nodes > 0
|
||||
)
|
||||
|
||||
if is_connected:
|
||||
self.state_manager.consecutive_stable_checks += 1
|
||||
# Always return 'stable' when connection is good, regardless of remediation state
|
||||
# The 1-hour stability requirement is handled in the stability tracking logic, not here
|
||||
return 'stable'
|
||||
else:
|
||||
self.state_manager.consecutive_stable_checks = 0
|
||||
# Log why the connection is considered unstable
|
||||
if connection_status == 'error':
|
||||
self.logger.warning(f"Connection unstable due to API error: {status.get('error_type', 'Unknown')} - {status.get('error_details', 'No details')}")
|
||||
elif connection_status != 'connected':
|
||||
self.logger.warning(f"Connection unstable: Status is '{connection_status}' (expected 'connected')")
|
||||
elif dht_nodes <= 0:
|
||||
self.logger.warning(f"Connection unstable: DHT nodes is {dht_nodes} (expected > 0)")
|
||||
return 'unstable'
|
||||
|
||||
def _handle_state_transition(self, current_state: str):
|
||||
"""
|
||||
Handle state transitions and call appropriate handlers
|
||||
|
||||
Args:
|
||||
current_state: Current connection state
|
||||
"""
|
||||
if current_state != self.state_manager.connection_state:
|
||||
if current_state == 'unstable':
|
||||
self._on_stable_to_unstable()
|
||||
else:
|
||||
self._on_unstable_to_stable()
|
||||
|
||||
self.state_manager.connection_state = current_state
|
||||
self.state_manager.last_state_change_time = time.time()
|
||||
self.state_manager.save_state()
|
||||
|
||||
def _on_stable_to_unstable(self):
|
||||
"""Called when connection transitions from stable to unstable state"""
|
||||
self.logger.warning("STATE TRANSITION: STABLE → UNSTABLE")
|
||||
# Log detailed transition information
|
||||
current_time = time.time()
|
||||
stable_duration = current_time - self.state_manager.last_state_change_time
|
||||
self.logger.debug(f"Stable duration: {format_human_readable_time(stable_duration)}, "
|
||||
f"Failures: {self.state_manager.consecutive_failures}/{self.remediation_manager.max_consecutive_failures}")
|
||||
|
||||
# Get container information for qbittorrent job tasks
|
||||
try:
|
||||
container_info = self.nomad_client.get_container_info(
|
||||
job_id="qbittorrent",
|
||||
task_names=["dante", "qbittorrent", "qbittorrent-vpn"],
|
||||
namespace="default"
|
||||
)
|
||||
|
||||
if container_info:
|
||||
self.logger.info("Container Status:")
|
||||
for task_name, info in container_info.items():
|
||||
self.logger.info(
|
||||
f"- {task_name}: Uptime: {info['uptime']}, "
|
||||
f"Last restart: {info['last_restart_time']} "
|
||||
f"(reason: {info['last_restart_reason']}), "
|
||||
f"Restarts: {info['restart_count']}"
|
||||
)
|
||||
else:
|
||||
self.logger.warning("Could not retrieve container information")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to get container information: {e}")
|
||||
|
||||
self.state_manager.save_state()
|
||||
|
||||
def _on_unstable_to_stable(self):
|
||||
"""Called when connection transitions from unstable to stable state"""
|
||||
self.logger.info("STATE TRANSITION: UNSTABLE → STABLE")
|
||||
unstable_duration = time.time() - self.state_manager.last_state_change_time
|
||||
self.logger.debug(f"Unstable duration: {format_human_readable_time(unstable_duration)}, "
|
||||
f"Total failures: {self.state_manager.consecutive_failures}")
|
||||
|
||||
# Only reset failures if not in remediation (remediation handles its own failure tracking)
|
||||
if self.state_manager.remediation_state is None:
|
||||
self.state_manager.consecutive_failures = 0
|
||||
self.state_manager.last_failure_time = None
|
||||
self.state_manager.save_state()
|
||||
|
||||
def _log_vpn_status(self, vpn_status: Dict[str, Any], public_ip_info: Dict[str, Any]):
|
||||
"""
|
||||
Log VPN status and public IP information with change detection
|
||||
|
||||
Args:
|
||||
vpn_status: VPN status dictionary
|
||||
public_ip_info: Public IP information dictionary
|
||||
"""
|
||||
# Log VPN status and update state
|
||||
vpn_state = vpn_status.get('status', 'unknown')
|
||||
if vpn_state == 'error':
|
||||
self.logger.error(f"VPN Status Error: {vpn_status.get('error_type', 'Unknown')} - {vpn_status.get('error_details', 'No details')}")
|
||||
self.state_manager.update_vpn_status('error')
|
||||
else:
|
||||
self.logger.info(f"VPN Status: {vpn_state}")
|
||||
self.state_manager.update_vpn_status(vpn_state)
|
||||
|
||||
# Log public IP information with change detection
|
||||
current_ip = public_ip_info.get('public_ip', 'unknown')
|
||||
if current_ip == 'error':
|
||||
self.logger.error(f"Public IP Error: {public_ip_info.get('error_type', 'Unknown')} - {public_ip_info.get('error_details', 'No details')}")
|
||||
else:
|
||||
# Update state with public IP information
|
||||
self.state_manager.update_public_ip(current_ip, public_ip_info)
|
||||
|
||||
# Check if IP has changed for logging purposes
|
||||
previous_ip = getattr(self, '_last_public_ip', None)
|
||||
if previous_ip is None:
|
||||
# First time seeing IP, just log it
|
||||
self._last_public_ip = current_ip
|
||||
self._log_public_ip_details(public_ip_info, "Initial")
|
||||
elif current_ip != previous_ip:
|
||||
# IP has changed
|
||||
self.logger.warning(f"PUBLIC IP CHANGE DETECTED: {previous_ip} → {current_ip}")
|
||||
self._last_public_ip = current_ip
|
||||
self._log_public_ip_details(public_ip_info, "Changed")
|
||||
else:
|
||||
# IP unchanged, log details at debug level
|
||||
self.logger.debug(f"Public IP unchanged: {current_ip}")
|
||||
|
||||
def _log_public_ip_details(self, ip_info: Dict[str, Any], context: str):
|
||||
"""
|
||||
Log detailed public IP information
|
||||
|
||||
Args:
|
||||
ip_info: Public IP information dictionary
|
||||
context: Context for logging (e.g., "Initial", "Changed")
|
||||
"""
|
||||
public_ip = ip_info.get('public_ip', 'unknown')
|
||||
region = ip_info.get('region', 'unknown')
|
||||
country = ip_info.get('country', 'unknown')
|
||||
city = ip_info.get('city', 'unknown')
|
||||
organization = ip_info.get('organization', 'unknown')
|
||||
|
||||
self.logger.info(f"{context} Public IP Details: {public_ip} ({organization}) - {city}, {region}, {country}")
|
||||
|
||||
def _log_check_summary(self, status: Dict[str, Any], vpn_status: Dict[str, Any],
|
||||
public_ip_info: Dict[str, Any], current_state: str):
|
||||
"""
|
||||
Log a comprehensive check summary in hierarchical format
|
||||
|
||||
Args:
|
||||
status: Connection status dictionary
|
||||
vpn_status: VPN status dictionary
|
||||
public_ip_info: Public IP information dictionary
|
||||
current_state: Current connection state
|
||||
"""
|
||||
# Build summary components
|
||||
summary_lines = []
|
||||
|
||||
# Connection status
|
||||
connection_status = status.get('connection_status', 'unknown')
|
||||
dht_nodes = status.get('dht_nodes', 0)
|
||||
summary_lines.append(f"Status: {connection_status} (DHT: {dht_nodes} nodes)")
|
||||
|
||||
# VPN status
|
||||
vpn_state = vpn_status.get('status', 'unknown')
|
||||
if vpn_state == 'error':
|
||||
error_type = vpn_status.get('error_type', 'Unknown')
|
||||
error_details = vpn_status.get('error_details', 'No details')
|
||||
summary_lines.append(f"VPN: {vpn_state} - {error_type}: {error_details}")
|
||||
else:
|
||||
summary_lines.append(f"VPN: {vpn_state}")
|
||||
|
||||
# Public IP
|
||||
current_ip = public_ip_info.get('public_ip', 'unknown')
|
||||
if current_ip == 'error':
|
||||
error_type = public_ip_info.get('error_type', 'Unknown')
|
||||
error_details = public_ip_info.get('error_details', 'No details')
|
||||
summary_lines.append(f"IP: {current_ip} - {error_type}: {error_details}")
|
||||
else:
|
||||
ip_change = "unchanged" if current_ip == getattr(self, '_last_public_ip', None) else "changed"
|
||||
summary_lines.append(f"IP: {current_ip} ({ip_change})")
|
||||
|
||||
# Get system state metrics
|
||||
debug_metrics = self.state_manager.get_debug_metrics()
|
||||
|
||||
# Log as structured hierarchical message
|
||||
self.logger.debug(f"Connection Check Summary:\n " + "\n ".join(summary_lines) +
|
||||
f"\n\n System State:{debug_metrics['multiline']}")
|
||||
|
||||
def _update_vpn_and_ip_state(self, vpn_status: Dict[str, Any], public_ip_info: Dict[str, Any]):
|
||||
"""
|
||||
Update VPN and public IP state without logging (for use with consolidated logging)
|
||||
|
||||
Args:
|
||||
vpn_status: VPN status dictionary
|
||||
public_ip_info: Public IP information dictionary
|
||||
"""
|
||||
# Update VPN status without logging
|
||||
vpn_state = vpn_status.get('status', 'unknown')
|
||||
if vpn_state == 'error':
|
||||
self.state_manager.update_vpn_status('error')
|
||||
else:
|
||||
self.state_manager.update_vpn_status(vpn_state)
|
||||
|
||||
# Update public IP information without logging
|
||||
current_ip = public_ip_info.get('public_ip', 'unknown')
|
||||
if current_ip == 'error':
|
||||
# Just update state, don't log error here
|
||||
pass
|
||||
else:
|
||||
# Update state with public IP information
|
||||
self.state_manager.update_public_ip(current_ip, public_ip_info)
|
||||
|
||||
# Track IP changes for logging purposes in summary
|
||||
previous_ip = getattr(self, '_last_public_ip', None)
|
||||
if previous_ip is None:
|
||||
self._last_public_ip = current_ip
|
||||
elif current_ip != previous_ip:
|
||||
self._last_public_ip = current_ip
|
||||
|
||||
    def monitor_connection(self):
        """
        Main connection monitoring loop.

        Runs forever: every `check_interval` seconds it polls qBittorrent,
        the VPN sidecar and the public-IP endpoint, classifies the connection
        as stable/unstable, tracks failure counts, and drives the remediation
        state machine. Returns only on KeyboardInterrupt; any other exception
        is logged and the loop continues after one interval.
        """
        self.logger.info("Starting connection monitoring...")
        self.logger.info(f"Monitoring parameters:")
        self.logger.info(f"- API URL: {self.qbittorrent_client.api_url}")
        self.logger.info(f"- Tracker: {self.tracker_name}")
        self.logger.info(f"- Check Interval: {self.check_interval} seconds")
        self.logger.info(f"- Max Consecutive Failures: {self.remediation_manager.max_consecutive_failures}")

        # Log initial container status (best effort; errors don't abort startup)
        try:
            container_info = self.nomad_client.get_container_info(
                job_id="qbittorrent",
                task_names=["dante", "qbittorrent", "qbittorrent-vpn"],
                namespace="default"
            )

            if container_info:
                self.logger.info("Initial Container Status:")
                for task_name, info in container_info.items():
                    self.logger.info(
                        f"- {task_name}: Uptime: {info['uptime']}, "
                        f"Last restart: {info['last_restart_time']} "
                        f"(reason: {info['last_restart_reason']}), "
                        f"Restarts: {info['restart_count']}"
                    )
            else:
                self.logger.warning("Could not retrieve initial container information")

        except Exception as e:
            self.logger.error(f"Failed to get initial container information: {e}")

        while True:
            try:
                # Get current connection status (suppress individual debug logs)
                status = self.qbittorrent_client.get_connection_status(verbose_debug=False)

                # Get VPN status and public IP information (suppress individual debug logs)
                vpn_status = self.vpn_client.get_vpn_status(verbose_debug=False)
                public_ip_info = self.vpn_client.get_public_ip_info(verbose_debug=False)

                # Update VPN and public IP state (without individual logging)
                self._update_vpn_and_ip_state(vpn_status, public_ip_info)

                # Determine current connection state (also updates the
                # consecutive-stable-checks counter as a side effect)
                current_state = self._determine_connection_state(status)

                # Handle state transitions (fires hooks, persists state)
                self._handle_state_transition(current_state)

                # Log comprehensive check summary in hierarchical format (only this debug log)
                self._log_check_summary(status, vpn_status, public_ip_info, current_state)

                # Handle connection status and failure tracking
                if current_state == 'stable':

                    # Handle stability tracking for remediation
                    if self.state_manager.remediation_state == 'waiting_for_stability':
                        # Start tracking stability time if not already started
                        if self.state_manager.stability_start_time is None:
                            self.state_manager.start_stability_timer()
                            self.logger.info("Stable connectivity detected, starting 1-hour timer")

                        # Calculate elapsed stable time
                        elapsed_stable_time = time.time() - self.state_manager.stability_start_time
                        remaining_time = max(0, self.remediation_manager.stability_duration_required - elapsed_stable_time)
                        self.logger.info(f"Stable for {format_human_readable_time(elapsed_stable_time)}/"
                                        f"{format_human_readable_time(self.remediation_manager.stability_duration_required)}, "
                                        f"Remaining: {format_human_readable_time(remaining_time)}")

                        # Check if we've reached 1 hour of stability.
                        # On success this advances the state machine to
                        # 'restarting_torrents'; the `continue` deliberately
                        # skips the sleep so the restart is processed on the
                        # very next iteration.
                        if self.remediation_manager.check_stability_requirement_met():
                            continue
                    else:
                        # Reset stability timer if not in waiting_for_stability state
                        if self.state_manager.stability_start_time is not None:
                            self.logger.debug("Resetting stability timer (not in waiting_for_stability state)")
                            self.state_manager.reset_stability_timer()

                        # Reset failure count when stable outside of remediation
                        if (self.state_manager.consecutive_failures > 0 and
                            self.state_manager.remediation_state is None):
                            self.logger.info(f"Connection restored, resetting failure count from {self.state_manager.consecutive_failures} to 0")
                            self.state_manager.consecutive_failures = 0
                            self.state_manager.last_failure_time = None
                else:
                    # Increment failure counter for unstable state
                    self.state_manager.consecutive_failures += 1
                    self.state_manager.last_failure_time = time.time()
                    self.logger.warning(f"Connection unstable. Failures: {self.state_manager.consecutive_failures}/{self.remediation_manager.max_consecutive_failures}")

                    # Reset stability timer if connection is lost
                    self.remediation_manager.reset_stability_on_connection_loss()

                    # Check if remediation is needed (only if not already in progress)
                    if self.remediation_manager.should_start_remediation():
                        self.logger.error(f"Persistent connection failure ({self.state_manager.consecutive_failures}/{self.remediation_manager.max_consecutive_failures}). Initiating remediation.")
                        if self.remediation_manager.start_remediation():
                            self.logger.info("Remediation started successfully")
                        else:
                            # Pin the counter at the threshold so remediation
                            # is attempted again on the next unstable check.
                            self.logger.error("Failed to start remediation")
                            self.state_manager.consecutive_failures = self.remediation_manager.max_consecutive_failures

                # Process remediation if active (even if connection is down)
                if self.state_manager.remediation_state is not None:
                    remediation_result = self.remediation_manager.process_remediation()
                    if remediation_result:
                        self.logger.info("Remediation completed successfully")
                        self.state_manager.consecutive_failures = 0
                    elif self.state_manager.remediation_state is None:
                        # process_remediation cleared the state without
                        # reporting success => it aborted/was cancelled.
                        self.logger.warning("Remediation failed or was cancelled")
                        self.state_manager.consecutive_failures = self.remediation_manager.max_consecutive_failures

                # Wait before next check
                time.sleep(self.check_interval)

            except Exception as e:
                # KeyboardInterrupt is a BaseException, so it is NOT caught
                # here and falls through to the handler below.
                self.logger.error(f"Unexpected error in monitoring loop: {e}")
                time.sleep(self.check_interval)

            except KeyboardInterrupt:
                self.logger.info("Connection monitoring stopped by user.")
                break
|
||||
189
monitoring/remediation_manager.py
Normal file
189
monitoring/remediation_manager.py
Normal file
@@ -0,0 +1,189 @@
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
from requests import Session
|
||||
|
||||
from api.qbittorrent_client import QBittorrentClient
|
||||
from api.nomad_client import NomadClient
|
||||
from persistence.state_manager import StateManager
|
||||
from utils.time_utils import format_human_readable_time
|
||||
|
||||
class RemediationManager:
|
||||
"""Manages the remediation state machine for connection issues"""
|
||||
|
||||
    def __init__(self,
                 qbittorrent_client: QBittorrentClient,
                 nomad_client: NomadClient,
                 state_manager: StateManager,
                 tracker_name: str,
                 max_consecutive_failures: int = 20,
                 stability_wait_time: int = 1800,
                 stability_duration_required: int = 3600,
                 logger: logging.Logger = None):
        """
        Initialize remediation manager

        Args:
            qbittorrent_client: QBittorrent client instance
            nomad_client: Nomad client instance
            state_manager: State manager instance (shared with the monitor)
            tracker_name: Tracker name for torrent operations
            max_consecutive_failures: Maximum failures before remediation
            stability_wait_time: Time to wait for stability (seconds).
                NOTE: default here is 1800, while ConnectionMonitor defaults
                to passing 3720 — the caller's value wins when provided.
            stability_duration_required: Required stability duration (seconds)
            logger: Optional logger instance (uses module logger if not provided)
        """
        self.qbittorrent_client = qbittorrent_client
        self.nomad_client = nomad_client
        self.state_manager = state_manager
        self.tracker_name = tracker_name
        self.max_consecutive_failures = max_consecutive_failures
        self.stability_wait_time = stability_wait_time
        self.stability_duration_required = stability_duration_required
        self.logger = logger or logging.getLogger(__name__)

        # Remediation session: authenticated qBittorrent HTTP session,
        # populated by start_remediation() and reused by the state handlers.
        self.remediation_session: Optional[Session] = None
|
||||
|
||||
def start_remediation(self) -> bool:
|
||||
"""
|
||||
Start the remediation process (non-blocking)
|
||||
|
||||
Returns:
|
||||
True if remediation started successfully
|
||||
"""
|
||||
self.logger.warning("Connection instability detected. Starting remediation...")
|
||||
|
||||
# Login to qBittorrent
|
||||
self.remediation_session = self.qbittorrent_client.login()
|
||||
if not self.remediation_session:
|
||||
self.logger.error("Could not log in to qBittorrent. Aborting remediation.")
|
||||
return False
|
||||
|
||||
# Update state
|
||||
self.state_manager.start_remediation()
|
||||
self.logger.info(f"Remediation started. State: {self.state_manager.remediation_state}")
|
||||
return True
|
||||
|
||||
def process_remediation(self) -> bool:
|
||||
"""
|
||||
Process the current remediation state (non-blocking)
|
||||
|
||||
Returns:
|
||||
True if remediation completed successfully, False otherwise
|
||||
"""
|
||||
if self.state_manager.remediation_state is None:
|
||||
return False
|
||||
|
||||
try:
|
||||
# Log detailed remediation state information
|
||||
remediation_duration = time.time() - self.state_manager.remediation_start_time
|
||||
self.logger.debug(f"Processing remediation state: {self.state_manager.remediation_state} "
|
||||
f"(duration: {format_human_readable_time(remediation_duration)})")
|
||||
|
||||
if self.state_manager.remediation_state == 'stopping_torrents':
|
||||
return self._process_stopping_torrents()
|
||||
|
||||
elif self.state_manager.remediation_state == 'restarting_nomad':
|
||||
return self._process_restarting_nomad()
|
||||
|
||||
elif self.state_manager.remediation_state == 'waiting_for_stability':
|
||||
return self._process_waiting_for_stability()
|
||||
|
||||
elif self.state_manager.remediation_state == 'restarting_torrents':
|
||||
return self._process_restarting_torrents()
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Unexpected error during remediation: {e}")
|
||||
self.logger.error("Resetting remediation state due to unexpected error")
|
||||
self.state_manager.reset_remediation_state()
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
def _process_stopping_torrents(self) -> bool:
|
||||
"""Process stopping torrents state"""
|
||||
if self.qbittorrent_client.stop_tracker_torrents(self.remediation_session, self.tracker_name):
|
||||
self.logger.info("Torrents stopped successfully, proceeding to restart Nomad task")
|
||||
self.state_manager.update_remediation_state('restarting_nomad')
|
||||
return False
|
||||
else:
|
||||
self.logger.error("Failed to stop torrents - retrying in next cycle")
|
||||
return False
|
||||
|
||||
def _process_restarting_nomad(self) -> bool:
|
||||
"""Process restarting nomad state"""
|
||||
if self.nomad_client.restart_task_via_allocation(
|
||||
job_id="qbittorrent",
|
||||
task_name="qbittorrent"
|
||||
):
|
||||
self.logger.info("Nomad task restarted successfully, waiting for stable connectivity")
|
||||
self.state_manager.update_remediation_state('waiting_for_stability')
|
||||
return False
|
||||
else:
|
||||
self.logger.error("Nomad task restart failed - retrying in next cycle")
|
||||
return False
|
||||
|
||||
def _process_waiting_for_stability(self) -> bool:
|
||||
"""Process waiting for stability state"""
|
||||
# Check if we've exceeded the stabilization timeout
|
||||
elapsed_time = time.time() - self.state_manager.remediation_start_time
|
||||
if elapsed_time > self.stability_wait_time:
|
||||
self.logger.error(f"Stabilization timeout reached after {format_human_readable_time(elapsed_time)}")
|
||||
self.state_manager.reset_remediation_state()
|
||||
return False
|
||||
|
||||
# This state just waits - stability checking is handled in main monitor
|
||||
return False
|
||||
|
||||
def _process_restarting_torrents(self) -> bool:
|
||||
"""Process restarting torrents state"""
|
||||
try:
|
||||
if self.qbittorrent_client.restart_tracker_torrents(self.remediation_session, self.tracker_name):
|
||||
self.logger.info("Remediation completed successfully")
|
||||
self.state_manager.reset_remediation_state()
|
||||
return True
|
||||
else:
|
||||
self.logger.error("Failed to restart torrents")
|
||||
self.state_manager.reset_remediation_state()
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to restart torrents: {e}")
|
||||
self.state_manager.reset_remediation_state()
|
||||
return False
|
||||
|
||||
def should_start_remediation(self) -> bool:
|
||||
"""
|
||||
Check if remediation should be started
|
||||
|
||||
Returns:
|
||||
True if remediation should be started
|
||||
"""
|
||||
return (self.state_manager.consecutive_failures >= self.max_consecutive_failures and
|
||||
self.state_manager.remediation_state is None)
|
||||
|
||||
def check_stability_requirement_met(self) -> bool:
|
||||
"""
|
||||
Check if stability requirement has been met
|
||||
|
||||
Returns:
|
||||
True if stability requirement met
|
||||
"""
|
||||
if (self.state_manager.remediation_state == 'waiting_for_stability' and
|
||||
self.state_manager.stability_start_time is not None):
|
||||
|
||||
elapsed_stable_time = time.time() - self.state_manager.stability_start_time
|
||||
if elapsed_stable_time >= self.stability_duration_required:
|
||||
self.logger.info("1 hour of stable connectivity achieved, proceeding to restart torrents")
|
||||
self.state_manager.update_remediation_state('restarting_torrents')
|
||||
self.state_manager.reset_stability_timer()
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def reset_stability_on_connection_loss(self):
|
||||
"""Reset stability timer when connection is lost"""
|
||||
if self.state_manager.stability_start_time is not None:
|
||||
self.logger.warning("Connection lost during stabilization, resetting 1-hour timer")
|
||||
self.state_manager.reset_stability_timer()
|
||||
8
persistence/__init__.py
Normal file
8
persistence/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""
|
||||
Persistence layer for state management
|
||||
"""
|
||||
|
||||
from .consul_persistence import ConsulPersistence
|
||||
from .state_manager import StateManager
|
||||
|
||||
__all__ = ['ConsulPersistence', 'StateManager']
|
||||
BIN
persistence/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
persistence/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
persistence/__pycache__/consul_persistence.cpython-313.pyc
Normal file
BIN
persistence/__pycache__/consul_persistence.cpython-313.pyc
Normal file
Binary file not shown.
BIN
persistence/__pycache__/state_manager.cpython-313.pyc
Normal file
BIN
persistence/__pycache__/state_manager.cpython-313.pyc
Normal file
Binary file not shown.
144
persistence/consul_persistence.py
Normal file
144
persistence/consul_persistence.py
Normal file
@@ -0,0 +1,144 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
try:
    import consul
    CONSUL_AVAILABLE = True
except ImportError:
    # python-consul is optional; persistence degrades to a no-op without it.
    consul = None
    CONSUL_AVAILABLE = False


class ConsulPersistence:
    """Handles Consul-based state persistence for connection monitoring.

    All state is stored as JSON strings under a fixed key prefix in the
    Consul KV store. Every public method degrades gracefully (returns a
    falsy/empty result) when the consul package is missing or the client
    could not be initialized.
    """

    def __init__(self, consul_url: str = 'http://consul.service.dc1.consul:8500'):
        """
        Initialize Consul persistence.

        Args:
            consul_url: Consul server URL in the form scheme://host[:port]
        """
        self.consul_url = consul_url
        self.logger = logging.getLogger(__name__)
        self.consul_client = None
        # Namespace prefix for every KV key written by this monitor.
        self.base_key = "qbitcheck/connection_monitor/"

        if CONSUL_AVAILABLE:
            self._initialize_consul_client()
        else:
            self.logger.warning("python-consul package not available. State persistence disabled.")

    def _initialize_consul_client(self) -> bool:
        """Initialize the Consul client from ``self.consul_url``.

        Returns:
            True if the client was created, False otherwise.
        """
        if not CONSUL_AVAILABLE:
            return False

        try:
            # Use urllib.parse rather than manual string splitting so URLs
            # containing a path or userinfo still yield the right host/port.
            from urllib.parse import urlparse

            parsed = urlparse(self.consul_url)
            if not parsed.scheme or not parsed.hostname:
                raise ValueError(f"Invalid Consul URL format: {self.consul_url}")

            host = parsed.hostname
            # Consul's default HTTP API port when none is given in the URL.
            port = parsed.port if parsed.port is not None else 8500

            self.consul_client = consul.Consul(host=host, port=port)
            self.logger.info(f"Consul client initialized for {self.consul_url}")
            return True

        except Exception as e:
            self.logger.error(f"Failed to initialize Consul client: {e}")
            self.consul_client = None
            return False

    def save_state(self, state_data: Dict[str, Any],
                   remediation_data: Dict[str, Any],
                   stability_data: Dict[str, Any],
                   vpn_data: Dict[str, Any]) -> bool:
        """
        Save state to Consul KV store.

        Args:
            state_data: Connection state data
            remediation_data: Remediation state data
            stability_data: Stability tracking data
            vpn_data: VPN status and IP data

        Returns:
            True if successful, False otherwise
        """
        if not self.consul_client:
            return False

        try:
            # Each section lives under its own key so partial reads are cheap.
            self.consul_client.kv.put(f"{self.base_key}state", json.dumps(state_data))
            self.consul_client.kv.put(f"{self.base_key}remediation", json.dumps(remediation_data))
            self.consul_client.kv.put(f"{self.base_key}stability", json.dumps(stability_data))
            self.consul_client.kv.put(f"{self.base_key}vpn", json.dumps(vpn_data))

            self.logger.debug("State successfully saved to Consul")
            return True

        except Exception as e:
            self.logger.error(f"Failed to save state to Consul: {e}")
            return False

    def load_state(self) -> Tuple[Optional[Dict[str, Any]],
                                  Optional[Dict[str, Any]],
                                  Optional[Dict[str, Any]],
                                  Optional[Dict[str, Any]]]:
        """
        Load state from Consul KV store.

        Returns:
            Tuple of (state_data, remediation_data, stability_data, vpn_data);
            each element is None when the key is absent or Consul is unreachable.
        """
        if not self.consul_client:
            return None, None, None, None

        try:
            state_data = None
            remediation_data = None
            stability_data = None
            vpn_data = None

            # Load connection state
            _, state_kv = self.consul_client.kv.get(f"{self.base_key}state")
            if state_kv:
                state_data = json.loads(state_kv['Value'].decode('utf-8'))

            # Load remediation state
            _, remediation_kv = self.consul_client.kv.get(f"{self.base_key}remediation")
            if remediation_kv:
                remediation_data = json.loads(remediation_kv['Value'].decode('utf-8'))

            # Load stability tracking
            _, stability_kv = self.consul_client.kv.get(f"{self.base_key}stability")
            if stability_kv:
                stability_data = json.loads(stability_kv['Value'].decode('utf-8'))

            # Load VPN state
            _, vpn_kv = self.consul_client.kv.get(f"{self.base_key}vpn")
            if vpn_kv:
                vpn_data = json.loads(vpn_kv['Value'].decode('utf-8'))

            self.logger.info("State successfully loaded from Consul")
            return state_data, remediation_data, stability_data, vpn_data

        except Exception as e:
            self.logger.error(f"Failed to load state from Consul: {e}")
            return None, None, None, None

    def is_available(self) -> bool:
        """Check if Consul persistence is available"""
        return self.consul_client is not None and CONSUL_AVAILABLE

    def get_consul_status(self) -> Dict[str, Any]:
        """Get Consul connection status"""
        return {
            'available': self.is_available(),
            'url': self.consul_url,
            'client_initialized': self.consul_client is not None
        }
|
||||
263
persistence/state_manager.py
Normal file
263
persistence/state_manager.py
Normal file
@@ -0,0 +1,263 @@
|
||||
import time
|
||||
import logging
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
from .consul_persistence import ConsulPersistence
|
||||
|
||||
class StateManager:
    """Manages connection monitoring state with optional Consul persistence.

    Holds the connection/remediation/stability/VPN state in memory and,
    when a Consul URL was supplied, mirrors every mutation to the KV store
    via ConsulPersistence.
    """

    def __init__(self, consul_url: Optional[str] = None):
        """
        Initialize state manager.

        Args:
            consul_url: Optional Consul server URL for persistence
        """
        self.logger = logging.getLogger(__name__)

        # Initialize Consul persistence only when a URL was provided.
        self.consul_persistence = ConsulPersistence(consul_url) if consul_url else None

        # Default connection-state values
        self.connection_state = 'stable'
        self.last_state_change_time = time.time()
        self.consecutive_failures = 0
        self.consecutive_stable_checks = 0
        self.last_failure_time = None

        # Remediation state
        self.remediation_state = None
        self.remediation_start_time = None
        self.stabilization_checks = 0

        # Stability tracking (timestamp when the current stable streak began)
        self.stability_start_time = None

        # VPN status tracking
        self.vpn_status = 'unknown'
        self.last_vpn_status_change = time.time()
        self.public_ip = None
        self.last_public_ip_change = None
        self.public_ip_details = {}

        # Overwrite the defaults with persisted values when available.
        self._load_state()

    def _load_state(self):
        """Load state from persistence if available; missing keys keep defaults."""
        if self.consul_persistence and self.consul_persistence.is_available():
            state_data, remediation_data, stability_data, vpn_data = self.consul_persistence.load_state()

            if state_data:
                self.connection_state = state_data.get('connection_state', 'stable')
                self.last_state_change_time = state_data.get('last_state_change_time', time.time())
                self.consecutive_failures = state_data.get('consecutive_failures', 0)
                self.consecutive_stable_checks = state_data.get('consecutive_stable_checks', 0)
                self.last_failure_time = state_data.get('last_failure_time')

            if remediation_data:
                self.remediation_state = remediation_data.get('state')
                self.remediation_start_time = remediation_data.get('start_time')
                self.stabilization_checks = remediation_data.get('stabilization_checks', 0)

            if stability_data:
                self.stability_start_time = stability_data.get('start_time')

            if vpn_data:
                self.vpn_status = vpn_data.get('vpn_status', 'unknown')
                self.last_vpn_status_change = vpn_data.get('last_vpn_status_change', time.time())
                self.public_ip = vpn_data.get('public_ip')
                self.last_public_ip_change = vpn_data.get('last_public_ip_change')
                self.public_ip_details = vpn_data.get('public_ip_details', {})

            self.logger.debug(f"Loaded state: connection={self.connection_state}, "
                              f"remediation={self.remediation_state}, "
                              f"failures={self.consecutive_failures}, "
                              f"vpn={self.vpn_status}")

    def save_state(self):
        """Save current state to persistence if available.

        Returns:
            True on successful persist, False when persistence is disabled
            or the write failed.
        """
        if self.consul_persistence and self.consul_persistence.is_available():
            state_data = {
                'connection_state': self.connection_state,
                'last_state_change_time': self.last_state_change_time,
                'consecutive_failures': self.consecutive_failures,
                'consecutive_stable_checks': self.consecutive_stable_checks,
                'last_failure_time': self.last_failure_time
            }

            remediation_data = {
                'state': self.remediation_state,
                'start_time': self.remediation_start_time,
                'stabilization_checks': self.stabilization_checks
            }

            stability_data = {
                'start_time': self.stability_start_time
            }

            # VPN state data
            vpn_data = {
                'vpn_status': self.vpn_status,
                'last_vpn_status_change': self.last_vpn_status_change,
                'public_ip': self.public_ip,
                'last_public_ip_change': self.last_public_ip_change,
                'public_ip_details': self.public_ip_details
            }

            return self.consul_persistence.save_state(state_data, remediation_data, stability_data, vpn_data)
        return False

    def reset_remediation_state(self):
        """Reset remediation state to initial values"""
        self.remediation_state = None
        self.remediation_start_time = None
        self.stabilization_checks = 0
        self.stability_start_time = None
        self.save_state()

    def start_remediation(self):
        """Start remediation process (first phase: stopping torrents)."""
        self.remediation_state = 'stopping_torrents'
        self.remediation_start_time = time.time()
        self.stabilization_checks = 0
        self.save_state()

    def update_remediation_state(self, new_state: str):
        """Update remediation state"""
        self.remediation_state = new_state
        self.save_state()

    def increment_stabilization_checks(self):
        """Increment stabilization check counter"""
        self.stabilization_checks += 1
        self.save_state()

    def start_stability_timer(self):
        """Start stability timer"""
        self.stability_start_time = time.time()
        self.save_state()

    def reset_stability_timer(self):
        """Reset stability timer"""
        self.stability_start_time = None
        self.save_state()

    def update_vpn_status(self, vpn_status: str):
        """Update VPN status and track changes (no-op when unchanged)."""
        if vpn_status != self.vpn_status:
            self.vpn_status = vpn_status
            self.last_vpn_status_change = time.time()
            self.save_state()

    def update_public_ip(self, public_ip: str, ip_details: Dict[str, Any]):
        """Update public IP and track changes (no-op when unchanged)."""
        if public_ip != self.public_ip:
            self.public_ip = public_ip
            self.public_ip_details = ip_details
            self.last_public_ip_change = time.time()
            self.save_state()

    def get_state_summary(self) -> Dict[str, Any]:
        """Get summary of current state"""
        return {
            'connection_state': self.connection_state,
            'consecutive_failures': self.consecutive_failures,
            'remediation_state': self.remediation_state,
            'stability_start_time': self.stability_start_time,
            'consul_available': self.consul_persistence.is_available() if self.consul_persistence else False
        }

    def get_debug_metrics(self) -> Dict[str, Any]:
        """Get debug metrics for logging in improved format with color support.

        Returns:
            Dict with 'multiline' (ANSI-colored, hierarchical) and 'compact'
            (single-line, color-free) renderings of the same metrics.
        """
        current_time = time.time()

        # ANSI color codes
        GREEN = '\033[92m'
        YELLOW = '\033[93m'
        RED = '\033[91m'
        BLUE = '\033[94m'
        MAGENTA = '\033[95m'
        CYAN = '\033[96m'
        RESET = '\033[0m'
        BOLD = '\033[1m'

        # Helper function for compact duration formatting
        def format_compact_duration(seconds: float) -> str:
            if seconds == 0:
                return "0s"
            hours = int(seconds // 3600)
            minutes = int((seconds % 3600) // 60)
            secs = int(seconds % 60)
            if hours > 0:
                return f"{hours}h{minutes:02d}m{secs:02d}s"
            elif minutes > 0:
                return f"{minutes}m{secs:02d}s"
            else:
                return f"{secs}s"

        # Build formatted metrics with colors
        metrics = []
        colored_metrics = []

        # Connection state
        state_duration = current_time - self.last_state_change_time
        state_color = GREEN if self.connection_state == 'stable' else RED
        metrics.append(f"Connection: {self.connection_state} ({format_compact_duration(state_duration)})")
        colored_metrics.append(f"{BOLD}Connection:{RESET} {state_color}{self.connection_state} ({format_compact_duration(state_duration)}){RESET}")

        # Failures
        failure_color = RED if self.consecutive_failures > 0 else GREEN
        failure_info = f"Failures: {self.consecutive_failures}"
        last_failure_info = f" (Last: {GREEN}Never{RESET})"
        if self.last_failure_time:
            time_since_failure = current_time - self.last_failure_time
            failure_recency_color = GREEN if time_since_failure > 300 else YELLOW  # Green after 5min
            last_failure_info = f" (Last: {failure_recency_color}{format_compact_duration(time_since_failure)} ago{RESET})"
        metrics.append(f"{failure_info}{last_failure_info.replace(RESET, '').replace(GREEN, '').replace(YELLOW, '')}")
        colored_metrics.append(f"{BOLD}Failures:{RESET} {failure_color}{failure_info}{RESET}{last_failure_info}")

        # Remediation
        remediation_color = YELLOW if self.remediation_state else GREEN
        remediation_info = f"Remediation: {self.remediation_state or 'None'}"
        # Guard against a partially-persisted state where remediation_state
        # exists but its start timestamp is None (would raise TypeError).
        if self.remediation_state and self.remediation_start_time is not None:
            remediation_duration = current_time - self.remediation_start_time
            remediation_info += f" ({format_compact_duration(remediation_duration)})"
        metrics.append(remediation_info)
        colored_metrics.append(f"{BOLD}Remediation:{RESET} {remediation_color}{remediation_info}{RESET}")

        # Stability timer
        stability_info = "Stability: Not active"
        if self.stability_start_time:
            elapsed_stable = current_time - self.stability_start_time
            stability_color = GREEN if elapsed_stable >= 1800 else YELLOW  # Green after 30min
            stability_info = f"Stability: {stability_color}{format_compact_duration(elapsed_stable)}{RESET}"
        else:
            stability_info = f"{CYAN}Stability: Not active{RESET}"
        metrics.append(stability_info.replace(RESET, '').replace(GREEN, '').replace(YELLOW, '').replace(CYAN, ''))
        colored_metrics.append(f"{BOLD}Stability:{RESET} {stability_info}")

        # VPN status
        vpn_color = GREEN if self.vpn_status == 'running' else RED
        vpn_info = f"VPN Uptime: {format_compact_duration(current_time - self.last_vpn_status_change)}"
        metrics.append(vpn_info)
        colored_metrics.append(f"{BOLD}VPN:{RESET} {vpn_color}{vpn_info}{RESET}")

        # Public IP
        ip_color = GREEN if self.public_ip and self.public_ip != 'unknown' else YELLOW
        ip_info = f"IP: {self.public_ip or 'unknown'}"
        if self.public_ip and self.last_public_ip_change:
            ip_duration = current_time - self.last_public_ip_change
            ip_info += f" ({format_compact_duration(ip_duration)})"
        metrics.append(ip_info)
        colored_metrics.append(f"{BOLD}IP:{RESET} {ip_color}{ip_info}{RESET}")

        return {
            'multiline': "\n  ".join([""] + colored_metrics),  # For hierarchical format with colors
            'compact': " | ".join(metrics)  # For single-line format without colors
        }

    def _format_duration(self, seconds: float) -> str:
        """Format duration in human-readable format"""
        from utils.time_utils import format_human_readable_time
        return format_human_readable_time(seconds)
|
||||
14
utils/__init__.py
Normal file
14
utils/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Utility functions for qBittorrent connection monitoring
|
||||
"""
|
||||
|
||||
from .time_utils import format_human_readable_time, calculate_time_remaining
|
||||
from .formatters import format_connection_status, format_debug_metrics, format_torrent_list
|
||||
|
||||
__all__ = [
|
||||
'format_human_readable_time',
|
||||
'calculate_time_remaining',
|
||||
'format_connection_status',
|
||||
'format_debug_metrics',
|
||||
'format_torrent_list'
|
||||
]
|
||||
BIN
utils/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
utils/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
utils/__pycache__/formatters.cpython-313.pyc
Normal file
BIN
utils/__pycache__/formatters.cpython-313.pyc
Normal file
Binary file not shown.
BIN
utils/__pycache__/time_utils.cpython-313.pyc
Normal file
BIN
utils/__pycache__/time_utils.cpython-313.pyc
Normal file
Binary file not shown.
56
utils/formatters.py
Normal file
56
utils/formatters.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from typing import Any, Dict
|
||||
|
||||
def format_connection_status(status: Dict[str, Any]) -> str:
    """
    Format connection status for logging.

    Args:
        status: Connection status dictionary; 'connection_status' and
            'dht_nodes' are read, with 'unknown'/0 defaults.

    Returns:
        Formatted status string
    """
    return "Status: {}, DHT Nodes: {}".format(
        status.get('connection_status', 'unknown'),
        status.get('dht_nodes', 0),
    )
|
||||
|
||||
def format_debug_metrics(metrics: Dict[str, Any]) -> str:
    """
    Format debug metrics for logging.

    Args:
        metrics: Dictionary of debug metrics; only the known keys below are
            rendered, any others are ignored.

    Returns:
        Formatted debug metrics string (known parts joined with " - ")
    """
    # Data-driven replacement for five copy-pasted `if key in metrics`
    # blocks; the tuple fixes the output ordering.
    ordered_keys = (
        'transition_info',
        'failure_info',
        'last_failure_info',
        'remediation_info',
        'stability_info',
    )
    return " - ".join(metrics[key] for key in ordered_keys if key in metrics)
|
||||
|
||||
def format_torrent_list(torrents: list) -> str:
    """
    Format torrent list for logging.

    Args:
        torrents: List of torrent dictionaries ('hash' key is read,
            defaulting to 'unknown')

    Returns:
        Formatted torrent list string showing the count and up to the
        first three hashes, with "..." marking any truncation
    """
    if not torrents:
        return "No torrents"

    hash_values = []
    for entry in torrents:
        hash_values.append(entry.get('hash', 'unknown'))

    shown = "|".join(hash_values[:3])
    suffix = "..." if len(hash_values) > 3 else ""
    return f"{len(hash_values)} torrents: {shown}{suffix}"
|
||||
49
utils/time_utils.py
Normal file
49
utils/time_utils.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from datetime import timedelta
|
||||
from typing import Optional
|
||||
|
||||
def format_human_readable_time(seconds: float) -> str:
    """
    Convert seconds to human-readable time format using datetime.timedelta.

    Args:
        seconds: Number of seconds to format

    Returns:
        Human-readable time string (e.g., "1 day 2 hours 3 minutes 4 seconds");
        "0 seconds" when no larger unit applies.
    """
    delta = timedelta(seconds=seconds)
    hours, rest = divmod(delta.seconds, 3600)
    minutes, secs = divmod(rest, 60)

    # Render each non-zero unit with singular/plural agreement.
    pieces = []
    for amount, unit in ((delta.days, "day"), (hours, "hour"), (minutes, "minute")):
        if amount > 0:
            pieces.append(f"{amount} {unit}" if amount == 1 else f"{amount} {unit}s")

    # Seconds are shown when non-zero, or as the sole fallback component.
    if secs > 0 or not pieces:
        pieces.append(f"{secs} second" if secs == 1 else f"{secs} seconds")

    return " ".join(pieces)
|
||||
|
||||
def calculate_time_remaining(start_time: float, duration_required: float, current_time: Optional[float] = None) -> float:
    """
    Calculate remaining time for a duration requirement.

    Args:
        start_time: When the timer started
        duration_required: Total duration required
        current_time: Current time (defaults to time.time())

    Returns:
        Remaining time in seconds (0 if elapsed time exceeds requirement)
    """
    import time

    # Bug fix: the previous `current_time or time.time()` treated a
    # legitimate current_time of 0 as "not provided"; test for None instead.
    current = time.time() if current_time is None else current_time
    elapsed = current - start_time
    return max(0, duration_required - elapsed)
|
||||
Reference in New Issue
Block a user