189 lines
8.1 KiB
Python
189 lines
8.1 KiB
Python
import logging
|
|
import time
|
|
from typing import Optional
|
|
from requests import Session
|
|
|
|
from api.qbittorrent_client import QBittorrentClient
|
|
from api.nomad_client import NomadClient
|
|
from persistence.state_manager import StateManager
|
|
from utils.time_utils import format_human_readable_time
|
|
|
|
class RemediationManager:
    """Manages the remediation state machine for connection issues.

    The states handled by :meth:`process_remediation` are
    ``'stopping_torrents'`` -> ``'restarting_nomad'`` ->
    ``'waiting_for_stability'`` -> ``'restarting_torrents'``. The current
    state is stored on the state manager (the initial state is chosen by
    ``state_manager.start_remediation()``); this class only performs the
    work for each state and requests the transitions.
    """

    def __init__(self,
                 qbittorrent_client: QBittorrentClient,
                 nomad_client: NomadClient,
                 state_manager: StateManager,
                 tracker_name: str,
                 max_consecutive_failures: int = 20,
                 stability_wait_time: int = 1800,
                 stability_duration_required: int = 3600,
                 logger: Optional[logging.Logger] = None,
                 nomad_job_id: str = "qbittorrent",
                 nomad_task_name: str = "qbittorrent"):
        """
        Initialize remediation manager

        Args:
            qbittorrent_client: QBittorrent client instance
            nomad_client: Nomad client instance
            state_manager: State manager instance
            tracker_name: Tracker name for torrent operations
            max_consecutive_failures: Consecutive failures required before
                remediation should be started
            stability_wait_time: Seconds (measured from the remediation
                start time) to wait for a stability window to begin before
                the remediation state is reset
            stability_duration_required: Seconds of uninterrupted stability
                required before torrents are restarted
            logger: Optional logger instance (uses module logger if not provided)
            nomad_job_id: Job id passed to the Nomad client when restarting
                the task
            nomad_task_name: Task name passed to the Nomad client when
                restarting the task
        """
        self.qbittorrent_client = qbittorrent_client
        self.nomad_client = nomad_client
        self.state_manager = state_manager
        self.tracker_name = tracker_name
        self.max_consecutive_failures = max_consecutive_failures
        self.stability_wait_time = stability_wait_time
        self.stability_duration_required = stability_duration_required
        self.logger = logger or logging.getLogger(__name__)
        self.nomad_job_id = nomad_job_id
        self.nomad_task_name = nomad_task_name

        # Session used for qBittorrent calls; created on demand and
        # discarded whenever the remediation state is reset.
        self.remediation_session: Optional[Session] = None

    # ------------------------------------------------------------------
    # Session helpers
    # ------------------------------------------------------------------

    def _ensure_session(self) -> bool:
        """Log in to qBittorrent if no session is held; return True if one is available."""
        if self.remediation_session:
            return True
        # Normalise any falsy login result to None so later checks are uniform.
        self.remediation_session = self.qbittorrent_client.login() or None
        return self.remediation_session is not None

    def _discard_session(self) -> None:
        """Forget the current session, closing it when it exposes ``close()``."""
        session, self.remediation_session = self.remediation_session, None
        if session is None:
            return
        close = getattr(session, "close", None)
        if callable(close):
            close()

    def _reset_remediation(self) -> None:
        """Reset the remediation state and release the session tied to it."""
        self.state_manager.reset_remediation_state()
        self._discard_session()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def start_remediation(self) -> bool:
        """
        Start the remediation process (non-blocking)

        Returns:
            True if remediation started successfully, False if the
            qBittorrent login failed (the state manager is left untouched)
        """
        self.logger.warning("Connection instability detected. Starting remediation...")

        # Always begin with a fresh login; drop any leftover session first
        # so it is not leaked when it gets replaced.
        self._discard_session()
        if not self._ensure_session():
            self.logger.error("Could not log in to qBittorrent. Aborting remediation.")
            return False

        # Update state
        self.state_manager.start_remediation()
        self.logger.info(f"Remediation started. State: {self.state_manager.remediation_state}")
        return True

    def process_remediation(self) -> bool:
        """
        Process the current remediation state (non-blocking)

        Any exception raised while processing is logged, the remediation
        state is reset, and False is returned.

        Returns:
            True if remediation completed successfully, False otherwise
        """
        current_state = self.state_manager.remediation_state
        if current_state is None:
            return False

        handlers = {
            'stopping_torrents': self._process_stopping_torrents,
            'restarting_nomad': self._process_restarting_nomad,
            'waiting_for_stability': self._process_waiting_for_stability,
            'restarting_torrents': self._process_restarting_torrents,
        }

        try:
            # Log detailed remediation state information
            remediation_duration = time.time() - self.state_manager.remediation_start_time
            self.logger.debug(f"Processing remediation state: {current_state} "
                              f"(duration: {format_human_readable_time(remediation_duration)})")

            handler = handlers.get(current_state)
            if handler is None:
                # Nothing here can advance an unrecognised state; make it visible.
                self.logger.warning(f"Unknown remediation state: {current_state}")
                return False
            return handler()

        except Exception as e:
            self.logger.error(f"Unexpected error during remediation: {e}", exc_info=True)
            self.logger.error("Resetting remediation state due to unexpected error")
            self._reset_remediation()
            return False

    # ------------------------------------------------------------------
    # State handlers
    # ------------------------------------------------------------------

    def _process_stopping_torrents(self) -> bool:
        """Process stopping torrents state.

        Always returns False (remediation is not complete yet); on success
        the state advances to ``'restarting_nomad'``, otherwise the same
        state is retried on the next cycle.
        """
        # The session only lives in memory, so it may be missing even though
        # a remediation state is set; log in again instead of passing None.
        if not self._ensure_session():
            self.logger.error("Could not log in to qBittorrent - retrying in next cycle")
            return False

        if self.qbittorrent_client.stop_tracker_torrents(self.remediation_session, self.tracker_name):
            self.logger.info("Torrents stopped successfully, proceeding to restart Nomad task")
            self.state_manager.update_remediation_state('restarting_nomad')
        else:
            self.logger.error("Failed to stop torrents - retrying in next cycle")
        return False

    def _process_restarting_nomad(self) -> bool:
        """Process restarting nomad state.

        Always returns False; on success the state advances to
        ``'waiting_for_stability'``, otherwise the restart is retried on the
        next cycle.
        """
        restarted = self.nomad_client.restart_task_via_allocation(
            job_id=self.nomad_job_id,
            task_name=self.nomad_task_name,
        )
        if not restarted:
            self.logger.error("Nomad task restart failed - retrying in next cycle")
            return False

        # NOTE(review): the session was created before the task restart and
        # is presumably no longer valid for the restarted service - confirm.
        # It is discarded here; a fresh login happens before torrents are
        # restarted.
        self._discard_session()

        self.logger.info("Nomad task restarted successfully, waiting for stable connectivity")
        self.state_manager.update_remediation_state('waiting_for_stability')
        return False

    def _process_waiting_for_stability(self) -> bool:
        """Process waiting for stability state.

        Always returns False. The transition out of this state is made by
        :meth:`check_stability_requirement_met`; this handler only enforces
        the timeout for a stability window to begin.
        """
        # A stability window is already running. Timing out now would make
        # it impossible to ever accumulate ``stability_duration_required``
        # whenever that is longer than ``stability_wait_time`` (as with the
        # defaults), so let the window run its course.
        if self.state_manager.stability_start_time is not None:
            return False

        # Check if we've exceeded the stabilization timeout
        elapsed_time = time.time() - self.state_manager.remediation_start_time
        if elapsed_time > self.stability_wait_time:
            self.logger.error(f"Stabilization timeout reached after {format_human_readable_time(elapsed_time)}")
            self._reset_remediation()

        # This state just waits - stability checking is handled in main monitor
        return False

    def _process_restarting_torrents(self) -> bool:
        """Process restarting torrents state.

        The remediation state is reset whether or not the restart succeeds.

        Returns:
            True if the torrents were restarted, False otherwise
        """
        succeeded = False
        try:
            if not self._ensure_session():
                self.logger.error("Could not log in to qBittorrent. Failed to restart torrents")
            elif self.qbittorrent_client.restart_tracker_torrents(self.remediation_session, self.tracker_name):
                self.logger.info("Remediation completed successfully")
                succeeded = True
            else:
                self.logger.error("Failed to restart torrents")
        except Exception as e:
            self.logger.error(f"Failed to restart torrents: {e}")

        self._reset_remediation()
        return succeeded

    # ------------------------------------------------------------------
    # Checks used by the monitor loop
    # ------------------------------------------------------------------

    def should_start_remediation(self) -> bool:
        """
        Check if remediation should be started

        Returns:
            True if the consecutive failure count has reached the configured
            maximum and no remediation is currently in progress
        """
        return (self.state_manager.consecutive_failures >= self.max_consecutive_failures and
                self.state_manager.remediation_state is None)

    def check_stability_requirement_met(self) -> bool:
        """
        Check if stability requirement has been met

        When it has, the state advances to ``'restarting_torrents'`` and the
        stability timer is reset.

        Returns:
            True if stability requirement met
        """
        stable_since = self.state_manager.stability_start_time
        if self.state_manager.remediation_state != 'waiting_for_stability' or stable_since is None:
            return False

        elapsed_stable_time = time.time() - stable_since
        if elapsed_stable_time < self.stability_duration_required:
            return False

        # Report the configured duration rather than a hard-coded "1 hour".
        self.logger.info(f"{format_human_readable_time(self.stability_duration_required)} "
                         "of stable connectivity achieved, proceeding to restart torrents")
        self.state_manager.update_remediation_state('restarting_torrents')
        self.state_manager.reset_stability_timer()
        return True

    def reset_stability_on_connection_loss(self):
        """Reset stability timer when connection is lost"""
        if self.state_manager.stability_start_time is not None:
            self.logger.warning("Connection lost during stabilization, resetting stability timer")
            self.state_manager.reset_stability_timer()