"""Remediation state machine for qBittorrent connection issues."""

import logging
import time
from typing import Optional

from requests import Session

from api.qbittorrent_client import QBittorrentClient
from api.nomad_client import NomadClient
from persistence.state_manager import StateManager
from utils.time_utils import format_human_readable_time


class RemediationManager:
    """Manages the remediation state machine for connection issues.

    The transitions driven from this class are::

        stopping_torrents -> restarting_nomad -> waiting_for_stability
                          -> restarting_torrents -> (state reset)

    The state itself is stored in the injected ``StateManager``; this class
    only holds the qBittorrent session used for the torrent operations.
    All ``process``/``_process_*`` methods are single, non-looping steps that
    are meant to be called repeatedly by an outer monitor loop.
    """

    def __init__(self,
                 qbittorrent_client: QBittorrentClient,
                 nomad_client: NomadClient,
                 state_manager: StateManager,
                 tracker_name: str,
                 max_consecutive_failures: int = 20,
                 stability_wait_time: int = 1800,
                 stability_duration_required: int = 3600,
                 logger: Optional[logging.Logger] = None):
        """
        Initialize remediation manager

        Args:
            qbittorrent_client: QBittorrent client instance
            nomad_client: Nomad client instance
            state_manager: State manager instance
            tracker_name: Tracker name for torrent operations
            max_consecutive_failures: Maximum failures before remediation
            stability_wait_time: Time to wait for stability (compared against
                elapsed seconds since ``remediation_start_time``)
            stability_duration_required: Required stability duration
                (compared against elapsed seconds since
                ``stability_start_time``)
            logger: Optional logger instance (uses module logger if not provided)
        """
        self.qbittorrent_client = qbittorrent_client
        self.nomad_client = nomad_client
        self.state_manager = state_manager
        self.tracker_name = tracker_name
        self.max_consecutive_failures = max_consecutive_failures
        self.stability_wait_time = stability_wait_time
        self.stability_duration_required = stability_duration_required
        self.logger = logger or logging.getLogger(__name__)

        # Remediation session
        self.remediation_session: Optional[Session] = None

    def start_remediation(self) -> bool:
        """
        Start the remediation process (non-blocking)

        Logs in to qBittorrent and, on success, asks the state manager to
        enter the remediation state machine.

        Returns:
            True if remediation started successfully
        """
        self.logger.warning("Connection instability detected. Starting remediation...")

        # Login to qBittorrent
        self.remediation_session = self.qbittorrent_client.login()
        if not self.remediation_session:
            self.logger.error("Could not log in to qBittorrent. Aborting remediation.")
            return False

        # Update state
        self.state_manager.start_remediation()
        self.logger.info(f"Remediation started. State: {self.state_manager.remediation_state}")
        return True

    def process_remediation(self) -> bool:
        """
        Process the current remediation state (non-blocking)

        Any exception raised while handling the current state is logged
        (with traceback) and causes the remediation state to be reset.

        Returns:
            True if remediation completed successfully, False otherwise
        """
        state = self.state_manager.remediation_state
        if state is None:
            return False

        handlers = {
            'stopping_torrents': self._process_stopping_torrents,
            'restarting_nomad': self._process_restarting_nomad,
            'waiting_for_stability': self._process_waiting_for_stability,
            'restarting_torrents': self._process_restarting_torrents,
        }

        try:
            # Log detailed remediation state information
            remediation_duration = time.time() - self.state_manager.remediation_start_time
            self.logger.debug(f"Processing remediation state: {state} "
                              f"(duration: {format_human_readable_time(remediation_duration)})")

            handler = handlers.get(state)
            if handler is None:
                # An unrecognised state is never advanced by this class, so
                # make it visible instead of silently returning.
                self.logger.warning(f"Unknown remediation state: {state}")
                return False
            return handler()
        except Exception as e:
            # Top-level boundary for one remediation step: record the
            # traceback, then drop back to the idle state.
            self.logger.exception(f"Unexpected error during remediation: {e}")
            self.logger.error("Resetting remediation state due to unexpected error")
            self.state_manager.reset_remediation_state()
            return False

    def _ensure_session(self) -> bool:
        """
        Make sure a qBittorrent session is available, logging in if needed

        ``remediation_session`` is only populated by ``start_remediation``,
        while the remediation state is owned by the state manager, so a
        non-None state can be observed without a session on this instance.

        Returns:
            True if ``remediation_session`` is usable
        """
        if self.remediation_session:
            return True

        self.logger.warning("No qBittorrent session available for remediation, logging in again")
        self.remediation_session = self.qbittorrent_client.login()
        if not self.remediation_session:
            self.logger.error("Could not log in to qBittorrent")
            return False
        return True

    def _process_stopping_torrents(self) -> bool:
        """
        Process stopping torrents state

        Advances to ``restarting_nomad`` once the tracker's torrents were
        stopped; otherwise the state is left unchanged so the step is
        retried on the next cycle.

        Returns:
            Always False (remediation is not complete in this state)
        """
        stopped = False
        if self._ensure_session():
            stopped = self.qbittorrent_client.stop_tracker_torrents(
                self.remediation_session, self.tracker_name)

        if stopped:
            self.logger.info("Torrents stopped successfully, proceeding to restart Nomad task")
            self.state_manager.update_remediation_state('restarting_nomad')
        else:
            self.logger.error("Failed to stop torrents - retrying in next cycle")
        return False

    def _process_restarting_nomad(self) -> bool:
        """
        Process restarting nomad state

        Advances to ``waiting_for_stability`` once the Nomad task restart
        reports success; otherwise the step is retried on the next cycle.

        Returns:
            Always False (remediation is not complete in this state)
        """
        restarted = self.nomad_client.restart_task_via_allocation(
            job_id="qbittorrent",
            task_name="qbittorrent",
        )
        if restarted:
            self.logger.info("Nomad task restarted successfully, waiting for stable connectivity")
            self.state_manager.update_remediation_state('waiting_for_stability')
        else:
            self.logger.error("Nomad task restart failed - retrying in next cycle")
        return False

    def _process_waiting_for_stability(self) -> bool:
        """
        Process waiting for stability state

        Only enforces the stabilization timeout; the stability check itself
        is handled in the main monitor.

        Returns:
            Always False (remediation is not complete in this state)
        """
        # Check if we've exceeded the stabilization timeout
        # NOTE(review): elapsed time is measured from remediation_start_time,
        # and the default timeout (1800 s) is shorter than the default
        # stability_duration_required (3600 s). Confirm against StateManager
        # and the monitor loop that the stability requirement can actually be
        # reached before this timeout resets the state.
        elapsed_time = time.time() - self.state_manager.remediation_start_time
        if elapsed_time > self.stability_wait_time:
            self.logger.error(
                f"Stabilization timeout reached after {format_human_readable_time(elapsed_time)}")
            self.state_manager.reset_remediation_state()
            return False

        # This state just waits - stability checking is handled in main monitor
        return False

    def _process_restarting_torrents(self) -> bool:
        """
        Process restarting torrents state

        The remediation state is reset whether or not the restart succeeds,
        so this step is attempted only once per remediation run.

        Returns:
            True if the torrents were restarted, False otherwise
        """
        try:
            restarted = False
            if self._ensure_session():
                restarted = self.qbittorrent_client.restart_tracker_torrents(
                    self.remediation_session, self.tracker_name)

            if restarted:
                self.logger.info("Remediation completed successfully")
                self.state_manager.reset_remediation_state()
                return True

            self.logger.error("Failed to restart torrents")
            self.state_manager.reset_remediation_state()
            return False
        except Exception as e:
            self.logger.error(f"Failed to restart torrents: {e}")
            self.state_manager.reset_remediation_state()
            return False

    def should_start_remediation(self) -> bool:
        """
        Check if remediation should be started

        Returns:
            True if the consecutive failure count has reached the configured
            maximum and no remediation is currently in progress
        """
        return (self.state_manager.consecutive_failures >= self.max_consecutive_failures
                and self.state_manager.remediation_state is None)

    def check_stability_requirement_met(self) -> bool:
        """
        Check if stability requirement has been met

        When met, moves the state machine to ``restarting_torrents`` and
        resets the stability timer.

        Returns:
            True if stability requirement met
        """
        stability_start = self.state_manager.stability_start_time
        if (self.state_manager.remediation_state != 'waiting_for_stability'
                or stability_start is None):
            return False

        elapsed_stable_time = time.time() - stability_start
        if elapsed_stable_time < self.stability_duration_required:
            return False

        # Report the configured duration rather than a fixed "1 hour".
        required = format_human_readable_time(self.stability_duration_required)
        self.logger.info(
            f"{required} of stable connectivity achieved, proceeding to restart torrents")
        self.state_manager.update_remediation_state('restarting_torrents')
        self.state_manager.reset_stability_timer()
        return True

    def reset_stability_on_connection_loss(self) -> None:
        """Reset stability timer when connection is lost"""
        if self.state_manager.stability_start_time is not None:
            required = format_human_readable_time(self.stability_duration_required)
            self.logger.warning(
                f"Connection lost during stabilization, resetting stability timer "
                f"({required} required)")
            self.state_manager.reset_stability_timer()