conductor(checkpoint): Checkpoint end of Phase 1

This commit is contained in:
2026-02-08 11:15:55 -08:00
parent 22ec8a5cc0
commit 860000bd04
8 changed files with 204 additions and 50 deletions

View File

@@ -1,5 +1,6 @@
import subprocess
import re
import sys
def get_node_map():
"""
@@ -18,8 +19,14 @@ def get_node_map():
if len(parts) >= 4:
node_map[parts[0]] = parts[3]
return node_map
except FileNotFoundError:
print("Warning: 'nomad' binary not found in PATH.", file=sys.stderr)
return {}
except subprocess.CalledProcessError as e:
print(f"Warning: Failed to query Nomad nodes: {e}", file=sys.stderr)
return {}
except Exception as e:
print(f"Error getting node map: {e}")
print(f"Error getting node map: {e}", file=sys.stderr)
return {}
def get_allocation_id(node_name, job_id):
@@ -57,8 +64,10 @@ def get_allocation_id(node_name, job_id):
return l.split("=")[1].strip()
return alloc_id
except FileNotFoundError:
return None # Warning already printed by get_node_map likely
except Exception as e:
print(f"Error getting allocation ID: {e}")
print(f"Error getting allocation ID: {e}", file=sys.stderr)
return None
@@ -81,7 +90,23 @@ def get_allocation_logs(alloc_id, tail=20):
)
return result.stdout
except Exception as e:
return f"Error fetching logs: {e}"
# Don't print stack trace, just the error
return f"Nomad Error: {str(e)}"
def exec_command(alloc_id, command, task="navidrome"):
    """
    Executes a command inside a specific allocation and task.

    Runs ``nomad alloc exec -task <task> <alloc_id> <command...>`` and
    captures its output as text.

    Args:
        alloc_id: Identifier of the allocation to execute in.
        command: List of argument strings (program followed by its
            arguments) appended to the ``nomad alloc exec`` invocation.
        task: Name of the task inside the allocation.

    Returns:
        The command's stdout on success. On any failure, a string that
        starts with ``"Nomad Error: "``; when the command exited non-zero
        and wrote to stderr, that stderr text is appended so the caller
        can see why it failed. This function never raises.
    """
    try:
        args = ["nomad", "alloc", "exec", "-task", task, alloc_id] + command
        result = subprocess.run(
            args,
            capture_output=True, text=True, check=True
        )
        return result.stdout
    except subprocess.CalledProcessError as e:
        # str(e) only carries the exit status; the actual reason for the
        # failure is in the captured stderr, so surface it when present.
        detail = (e.stderr or "").strip()
        if detail:
            return f"Nomad Error: {e} stderr: {detail}"
        return f"Nomad Error: {e}"
    except Exception as e:
        # Best-effort helper: don't print a stack trace, just return the
        # error as a string (covers a missing 'nomad' binary, bad args, ...).
        return f"Nomad Error: {e}"
def restart_allocation(alloc_id):
"""
@@ -94,5 +119,5 @@ def restart_allocation(alloc_id):
)
return True
except Exception as e:
print(f"Error restarting allocation: {e}")
print(f"Error restarting allocation: {e}", file=sys.stderr)
return False