conductor(checkpoint): Checkpoint end of Phase 1
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import subprocess
|
||||
import re
|
||||
import sys
|
||||
|
||||
def get_node_map():
|
||||
"""
|
||||
@@ -18,8 +19,14 @@ def get_node_map():
|
||||
if len(parts) >= 4:
|
||||
node_map[parts[0]] = parts[3]
|
||||
return node_map
|
||||
except FileNotFoundError:
|
||||
print("Warning: 'nomad' binary not found in PATH.", file=sys.stderr)
|
||||
return {}
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Warning: Failed to query Nomad nodes: {e}", file=sys.stderr)
|
||||
return {}
|
||||
except Exception as e:
|
||||
print(f"Error getting node map: {e}")
|
||||
print(f"Error getting node map: {e}", file=sys.stderr)
|
||||
return {}
|
||||
|
||||
def get_allocation_id(node_name, job_id):
|
||||
@@ -57,8 +64,10 @@ def get_allocation_id(node_name, job_id):
|
||||
return l.split("=")[1].strip()
|
||||
return alloc_id
|
||||
|
||||
except FileNotFoundError:
|
||||
return None # Warning already printed by get_node_map likely
|
||||
except Exception as e:
|
||||
print(f"Error getting allocation ID: {e}")
|
||||
print(f"Error getting allocation ID: {e}", file=sys.stderr)
|
||||
|
||||
return None
|
||||
|
||||
@@ -81,7 +90,23 @@ def get_allocation_logs(alloc_id, tail=20):
|
||||
)
|
||||
return result.stdout
|
||||
except Exception as e:
|
||||
return f"Error fetching logs: {e}"
|
||||
# Don't print stack trace, just the error
|
||||
return f"Nomad Error: {str(e)}"
|
||||
|
||||
def exec_command(alloc_id, command, task="navidrome"):
    """
    Executes a command inside a specific allocation and task.

    Runs ``nomad alloc exec -task <task> <alloc_id> <command...>`` and
    captures its output as text.

    Args:
        alloc_id: Allocation ID passed to ``nomad alloc exec``.
        command: The command to run, as a sequence of argument strings
            (list or tuple). A single string is also accepted and is split
            into arguments with shell-like quoting rules.
        task: Name of the task inside the allocation (default "navidrome").

    Returns:
        The captured stdout of the command on success. On any failure the
        function does not raise; it returns a string starting with
        "Nomad Error: ". When the command exits non-zero, the captured
        stderr is appended so the real cause is not lost.
    """
    # Local import: the module's top-level imports are left untouched.
    import shlex

    try:
        if isinstance(command, str):
            # Unpacking a bare string would split it into single characters.
            command = shlex.split(command)
        # Unpacking (rather than list + command) also accepts tuples.
        args = ["nomad", "alloc", "exec", "-task", task, alloc_id, *command]
        result = subprocess.run(
            args,
            capture_output=True, text=True, check=True
        )
        return result.stdout
    except subprocess.CalledProcessError as e:
        # str(e) only has the argv and exit status; the useful diagnostic
        # is in the captured stderr.
        detail = (e.stderr or "").strip()
        if detail:
            return f"Nomad Error: {e} stderr: {detail}"
        return f"Nomad Error: {e}"
    except Exception as e:
        # Don't print stack trace, just return error string
        return f"Nomad Error: {e}"
|
||||
|
||||
def restart_allocation(alloc_id):
|
||||
"""
|
||||
@@ -94,5 +119,5 @@ def restart_allocation(alloc_id):
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Error restarting allocation: {e}")
|
||||
print(f"Error restarting allocation: {e}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user