docs(conductor): Synchronize tech-stack and commit monitor script updates
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import subprocess
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
def get_node_map():
|
||||
"""
|
||||
@@ -31,7 +32,7 @@ def get_node_map():
|
||||
|
||||
def get_job_allocations(job_id):
|
||||
"""
|
||||
Returns a list of all active allocations for a job with their IPs.
|
||||
Returns a list of all active allocations for a job with their IPs and uptimes.
|
||||
"""
|
||||
try:
|
||||
# 1. Get list of allocations
|
||||
@@ -56,8 +57,10 @@ def get_job_allocations(job_id):
|
||||
if any(p == "running" for p in parts[3:]):
|
||||
alloc_ids.append(alloc_id)
|
||||
|
||||
# 2. For each allocation, get its IP
|
||||
# 2. For each allocation, get its IP and Uptime
|
||||
allocations = []
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
for alloc_id in alloc_ids:
|
||||
res_alloc = subprocess.run(
|
||||
["nomad", "alloc", "status", alloc_id],
|
||||
@@ -67,6 +70,7 @@ def get_job_allocations(job_id):
|
||||
node_name = ""
|
||||
ip = ""
|
||||
full_id = alloc_id
|
||||
uptime = "N/A"
|
||||
|
||||
for l in res_alloc.stdout.splitlines():
|
||||
if l.startswith("ID") and "=" in l:
|
||||
@@ -79,11 +83,32 @@ def get_job_allocations(job_id):
|
||||
m = re.search(r"(\d+\.\d+\.\d+\.\d+):", l)
|
||||
if m:
|
||||
ip = m.group(1)
|
||||
|
||||
# Extract Uptime from Started At
|
||||
if "Started At" in l and "=" in l:
|
||||
# e.g. "Started At = 2026-02-09T14:04:28Z"
|
||||
ts_str = l.split("=")[1].strip()
|
||||
if ts_str and ts_str != "N/A":
|
||||
try:
|
||||
# Parse ISO timestamp
|
||||
started_at = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
||||
duration = now - started_at
|
||||
# Format duration
|
||||
secs = int(duration.total_seconds())
|
||||
if secs < 60:
|
||||
uptime = f"{secs}s"
|
||||
elif secs < 3600:
|
||||
uptime = f"{secs//60}m{secs%60}s"
|
||||
else:
|
||||
uptime = f"{secs//3600}h{(secs%3600)//60}m"
|
||||
except Exception:
|
||||
uptime = ts_str
|
||||
|
||||
allocations.append({
|
||||
"id": full_id,
|
||||
"node": node_name,
|
||||
"ip": ip
|
||||
"ip": ip,
|
||||
"uptime": uptime
|
||||
})
|
||||
|
||||
return allocations
|
||||
@@ -183,4 +208,4 @@ def restart_allocation(alloc_id):
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Error restarting allocation: {e}", file=sys.stderr)
|
||||
return False
|
||||
return False
|
||||
Reference in New Issue
Block a user