Files
NomadBackup/backup.py
2025-11-21 13:07:40 -08:00

244 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""
Backup HashiCorp Nomad job specifications.
Compatible with Nomad 1.10.2
"""
import requests
import json
import os
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional
class NomadBackup:
    """
    Export HashiCorp Nomad job submissions to individual files.

    The client lists jobs through the Nomad HTTP API, looks up each job's
    current version, fetches the submission stored for that version and
    writes the original source to ``<job id>.hcl`` or ``<job id>.json``.
    """

    def __init__(self, nomad_addr: str = "http://localhost:4646", token: Optional[str] = None,
                 timeout: float = 30.0):
        """
        Initialize Nomad backup client.

        Args:
            nomad_addr: Nomad API address (default: http://localhost:4646)
            token: Nomad ACL token if authentication is enabled
            timeout: Seconds to wait on each HTTP request before giving up
        """
        self.nomad_addr = nomad_addr.rstrip('/')
        self.headers = {}
        if token:
            self.headers['X-Nomad-Token'] = token
        # requests applies no timeout by default; without one a stalled agent
        # would block the whole backup run indefinitely.
        self.timeout = timeout

    def _get(self, path: str, params: Optional[Dict] = None):
        """Send a GET request to the Nomad API and return the decoded JSON body.

        Args:
            path: API path beginning with '/', appended to the configured address
            params: Optional query-string parameters

        Raises:
            requests.exceptions.RequestException: On connection errors, timeouts
                or non-2xx responses.
            ValueError: If the response body is not valid JSON.
        """
        resp = requests.get(
            f"{self.nomad_addr}{path}",
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        resp.raise_for_status()
        return resp.json()

    def get_jobs(self) -> List[Dict]:
        """Retrieve list of all jobs from Nomad.

        Prints the error and exits the process with status 1 when the job
        list cannot be retrieved.
        """
        try:
            return self._get("/v1/jobs")
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable body on requests versions whose
            # JSON error is not a RequestException subclass.
            print(f"Error retrieving jobs: {e}")
            sys.exit(1)

    def get_job_version(self, job_id: str) -> Optional[int]:
        """Get the current version of a job.

        Returns:
            The job's 'Version' field, or None if the request failed or the
            field is absent.
        """
        try:
            job_info = self._get(f"/v1/job/{job_id}")
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error retrieving job version for {job_id}: {e}")
            return None
        return job_info.get('Version')

    def get_job_spec(self, job_id: str) -> tuple:
        """
        Retrieve original job submission (what was actually defined).

        Args:
            job_id: The job ID to retrieve

        Returns:
            Tuple of (job specification, format) where format is 'hcl' or 'json'.
            Both elements are None when the version or the submission could
            not be retrieved.
        """
        # The submission endpoint is queried for the job's current version.
        version = self.get_job_version(job_id)
        if version is None:
            return None, None

        # Use submission endpoint to get original jobspec without defaults
        try:
            submission = self._get(
                f"/v1/job/{job_id}/submission",
                params={'version': version},
            )
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error retrieving job {job_id}: {e}")
            return None, None

        return self._parse_submission(job_id, submission)

    def _parse_submission(self, job_id: str, submission: Dict) -> tuple:
        """Extract the job specification and its format from a submission body.

        Args:
            job_id: Job ID, used only in warning messages
            submission: Decoded JSON body of the submission endpoint

        Returns:
            Tuple of (job specification, format). HCL sources are returned as
            the raw string with format 'hcl'; everything else is returned as
            parsed JSON with format 'json'.
        """
        # NOTE(review): only Source is kept; any Variables / VariableFlags in
        # the submission are not saved - confirm whether restores need them.

        # Format can be missing or null; both are treated as JSON.
        job_format = (submission.get('Format') or 'json').lower()
        source = submission.get('Source')

        if source:
            # Any HCL flavour ('hcl', 'hcl1', 'hcl2') is stored verbatim.
            # Matching on the prefix keeps an 'hcl1' source from being pushed
            # through the JSON parser and saved as a metadata wrapper.
            if job_format.startswith('hcl'):
                return source, 'hcl'
            try:
                return json.loads(source), 'json'
            except json.JSONDecodeError:
                print(f" Warning: Could not parse Source as JSON for {job_id}, using Job field instead")
                if 'Job' in submission:
                    return submission['Job'], 'json'
                return submission, 'json'

        if 'Job' in submission:
            # Some submissions may have Job field directly (always JSON)
            return submission['Job'], 'json'

        print(f" Warning: No Source or Job found in submission for {job_id}, returning full submission")
        return submission, 'json'

    def sanitize_filename(self, job_id: str) -> str:
        """Convert job ID to safe filename."""
        # Replace slashes and other problematic characters
        return job_id.replace('/', '_').replace('\\', '_').replace(':', '_')

    def _write_spec(self, filepath, job_spec, job_format: Optional[str]) -> None:
        """Write one job specification to ``filepath``.

        HCL is written as plain text; anything else is dumped as indented JSON.

        Raises:
            OSError: If the file cannot be opened or written.
        """
        # Explicit UTF-8 so non-ASCII HCL does not depend on the locale's
        # default encoding.
        with open(filepath, 'w', encoding='utf-8') as f:
            if job_format == 'hcl':
                f.write(job_spec)
            else:
                json.dump(job_spec, f, indent=2)

    def backup_jobs(self, output_dir: str = "nomad_backup") -> int:
        """
        Backup all Nomad jobs to individual JSON/HCL files.

        Args:
            output_dir: Directory to save backup files

        Returns:
            0 when every job was saved, 1 when at least one job failed.
        """
        # Create output directory (without timestamp for Git version control)
        backup_path = Path(output_dir)
        backup_path.mkdir(parents=True, exist_ok=True)
        print(f"Backing up Nomad jobs to: {backup_path}")

        # Get all jobs
        jobs = self.get_jobs()
        print(f"Found {len(jobs)} jobs")

        # Filter out periodic child jobs (they don't have submissions)
        # Periodic children have IDs like "parent/periodic-timestamp"
        parent_jobs = []
        for job in jobs:
            job_id = job['ID']
            # Skip if this looks like a periodic child job
            if '/periodic-' in job_id or job.get('ParentID'):
                print(f"Skipping periodic child job: {job_id}")
                continue
            parent_jobs.append(job)
        print(f"Backing up {len(parent_jobs)} parent jobs (excluding periodic children)")

        success_count = 0
        failed_jobs = []

        # Backup each job
        for job in parent_jobs:
            job_id = job['ID']
            job_name = job.get('Name', job_id)
            print(f"Backing up job: {job_name} ({job_id})")

            # Get job specification
            job_spec, job_format = self.get_job_spec(job_id)
            if not job_spec:
                print(f" ✗ Failed to retrieve job spec")
                failed_jobs.append(job_id)
                continue

            # Determine file extension based on format
            ext = 'hcl' if job_format == 'hcl' else 'json'
            filename = f"{self.sanitize_filename(job_id)}.{ext}"
            try:
                self._write_spec(backup_path / filename, job_spec, job_format)
            except OSError as e:
                print(f" ✗ Failed to write file: {e}")
                failed_jobs.append(job_id)
            else:
                print(f" ✓ Saved to {filename}")
                success_count += 1

        # Summary
        print("\n" + "="*50)
        print(f"Backup complete!")
        print(f"Successfully backed up: {success_count}/{len(parent_jobs)} jobs")
        print(f"Backup location: {backup_path.absolute()}")
        if failed_jobs:
            print(f"\nFailed jobs ({len(failed_jobs)}):")
            for job_id in failed_jobs:
                print(f" - {job_id}")
            return 1
        return 0
def main():
    """Command-line entry point: parse options, run the backup, exit with its status."""
    import argparse

    # Printed verbatim below the option list (RawDescriptionHelpFormatter
    # keeps the line breaks as written).
    usage_examples = """
Examples:
# Backup jobs from default local Nomad
python nomad_backup.py
# Backup from remote Nomad cluster
python nomad_backup.py --addr https://nomad.example.com:4646
# Backup with ACL token
python nomad_backup.py --token your-nomad-token
# Custom output directory
python nomad_backup.py --output /backups/nomad
"""

    cli = argparse.ArgumentParser(
        description='Backup HashiCorp Nomad job specifications',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # (flags, keyword settings) for each supported option; address and token
    # fall back to the NOMAD_ADDR / NOMAD_TOKEN environment variables.
    option_table = (
        (
            ('--addr',),
            {
                'default': os.environ.get('NOMAD_ADDR', 'http://localhost:4646'),
                'help': 'Nomad API address (default: $NOMAD_ADDR or http://localhost:4646)',
            },
        ),
        (
            ('--token',),
            {
                'default': os.environ.get('NOMAD_TOKEN'),
                'help': 'Nomad ACL token (default: $NOMAD_TOKEN)',
            },
        ),
        (
            ('--output', '-o'),
            {
                'default': 'nomad_backup',
                'help': 'Output directory for backups (default: nomad_backup)',
            },
        ),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args()

    # Build the client, run the backup and use its return value as the
    # process exit status.
    client = NomadBackup(nomad_addr=opts.addr, token=opts.token)
    sys.exit(client.backup_jobs(output_dir=opts.output))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()