#!/usr/bin/env python3
"""
Backup HashiCorp Nomad job specifications.

Compatible with Nomad 1.10.2
"""
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import requests
class NomadBackup:
    """Client that backs up Nomad job specifications via the HTTP API.

    Retrieves each job's original submission (the spec as it was written,
    HCL or JSON, without server-side defaults filled in) and writes it to
    individual files suitable for version control.
    """

    # Upper bound (seconds) on any single HTTP request so a dead or
    # unreachable Nomad API cannot hang the backup forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, nomad_addr: str = "http://localhost:4646", token: Optional[str] = None):
        """
        Initialize Nomad backup client.

        Args:
            nomad_addr: Nomad API address (default: http://localhost:4646)
            token: Nomad ACL token if authentication is enabled
        """
        # Strip trailing slash so URL joins below never produce "//v1/...".
        self.nomad_addr = nomad_addr.rstrip('/')
        self.headers = {}
        if token:
            self.headers['X-Nomad-Token'] = token

    def get_jobs(self) -> List[Dict]:
        """Retrieve list of all jobs from Nomad.

        Exits the process with status 1 on API failure, since nothing can
        be backed up without the job list.
        """
        url = f"{self.nomad_addr}/v1/jobs"
        try:
            resp = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            resp.raise_for_status()
            return resp.json()
        except requests.exceptions.RequestException as e:
            print(f"Error retrieving jobs: {e}")
            sys.exit(1)

    def get_job_version(self, job_id: str) -> Optional[int]:
        """Get the current version of a job, or None on API error."""
        url = f"{self.nomad_addr}/v1/job/{job_id}"
        try:
            resp = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            resp.raise_for_status()
            job_info = resp.json()
            return job_info.get('Version')
        except requests.exceptions.RequestException as e:
            print(f"Error retrieving job version for {job_id}: {e}")
            return None

    def get_job_spec(self, job_id: str) -> tuple:
        """
        Retrieve original job submission (what was actually defined).

        Args:
            job_id: The job ID to retrieve

        Returns:
            Tuple of (job specification, format) where format is 'hcl' or
            'json'; (None, None) when the job could not be retrieved.
        """
        # The submission endpoint requires the version being queried.
        version = self.get_job_version(job_id)
        if version is None:
            return None, None

        # Use submission endpoint to get original jobspec without defaults
        url = f"{self.nomad_addr}/v1/job/{job_id}/submission"
        params = {'version': version}

        try:
            resp = requests.get(url, headers=self.headers, params=params,
                                timeout=self.REQUEST_TIMEOUT)
            resp.raise_for_status()
            submission = resp.json()

            # Check the Format field to determine if it was HCL or JSON
            job_format = submission.get('Format', 'json').lower()

            # The submission endpoint returns a metadata wrapper; the actual
            # job spec lives in the Source field.
            if 'Source' in submission and submission['Source']:
                source = submission['Source']

                if job_format in ['hcl', 'hcl2']:
                    # HCL Source is already a string with the full content.
                    return source, 'hcl'
                else:
                    # Try to parse as JSON; fall back to the Job field when
                    # Source is not valid JSON.
                    try:
                        return json.loads(source), 'json'
                    except json.JSONDecodeError:
                        print(f"  Warning: Could not parse Source as JSON for {job_id}, using Job field instead")
                        if 'Job' in submission:
                            return submission['Job'], 'json'
                        return submission, 'json'
            elif 'Job' in submission:
                # Some submissions may have Job field directly (always JSON)
                return submission['Job'], 'json'
            else:
                print(f"  Warning: No Source or Job found in submission for {job_id}, returning full submission")
                return submission, 'json'
        except requests.exceptions.RequestException as e:
            print(f"Error retrieving job {job_id}: {e}")
            return None, None

    def sanitize_filename(self, job_id: str) -> str:
        """Convert job ID to safe filename."""
        # Replace path separators and colons, which are unsafe in filenames
        # on common platforms.
        return job_id.replace('/', '_').replace('\\', '_').replace(':', '_')

    def backup_jobs(self, output_dir: str = "nomad_backup") -> int:
        """
        Backup all Nomad jobs to individual JSON/HCL files.

        Args:
            output_dir: Directory to save backup files

        Returns:
            0 when every job was backed up, 1 when any job failed.
        """
        # Create output directory (without timestamp for Git version control)
        backup_path = Path(output_dir)
        backup_path.mkdir(parents=True, exist_ok=True)

        print(f"Backing up Nomad jobs to: {backup_path}")

        # Get all jobs
        jobs = self.get_jobs()
        print(f"Found {len(jobs)} jobs")

        # Filter out periodic child jobs (they don't have submissions).
        # Periodic children have IDs like "parent/periodic-timestamp".
        parent_jobs = []
        for job in jobs:
            job_id = job['ID']
            if '/periodic-' in job_id or job.get('ParentID'):
                print(f"Skipping periodic child job: {job_id}")
                continue
            parent_jobs.append(job)

        print(f"Backing up {len(parent_jobs)} parent jobs (excluding periodic children)")

        success_count = 0
        failed_jobs = []

        # Backup each job
        for job in parent_jobs:
            job_id = job['ID']
            job_name = job.get('Name', job_id)

            print(f"Backing up job: {job_name} ({job_id})")

            job_spec, job_format = self.get_job_spec(job_id)

            # BUG FIX: explicit None check — get_job_spec signals failure
            # with None, so an empty-but-valid spec ("" or {}) must not be
            # treated as a failure by truthiness.
            if job_spec is not None:
                # Determine file extension based on format
                ext = 'hcl' if job_format == 'hcl' else 'json'
                filename = f"{self.sanitize_filename(job_id)}.{ext}"
                filepath = backup_path / filename

                try:
                    with open(filepath, 'w') as f:
                        if job_format == 'hcl':
                            # Write HCL as plain text
                            f.write(job_spec)
                        else:
                            # Write JSON with formatting
                            json.dump(job_spec, f, indent=2)
                    # BUG FIX: previously printed a literal placeholder
                    # instead of the destination path.
                    print(f"  ✓ Saved to {filepath}")
                    success_count += 1
                except IOError as e:
                    print(f"  ✗ Failed to write file: {e}")
                    failed_jobs.append(job_id)
            else:
                print("  ✗ Failed to retrieve job spec")
                failed_jobs.append(job_id)

        # Summary
        print("\n" + "=" * 50)
        print("Backup complete!")
        print(f"Successfully backed up: {success_count}/{len(parent_jobs)} jobs")
        print(f"Backup location: {backup_path.absolute()}")

        if failed_jobs:
            print(f"\nFailed jobs ({len(failed_jobs)}):")
            for job_id in failed_jobs:
                print(f"  - {job_id}")
            return 1

        return 0
|
|
|
|
|
|
def main():
    """Command-line entry point: parse CLI options and run a backup."""
    import argparse

    # Shown verbatim below the --help text.
    usage_examples = """
Examples:
  # Backup jobs from default local Nomad
  python nomad_backup.py

  # Backup from remote Nomad cluster
  python nomad_backup.py --addr https://nomad.example.com:4646

  # Backup with ACL token
  python nomad_backup.py --token your-nomad-token

  # Custom output directory
  python nomad_backup.py --output /backups/nomad
"""

    cli = argparse.ArgumentParser(
        description='Backup HashiCorp Nomad job specifications',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument(
        '--addr',
        default=os.environ.get('NOMAD_ADDR', 'http://localhost:4646'),
        help='Nomad API address (default: $NOMAD_ADDR or http://localhost:4646)',
    )
    cli.add_argument(
        '--token',
        default=os.environ.get('NOMAD_TOKEN'),
        help='Nomad ACL token (default: $NOMAD_TOKEN)',
    )
    cli.add_argument(
        '--output', '-o',
        default='nomad_backup',
        help='Output directory for backups (default: nomad_backup)',
    )
    opts = cli.parse_args()

    # Run the backup and propagate its exit status to the shell.
    runner = NomadBackup(nomad_addr=opts.addr, token=opts.token)
    sys.exit(runner.backup_jobs(output_dir=opts.output))
|
|
|
|
|
|
# Run the backup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()