# NomadBackup/backup.py

#!/usr/bin/env python3
"""
Backup HashiCorp Nomad job specifications.
Compatible with Nomad 1.10.2
"""

import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
from urllib.parse import quote

import requests


class NomadBackup:
    """Back up Nomad job specifications through the Nomad HTTP API.

    For every job returned by the job list endpoint (child jobs excluded), the
    original submission is fetched and written to its own ``.hcl`` or ``.json``
    file inside a single output directory.
    """

    def __init__(self, nomad_addr: str = "http://localhost:4646", token: Optional[str] = None,
                 *, timeout: float = 30.0):
        """
        Initialize Nomad backup client.

        Args:
            nomad_addr: Nomad API address (default: http://localhost:4646)
            token: Nomad ACL token if authentication is enabled
            timeout: Seconds to wait for each HTTP request before giving up
        """
        self.nomad_addr = nomad_addr.rstrip('/')
        self.headers = {}
        if token:
            self.headers['X-Nomad-Token'] = token
        # Without a timeout a stalled server would block the backup forever.
        self.timeout = timeout

    def _job_url(self, job_id: str, suffix: str = "") -> str:
        """Build the API URL for a job, percent-encoding the job ID."""
        # safe='' also encodes '/', so an ID such as "team/app" stays a single
        # path segment instead of being read as a different endpoint.
        url = f"{self.nomad_addr}/v1/job/{quote(job_id, safe='')}"
        return f"{url}/{suffix}" if suffix else url

    def _get_json(self, url: str, params: Optional[Dict] = None):
        """GET ``url`` and return the decoded JSON body.

        Raises:
            requests.exceptions.RequestException: On connection errors,
                timeouts or non-2xx responses.
            ValueError: If the response body is not valid JSON.
        """
        resp = requests.get(url, headers=self.headers, params=params, timeout=self.timeout)
        resp.raise_for_status()
        return resp.json()

    def get_jobs(self) -> List[Dict]:
        """Retrieve list of all jobs from Nomad.

        Prints an error and exits the process with status 1 if the list
        cannot be retrieved.
        """
        url = f"{self.nomad_addr}/v1/jobs"
        try:
            return self._get_json(url)
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error retrieving jobs: {e}")
            sys.exit(1)

    def get_job_version(self, job_id: str) -> Optional[int]:
        """Get the current version of a job.

        Returns:
            The job's ``Version`` field, or None if the request failed or the
            response did not contain one.
        """
        try:
            job_info = self._get_json(self._job_url(job_id))
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error retrieving job version for {job_id}: {e}")
            return None
        if not isinstance(job_info, dict):
            print(f"Error retrieving job version for {job_id}: unexpected response")
            return None
        return job_info.get('Version')

    def get_job_spec(self, job_id: str) -> tuple:
        """
        Retrieve original job submission (what was actually defined).

        Args:
            job_id: The job ID to retrieve

        Returns:
            Tuple of (job specification, format) where format is 'hcl' or 'json'.
            HCL specifications are returned as a string, JSON ones as parsed
            data. Returns (None, None) if the job could not be retrieved.
        """
        # First get the current version
        version = self.get_job_version(job_id)
        if version is None:
            return None, None

        # Use submission endpoint to get original jobspec without defaults
        try:
            submission = self._get_json(
                self._job_url(job_id, "submission"),
                params={'version': version},
            )
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error retrieving job {job_id}: {e}")
            return None, None

        if not isinstance(submission, dict):
            print(f"Error retrieving job {job_id}: unexpected submission response")
            return None, None

        # "or" also covers an explicit null Format, not just a missing key.
        job_format = (submission.get('Format') or 'json').lower()

        # NOTE(review): only the Source text is saved; if submissions also carry
        # variable values they are not backed up here - confirm whether needed.
        source = submission.get('Source')
        if source:
            # Any HCL flavour (hcl, hcl1, hcl2) is stored as-is; the source is
            # already a string with the HCL content.
            if job_format.startswith('hcl'):
                return source, 'hcl'
            try:
                return json.loads(source), 'json'
            except json.JSONDecodeError:
                print(f"  Warning: Could not parse Source as JSON for {job_id}, using Job field instead")
                if 'Job' in submission:
                    return submission['Job'], 'json'
                return submission, 'json'

        if 'Job' in submission:
            # Some submissions may have Job field directly (always JSON)
            return submission['Job'], 'json'

        print(f"  Warning: No Source or Job found in submission for {job_id}, returning full submission")
        return submission, 'json'

    def sanitize_filename(self, job_id: str) -> str:
        """Convert job ID to safe filename.

        Slashes, backslashes and colons are replaced with underscores. Distinct
        IDs can therefore map to the same name (e.g. "a/b" and "a_b").
        """
        return job_id.replace('/', '_').replace('\\', '_').replace(':', '_')

    def _write_spec(self, filepath: Path, job_spec, job_format: Optional[str]) -> None:
        """Write one job specification to ``filepath`` as UTF-8."""
        if job_format == 'hcl':
            # newline='' keeps the submitted line endings untouched, so the
            # backup matches the original source on every platform.
            with open(filepath, 'w', encoding='utf-8', newline='') as f:
                f.write(job_spec)
        else:
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(job_spec, f, indent=2)

    def backup_jobs(self, output_dir: str = "nomad_backup") -> int:
        """
        Backup all Nomad jobs to individual JSON/HCL files.

        Args:
            output_dir: Directory to save backup files

        Returns:
            0 if every job was saved, 1 if at least one job failed.
        """
        # Create output directory (without timestamp for Git version control)
        backup_path = Path(output_dir)
        backup_path.mkdir(parents=True, exist_ok=True)

        print(f"Backing up Nomad jobs to: {backup_path}")

        jobs = self.get_jobs()
        print(f"Found {len(jobs)} jobs")

        # Filter out periodic child jobs (they don't have submissions)
        # Periodic children have IDs like "parent/periodic-timestamp"
        parent_jobs = []
        for job in jobs:
            job_id = job['ID']
            if '/periodic-' in job_id or job.get('ParentID'):
                print(f"Skipping periodic child job: {job_id}")
                continue
            parent_jobs.append(job)

        print(f"Backing up {len(parent_jobs)} parent jobs (excluding periodic children)")

        success_count = 0
        failed_jobs = []

        for job in parent_jobs:
            job_id = job['ID']
            job_name = job.get('Name', job_id)

            print(f"Backing up job: {job_name} ({job_id})")

            job_spec, job_format = self.get_job_spec(job_id)
            if not job_spec:
                print("  ✗ Failed to retrieve job spec")
                failed_jobs.append(job_id)
                continue

            # Determine file extension based on format
            ext = 'hcl' if job_format == 'hcl' else 'json'
            filename = f"{self.sanitize_filename(job_id)}.{ext}"

            try:
                self._write_spec(backup_path / filename, job_spec, job_format)
            except OSError as e:
                print(f"  ✗ Failed to write file: {e}")
                failed_jobs.append(job_id)
            else:
                print(f"  ✓ Saved to {filename}")
                success_count += 1

        # Summary
        print("\n" + "=" * 50)
        print("Backup complete!")
        print(f"Successfully backed up: {success_count}/{len(parent_jobs)} jobs")
        print(f"Backup location: {backup_path.absolute()}")

        if failed_jobs:
            print(f"\nFailed jobs ({len(failed_jobs)}):")
            for job_id in failed_jobs:
                print(f"  - {job_id}")
            return 1

        return 0


def main():
    """Parse command-line options, run the backup and exit with its status.

    The address and token default to the NOMAD_ADDR and NOMAD_TOKEN
    environment variables; the process exit code is whatever
    ``NomadBackup.backup_jobs`` returns.
    """
    import argparse

    # Shown verbatim after the option list (raw formatter keeps the layout).
    usage_examples = """
Examples:
  # Backup jobs from default local Nomad
  python nomad_backup.py

  # Backup from remote Nomad cluster
  python nomad_backup.py --addr https://nomad.example.com:4646

  # Backup with ACL token
  python nomad_backup.py --token your-nomad-token

  # Custom output directory
  python nomad_backup.py --output /backups/nomad
        """

    cli = argparse.ArgumentParser(
        description='Backup HashiCorp Nomad job specifications',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    env = os.environ
    cli.add_argument(
        '--addr',
        default=env.get('NOMAD_ADDR', 'http://localhost:4646'),
        help='Nomad API address (default: $NOMAD_ADDR or http://localhost:4646)',
    )
    cli.add_argument(
        '--token',
        default=env.get('NOMAD_TOKEN'),
        help='Nomad ACL token (default: $NOMAD_TOKEN)',
    )
    cli.add_argument(
        '--output', '-o',
        default='nomad_backup',
        help='Output directory for backups (default: nomad_backup)',
    )

    options = cli.parse_args()

    # Build the client from the parsed options and propagate its result code.
    client = NomadBackup(nomad_addr=options.addr, token=options.token)
    sys.exit(client.backup_jobs(output_dir=options.output))


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()