Files
NomadBackup/nomad_backup/prometheus.hcl
2025-11-21 21:10:40 +00:00

196 lines
4.0 KiB
HCL

job "prometheus" {
# region = "global"
datacenters = ["dc1"]
type = "service"
# priority = 50
constraint {
  # Only place this job on clients whose kernel fingerprint is "linux".
  # The operator is spelled out; "=" is the comparison used when it is omitted.
  attribute = "${attr.kernel.name}"
  operator  = "="
  value     = "linux"
}
constraint {
  # Restrict placement to clients whose unique hostname matches the regular
  # expression below (hosts named "odroid...").
  operator  = "regexp"
  attribute = "${attr.unique.hostname}"
  value     = "odroid.*"
}
# constraint {
# attribute = "${attr.unique.hostname}"
# operator = "regexp"
# value = "pi.*"
# }
update {
  # Roll out changes one allocation at a time.
  max_parallel = 1

  # Wait 60 seconds between update batches.
  # NOTE(review): the Nomad docs describe `stagger` as applying to system
  # jobs; this job is type "service" - confirm this setting still has an
  # effect on the targeted Nomad version.
  stagger = "60s"
}
group "prometheus" {
count = 2
restart {
  # Local restart policy for every task in this group: up to 2 restart
  # attempts per 1-minute interval, waiting 10 seconds before each attempt.
  # With mode "delay" the policy keeps retrying after the interval elapses
  # instead of marking the allocation as failed.
  mode     = "delay"
  interval = "1m"
  attempts = 2
  delay    = "10s"
}
task "prometheus" {
  # Runs the Prometheus server in Docker. The main configuration and the
  # alert rules are rendered from Consul KV into the task's local/ directory
  # by the template blocks below and bind-mounted into /etc/prometheus.
  driver = "docker"

  config {
    image = "prom/prometheus"
    args = [
      "--web.enable-admin-api",
      "--config.file=/etc/prometheus/prometheus.yml",
    ]
    ports = ["http"]

    # Hard memory ceiling for the container, in MB. Written as a number
    # rather than a quoted string. With this set, `resources.memory` below
    # acts as the (much smaller) soft reservation.
    memory_hard_limit = 512

    volumes = [
      "local/prometheus.yml:/etc/prometheus/prometheus.yml",
      "local/alerts.yml:/etc/prometheus/alerts.yml",
    ]
    // "/mnt/gv0/prom_data:/data",
    // "/etc/localtime:/etc/localtime",

    logging {
      type = "json-file"
    }
  }

  service {
    # Registered under the task group name.
    name = "${TASKGROUP}"
    tags = ["global", "prometheus"]
    port = "http"

    check {
      name     = "alive"
      type     = "http"
      interval = "10s"
      timeout  = "120s"
      # Use Prometheus' dedicated health endpoint instead of the "/status"
      # web UI route, so the check does not depend on the UI being served.
      path = "/-/healthy"
      port = "http"
    }
  }

  # Main configuration, read from the Consul KV key "prometheus_yml".
  # A change sends SIGHUP to the task so Prometheus reloads in place.
  # NOTE(review): the rendered file reaches the container through a
  # single-file bind mount; confirm a SIGHUP reload actually sees the
  # re-rendered content.
  template {
    change_mode   = "signal"
    change_signal = "SIGHUP"
    data          = "{{ key \"prometheus_yml\" }}"
    destination   = "local/prometheus.yml"
  }

  # Alert rules, read from the Consul KV key "alerts". A change restarts
  # the task.
  template {
    change_mode = "restart"
    destination = "local/alerts.yml"
    data        = "{{ key \"alerts\" }}"
  }

  resources {
    cpu    = 500 # MHz
    memory = 48  # MB (soft limit; the hard ceiling is memory_hard_limit above)
  }

  # Specify configuration related to log rotation
  logs {
    max_files     = 10
    max_file_size = 15 # MB per file
  }

  kill_timeout = "10s"
} ## end prometheus
task "alertmanager" {
  # Runs Alertmanager in Docker. Its configuration is rendered inline by the
  # template block below into local/alertmanager.yml and read from the
  # task's /local directory inside the container.
  driver = "docker"

  config {
    image = "prom/alertmanager"
    ports = ["alerthttp"]
    // volumes = [
    // "local/alertmanager.yml:/etc/prometheus/prometheus.yml",
    // ]
    args = ["--config.file=/local/alertmanager.yml"]
    // "/mnt/gv0/prom_data:/data",
    // "/etc/localtime:/etc/localtime",

    logging {
      type = "json-file"
    }
  }

  service {
    # Registered under the task name ("alertmanager").
    name = "${TASK}"
    tags = ["global", "prometheus"]
    port = "alerthttp"

    check {
      name     = "alive"
      type     = "http"
      interval = "60s"
      timeout  = "120s"
      # Probe Alertmanager itself: the check previously used the "http" port
      # label, which is the Prometheus port, so it never exercised this task.
      # "/-/healthy" is Alertmanager's health endpoint.
      path = "/-/healthy"
      port = "alerthttp"
    }
  }

  # Alertmanager configuration. Every alert is routed to the
  # "gotify-webhook" receiver, which posts to port 9094 - the static host
  # port of the "gotifybridge" port label in this group's network block.
  # The heredoc body is YAML, so its indentation is significant.
  template {
    data = <<EOH
global:
receivers:
- name: default-receiver
- name: gotify-webhook
  webhook_configs:
  - url: "http://prometheus.service.dc1.consul:9094/gotify_webhook"
route:
  group_wait: 10s
  group_interval: 5m
  receiver: gotify-webhook
  repeat_interval: 3h
EOH
    destination = "local/alertmanager.yml"
  }

  resources {
    cpu    = 128 # MHz
    memory = 48  # MB
  }

  # Specify configuration related to log rotation
  logs {
    max_files     = 10
    max_file_size = 15 # MB per file
  }

  kill_timeout = "10s"
} ## end alertmanager
# Runs the alertmanager_gotify_bridge container, exposed through the
# "gotifybridge" port label. The Alertmanager config in this job posts its
# webhook to port 9094, which is that label's static host port.
task "gotifybridge" {
driver = "docker"
config {
image = "ghcr.io/druggeri/alertmanager_gotify_bridge"
ports = ["gotifybridge"]
# NOTE(review): presumably enables verbose logging in the bridge - confirm
# this is wanted outside of troubleshooting.
args = ["--debug"]
}
env {
# SECURITY(review): this credential is hard-coded in the job file, so anyone
# who can read the file can read it. Consider rotating it and supplying it
# from a secret store instead of committing it.
GOTIFY_TOKEN="AQ7l7NVgtylam86"
# Gotify endpoint, addressed through its Consul DNS service name.
GOTIFY_ENDPOINT="http://gotify.service.dc1.consul/message"
}
}
network {
  # Group-level port map. Each label reserves a fixed host port (`static`)
  # and forwards it to the given port inside the task's network (`to`).

  # Used by the "prometheus" task and its service/health check.
  port "http" {
    to     = 9090
    static = 9090
  }

  # Used by the "alertmanager" task and its service.
  port "alerthttp" {
    to     = 9093
    static = 9093
  }

  # Used by the "gotifybridge" task: host port 9094 forwards to 8080.
  port "gotifybridge" {
    to     = 8080
    static = 9094
  }
}
}
}