This blog post is an excellent reference on how to approach the log aggregation problem on Nomad.
As I’m using Consul, I followed a slightly different approach. I decided to deploy promtail as a system job (so it runs on every node) and scrape the /var/nomad/alloc/&lt;alloc-id&gt;/alloc/logs/
directories. Additionally, I slurp the systemd journal so I can debug potential
issues with system services as well.
I’m not fully convinced that this is the right way, but it seems to do the job. Even if a given service doesn’t run on the particular host that promtail is running on, nothing terrible happens: the allocation ID directory simply won’t exist on that host, so nothing gets scraped. The most significant upside is that scraping logs from new or existing services only requires adding a service tag - there is no need to configure an extra sidecar task for each job. So it’s a really convenient solution.
Ultimately, I push everything to Grafana Cloud. It offers a free tier with 50 GB of log storage and 14 days of retention, which is a great deal for hobby usage.
Here is the final Nomad job:
locals {
image_tag = "grafana/promtail:2.5.0"
}
job "promtail" {
datacenters = ["dc1"]
type = "system"
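# a system job runs one allocation on every eligible client node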
# I'm going to use vault to read some credentials below
vault {
policies = ["nomad-read"]
}
group "promtail" {
count = 1
network {
port "http" {
static = 3200
# I'm using internal network to expose ports
host_network = "private"
}
}
restart {
attempts = 3
delay = "20s"
mode = "delay"
}
task "promtail" {
driver = "docker"
logs {
max_files = 3
max_file_size = 5
}
env {
HOSTNAME = "${attr.unique.hostname}"
}
template {
data = <<EOTC
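# positions are persisted on the host via the /data volume mount below,
# so promtail resumes where it left off after a restart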
positions:
filename: /data/positions.yaml
server:
log_level: info
clients:
# Grab your token from: Security -> API Keys -> MetricsPublisher token
# Url and username can be found in Your stack -> Loki -> Details
- url: https://logs-prod-eu-west-0.grafana.net/loki/api/v1/push
basic_auth:
{{- with secret "kv-v1/nomad/loki" }}
username: {{ .Data.username }}
password: {{ .Data.password }}
{{ end }}
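# two scrape jobs: the node's systemd journal, and Nomad task logs discovered via Consul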
scrape_configs:
- job_name: journal
journal:
json: false
max_age: 12h
path: /var/log/journal
labels:
job: systemd-journal
relabel_configs:
- source_labels: ['__journal__systemd_unit']
target_label: 'unit'
- source_labels: ['__journal__hostname']
target_label: 'nodename'
- source_labels: ['__journal_syslog_identifier']
target_label: 'syslog_identifier'
- job_name: 'nomad-logs'
consul_sd_configs:
- server: '{{ env "NOMAD_IP_http" }}:8500'
# I'm using Consul ACLs, thus I need a token
token: {{ with secret "kv-v1/nomad/prometheus/consul" }}{{ .Data.key }}{{ end }}
relabel_configs:
# If a Nomad service is registered in Consul with the 'logs.promtail=true' tag - keep it
- source_labels: [__meta_consul_tags]
regex: '(.*)logs\.promtail=true(.*)'
action: keep
- source_labels: [__meta_consul_node]
target_label: __host__
- source_labels: [__meta_consul_service_metadata_external_source]
target_label: source
regex: (.*)
replacement: '$1'
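# Nomad registers services in Consul with IDs of the form _nomad-task-<alloc-id>-...,
# so capture the allocation ID and expose it as a label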
- source_labels: [__meta_consul_service_id]
regex: '_nomad-task-([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})-.*'
target_label: 'task_id'
replacement: '$1'
- source_labels: [__meta_consul_service]
target_label: job
- source_labels: ['__meta_consul_node']
regex: '(.*)'
target_label: 'instance'
replacement: '$1'
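# reuse the allocation ID to build the log path inside the container (the host's
# /var/nomad/alloc is mounted at /nomad below); the glob matches Nomad's
# stdout/stderr files such as stdout.0 or stderr.12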
- source_labels: [__meta_consul_service_id]
regex: '_nomad-task-([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})-.*'
target_label: '__path__'
replacement: '/nomad/$1/alloc/logs/*std*.{?,??}'
EOTC
destination = "/local/promtail.yml"
}
config {
image = local.image_tag
ports = ["http"]
args = [
"-config.file=/local/promtail.yml",
"-server.http-listen-port=${NOMAD_PORT_http}",
]
volumes = [
# keep promtail's positions file on the host
"/data/promtail:/data",
# and read journal & nomad logs
"/var/nomad/alloc:/nomad/:ro",
"/var/log/journal/:/var/log/journal/:ro"
]
}
resources {
cpu = 100
memory = 50
memory_max = 100
}
service {
name = "promtail"
port = "http"
tags = ["monitoring"]
check {
name = "Promtail HTTP"
type = "http"
path = "/targets"
interval = "5s"
timeout = "2s"
check_restart {
limit = 2
grace = "60s"
ignore_warnings = false
}
}
}
}
}
}
Once you have that up and running, you can register any Nomad service with the logs.promtail=true
tag and it will be picked up by promtail automatically.
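For example, a service stanza along the lines of the sketch below is all that's needed - the service and port names here are placeholders for whatever your job already defines:
service {
  # hypothetical service name and port label
  name = "my-app"
  port = "http"
  tags = [
    # promtail's Consul SD keeps targets carrying this tag
    "logs.promtail=true",
  ]
}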