This blog post is an excellent point of reference on how you can approach the logging aggregation problem while on Nomad.

As I’m using Consul, I followed a slightly different approach. I have decided to deploy promtail as a system job, so that one instance runs on each node, and scrape the /var/nomad/alloc/*/alloc/logs/* directories. Additionally, I slurp the systemd journal so that I can debug potential system service issues as well.

I’m not fully convinced that this is the right way. Still, it seems to do the job - even if the given service doesn’t exist on the particular host that promtail is running on, nothing terrible happens: the allocation ID directory won’t exist on that host, so we will never scrape anything. The most significant upside here is that scraping logs from new or existing services only requires adding a service tag - there is no need to configure an extra sidecar service for each job. So it’s a really convenient solution.

Ultimately, I push everything to Grafana Cloud. It offers a free tier with 50GB of log storage (14 days retention), a great deal for hobby usage.

Here is the final Nomad job:

locals {
  # Full Docker image reference (repository:tag) used by the promtail task below.
  image_tag = "grafana/promtail:2.5.0"
}

# Runs one promtail instance per client node (system job). Each instance
# ships the systemd journal plus the logs of every Nomad allocation whose
# Consul service carries the 'logs.promtail=true' tag.
job "promtail" {
  datacenters = ["dc1"]
  type = "system"

  # I'm going to use vault to read some credentials below
  vault {
    policies = ["nomad-read"]
  }

  group "promtail" {
    count = 1

    network {
      port "http" {
        static = 3200
        # I'm using internal network to expose ports
        host_network = "private"
      }
    }

    restart {
      attempts = 3
      delay    = "20s"
      mode     = "delay"
    }

    task "promtail" {
      driver = "docker"

      # Cap promtail's own log files kept by Nomad (3 files, 5 MB each).
      logs {
        max_files     = 3
        max_file_size = 5
      }

      # Expose the Nomad node's hostname inside the container.
      # NOTE(review): presumably promtail matches this against the __host__
      # label set in relabel_configs below - confirm against promtail docs.
      env {
        HOSTNAME = "${attr.unique.hostname}"
      }

      # The rendered file embeds Vault secrets (Grafana Cloud password and
      # Consul ACL token), so it is written to the task's secrets/ directory
      # rather than local/.
      template {
        data        = <<EOTC
positions:
  filename: /data/positions.yaml

server:
  log_level: info

clients:
  # Grab your token from: Security -> Api Keys -> MetricsPublisher token
  # Url and username can be found in Your stack -> Loki -> Details
  - url: https://logs-prod-eu-west-0.grafana.net/loki/api/v1/push
    basic_auth:
      {{- with secret "kv-v1/nomad/loki" }}
      username: {{ .Data.username }}
      password: {{ .Data.password }}
      {{ end }}

scrape_configs:
- job_name: journal
  journal:
    json: false
    max_age: 12h
    path: /var/log/journal
    labels:
      job: systemd-journal
  relabel_configs:
  - source_labels: ['__journal__systemd_unit']
    target_label: 'unit'
  - source_labels: ['__journal__hostname']
    target_label: 'nodename'
  - source_labels: ['__journal_syslog_identifier']
    target_label: 'syslog_identifier'

- job_name: 'nomad-logs'
  consul_sd_configs:
    - server: '{{ env "NOMAD_IP_http" }}:8500'
      # I'm using Consul's ACL thus I need token
      token: {{ with secret "kv-v1/nomad/prometheus/consul" }}{{ .Data.key }}{{ end }}

  relabel_configs:
    # If there is a nomad service registered in consul with 'logs.promtail=true' tag - grab it
    - source_labels: [__meta_consul_tags]
      regex: '(.*)logs\.promtail=true(.*)'
      action: keep

    - source_labels: [__meta_consul_node]
      target_label: __host__

    - source_labels: [__meta_consul_service_metadata_external_source]
      target_label: source
      regex: (.*)
      replacement: '$1'

    # The UUID captured from the Consul service ID is the allocation ID
    # (it is the directory name used for __path__ below), even though the
    # label is called task_id.
    - source_labels: [__meta_consul_service_id]
      regex: '_nomad-task-([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})-.*'
      target_label:  'task_id'
      replacement: '$1'

    - source_labels: [__meta_consul_service]
      target_label: job

    - source_labels: ['__meta_consul_node']
      regex:         '(.*)'
      target_label:  'instance'
      replacement:   '$1'

    # Tail the allocation's log files; /nomad is the read-only mount of
    # /var/nomad/alloc declared in the docker config below.
    - source_labels: [__meta_consul_service_id]
      regex: '_nomad-task-([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})-.*'
      target_label:  '__path__'
      replacement: '/nomad/$1/alloc/logs/*std*.{?,??}'
EOTC
        destination = "secrets/promtail.yml"
      }

      config {
        image = local.image_tag
        ports = ["http"]
        args = [
          # must match the template destination above
          "-config.file=/secrets/promtail.yml",
          "-server.http-listen-port=${NOMAD_PORT_http}",
        ]
        volumes = [
          # keep promtail's positions file on the host
          "/data/promtail:/data",
          # and read journal & nomad logs
          "/var/nomad/alloc:/nomad/:ro",
          "/var/log/journal/:/var/log/journal/:ro"
        ]
      }

      resources {
        cpu    = 100
        memory = 50
        memory_max = 100
      }

      service {
        name = "promtail"
        port = "http"
        tags = ["monitoring"]

        check {
          name     = "Promtail HTTP"
          type     = "http"
          path     = "/targets"
          interval = "5s"
          timeout  = "2s"

          # Restart the task after 2 consecutive failed checks, ignoring
          # failures during the first 60s after start.
          check_restart {
            limit           = 2
            grace           = "60s"
            ignore_warnings = false
          }
        }
      }
    }
  }
}

Once you have that up & running, you can register any Nomad service with the logs.promtail=true tag and it will be picked up by promtail.