# dialogue-core/src/main/metrics/dialogue-core-metrics.yml

# Schema-wide options.
# NOTE(review): presumably read by the metrics code generator to decide where and how the
# generated Java classes are emitted — confirm against the generator's documentation.
options:
  # Java package configured for this schema.
  javaPackage: com.palantir.dialogue.core
  # Java visibility configured for this schema (package-private here).
  javaVisibility: packagePrivate
namespaces:
  # Implementation-agnostic client metrics: a response timer and a deprecation meter.
  client:
    # Folded scalar: the two lines below are joined with a single space, no trailing newline.
    docs: >-
      General client metrics produced by dialogue. These metrics are meant to be applicable to all conjure clients
      without being implementation-specific.
    metrics:
      response:
        type: timer
        tags: [channel-name, service-name, endpoint, status]
        # Literal scalar: line breaks are kept so the bullet list below renders as written.
        docs: |
          Request time split by status and endpoint. Possible status values are:
          * success: 2xx requests, always excludes time spent reading the response body.
          * failure:
            - QoS failures (429, 308, 503)
            - 500 requests
            - IOExceptions
      deprecations:
        type: meter
        tags: [service-name]
        docs: Rate of deprecated endpoints being invoked.

  # Dialogue-specific client metrics: response leaks, retries, queue depth and queue latency
  # (overall, per endpoint, and sticky), plus client creation and reload events.
  dialogue.client:
    docs: Dialogue-specific metrics that are not necessarily applicable to other client implementations.
    metrics:
      response.leak:
        type: meter
        tags: [client-name, service-name, endpoint]
        docs: >-
          Rate that responses are garbage collected without being closed. This should only occur in the case of a
          programming error.
      request.retry:
        type: meter
        tags: [channel-name, reason]
        docs: Rate at which the RetryingChannel retries requests (across all endpoints).

      # Queue depth counters.
      requests.queued:
        type: counter
        tags: [channel-name]
        docs: Number of queued requests waiting to execute.
      requests.endpoint.queued:
        type: counter
        tags: [channel-name, service-name, endpoint]
        docs: Number of queued requests waiting to execute for a specific endpoint due to server QoS.
      requests.sticky.queued:
        type: counter
        tags: [channel-name]
        docs: Number of sticky queued requests waiting to try to be executed.

      # Queue wait-time timers; each one mirrors a counter above.
      request.queued.time:
        type: timer
        tags: [channel-name]
        docs: Time spent waiting in the queue before execution.
      request.endpoint.queued.time:
        type: timer
        tags: [channel-name, service-name, endpoint]
        docs: Time spent waiting in the queue before execution on a specific endpoint due to server QoS.
      request.sticky.queued.time:
        type: timer
        tags: [channel-name]
        docs: Time spent waiting in the sticky queue before execution attempt.

      # Client lifecycle meters.
      # Note: the 'dialogue.client.create' metric is also defined in the apache metrics.
      create:
        type: meter
        tags: [client-name, client-type]
        docs: Marked every time a new client is created.
      reload:
        type: meter
        tags: [client-name, client-type]
        docs: Marked every time a client's targets are reloaded, including initial creation.

  # Gauges for the concurrency limiter: the current permitted limit and the current usage.
  dialogue.concurrencylimiter:
    docs: Instrumentation for the ConcurrencyLimitedChannel
    metrics:
      max:
        type: gauge
        tags: [channel-name, hostIndex]
        # Folded scalar: parsed as one line, identical to the single-line form.
        docs: >-
          The maximum number of concurrent requests which are currently permitted. Additively increases with
          successes and multiplicatively decreases with failures.
      in-flight:
        type: gauge
        tags: [channel-name, hostIndex]
        docs: The number of concurrent requests which are currently running.

  # Meters for the PIN_UNTIL_ERROR strategy: successes per host, node switches, and reshuffles.
  dialogue.pinuntilerror:
    docs: Instrumentation for the PIN_UNTIL_ERROR node selection strategy.
    metrics:
      success:
        type: meter
        tags: [channel-name, hostIndex]
        docs: >-
          Meter of the requests that were successfully made, tagged by the index of the inner channel. (Note if
          there are >10 nodes this metric will not be recorded).
      nextNode:
        type: meter
        tags: [channel-name, reason]
        docs: >-
          Marked every time we switch to a new node, includes the reason why we switched (limited, responseCode,
          throwable).
      reshuffle:
        type: meter
        tags: [channel-name]
        docs: Marked every time we reshuffle all the nodes.

  # Per-host success meter for the ROUND_ROBIN strategy.
  dialogue.roundrobin:
    docs: Instrumentation for the ROUND_ROBIN node selection strategy (currently implemented by BalancedChannel).
    metrics:
      success:
        type: meter
        tags: [channel-name, hostIndex]
        # Folded scalar: lines are joined with a single space, no trailing newline.
        docs: >-
          Meter of the requests that were successfully made, tagged by the index of the host. (Note if there are >10
          nodes this metric will not be recorded).

  # Per-host score gauge exposing BalancedChannel routing state.
  dialogue.balanced:
    docs: Instrumentation for BalancedChannel internals.
    metrics:
      score:
        type: gauge
        tags: [channel-name, hostIndex]
        # Folded scalar: lines are joined with a single space, no trailing newline.
        docs: >-
          The score that the BalancedChannel currently assigns to each host (computed based on inflight requests and
          recent failures). Requests are routed to the channel with the lowest score. (Note if there are >10 nodes this
          metric will not be recorded).

  # Namespace recording node selection strategy changes.
  dialogue.nodeselection:
    docs: Instrumentation for which node selection strategy is used
    metrics:
      # Single meter, tagged by channel and by strategy.
      # NOTE(review): the 'strategy' tag presumably carries the newly selected strategy — confirm.
      strategy:
        type: meter
        tags: [channel-name, strategy]
        docs: Marked every time the node selection strategy changes