Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added setup for production observability (metrics via OTEL) #1924

Merged
merged 12 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,6 @@ CLIENT_SECRET_GITHUB_LOGIN=

CLIENT_ID_GITLAB_LOGIN=
CLIENT_SECRET_GITLAB_LOGIN=

OTEL_COLLECTOR_OTLP_URL=
OTEL_TELEMETRY_COLLECTION_ENABLED=
sheensantoscapadngan marked this conversation as resolved.
Show resolved Hide resolved
1,706 changes: 1,673 additions & 33 deletions backend/package-lock.json

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@
"@node-saml/passport-saml": "^4.0.4",
"@octokit/rest": "^20.0.2",
"@octokit/webhooks-types": "^7.3.1",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/auto-instrumentations-node": "^0.46.1",
"@opentelemetry/exporter-metrics-otlp-proto": "^0.51.1",
"@opentelemetry/exporter-prometheus": "^0.51.1",
"@opentelemetry/instrumentation": "^0.51.1",
"@opentelemetry/resources": "^1.24.1",
"@opentelemetry/sdk-metrics": "^1.24.1",
"@opentelemetry/semantic-conventions": "^1.24.1",
"@serdnam/pino-cloudwatch-transport": "^1.0.4",
"@sindresorhus/slugify": "^2.2.1",
"@ucast/mongo2js": "^1.3.4",
Expand Down
4 changes: 3 additions & 1 deletion backend/src/lib/config/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,9 @@ const envSchema = z
.transform((val) => val === "true")
.optional(),
INFISICAL_CLOUD: zodStrBool.default("false"),
MAINTENANCE_MODE: zodStrBool.default("false")
MAINTENANCE_MODE: zodStrBool.default("false"),
OTEL_TELEMETRY_COLLECTION_ENABLED: zodStrBool.default("false"),
OTEL_COLLECTOR_OTLP_URL: zpStr(z.string().optional())
})
.transform((data) => ({
...data,
Expand Down
46 changes: 46 additions & 0 deletions backend/src/lib/telemetry/instrumentation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import opentelemetry from "@opentelemetry/api";
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-proto";
import { PrometheusExporter } from "@opentelemetry/exporter-prometheus";
import { registerInstrumentations } from "@opentelemetry/instrumentation";
import { Resource } from "@opentelemetry/resources";
import { AggregationTemporality, MeterProvider, PeriodicExportingMetricReader } from "@opentelemetry/sdk-metrics";
import { SEMRESATTRS_SERVICE_NAME, SEMRESATTRS_SERVICE_VERSION } from "@opentelemetry/semantic-conventions";

export const initTelemetry = async ({ otlpURL }: { otlpURL?: string }) => {
const resource = Resource.default().merge(
new Resource({
[SEMRESATTRS_SERVICE_NAME]: "infisical-server",
[SEMRESATTRS_SERVICE_VERSION]: "0.1.0"
})
);

const metricReaders = [];
if (otlpURL) {
const otlpExporter = new OTLPMetricExporter({
url: `${otlpURL}/v1/metrics`,
maidul98 marked this conversation as resolved.
Show resolved Hide resolved
temporalityPreference: AggregationTemporality.DELTA
});

metricReaders.push(
new PeriodicExportingMetricReader({
exporter: otlpExporter,
exportIntervalMillis: 30000
})
);
} else {
const promExporter = new PrometheusExporter();
metricReaders.push(promExporter);
}

const meterProvider = new MeterProvider({
resource,
readers: metricReaders
});

opentelemetry.metrics.setGlobalMeterProvider(meterProvider);

registerInstrumentations({
instrumentations: [getNodeAutoInstrumentations()]
});
};
6 changes: 6 additions & 0 deletions backend/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { initDbConnection } from "./db";
import { keyStoreFactory } from "./keystore/keystore";
import { formatSmtpConfig, initEnvConfig } from "./lib/config/env";
import { initLogger } from "./lib/logger";
import { initTelemetry } from "./lib/telemetry/instrumentation";
import { queueServiceFactory } from "./queue";
import { main } from "./server/app";
import { bootstrapCheck } from "./server/boot-strap-check";
Expand All @@ -13,6 +14,11 @@ dotenv.config();
const run = async () => {
const logger = await initLogger();
const appCfg = initEnvConfig(logger);

if (appCfg.OTEL_TELEMETRY_COLLECTION_ENABLED) {
await initTelemetry({ otlpURL: appCfg.OTEL_COLLECTOR_OTLP_URL });
sheensantoscapadngan marked this conversation as resolved.
Show resolved Hide resolved
}

const db = initDbConnection({
dbConnectionUri: appCfg.DB_CONNECTION_URI,
dbRootCert: appCfg.DB_ROOT_CERT
Expand Down
5 changes: 5 additions & 0 deletions backend/src/server/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { TQueueServiceFactory } from "@app/queue";
import { TSmtpService } from "@app/services/smtp/smtp-service";

import { globalRateLimiterCfg } from "./config/rateLimiter";
import { apiMetrics } from "./plugins/api-metrics";
import { fastifyErrHandler } from "./plugins/error-handler";
import { registerExternalNextjs } from "./plugins/external-nextjs";
import { serializerCompiler, validatorCompiler, ZodTypeProvider } from "./plugins/fastify-zod";
Expand Down Expand Up @@ -63,6 +64,10 @@ export const main = async ({ db, smtp, logger, queue, keyStore }: TMain) => {
// pull ip based on various proxy headers
await server.register(fastifyIp);

if (appCfg.OTEL_TELEMETRY_COLLECTION_ENABLED) {
await server.register(apiMetrics);
}

await server.register(fastifySwagger);
await server.register(fastifyFormBody);
await server.register(fastifyErrHandler);
Expand Down
21 changes: 21 additions & 0 deletions backend/src/server/plugins/api-metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import opentelemetry from "@opentelemetry/api";
import fp from "fastify-plugin";

export const apiMetrics = fp(async (fastify) => {
const apiMeter = opentelemetry.metrics.getMeter("API");
const latencyHistogram = apiMeter.createHistogram("API latency", {
sheensantoscapadngan marked this conversation as resolved.
Show resolved Hide resolved
unit: "ms"
});

fastify.addHook("onResponse", async (request, reply) => {
const { method } = request;
const route = request.routerPath;
const { statusCode } = reply;

latencyHistogram.record(reply.elapsedTime, {
route,
method,
statusCode
});
});
});
201 changes: 201 additions & 0 deletions docker-compose.dev.observable.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
version: "3.9"

services:
nginx:
container_name: infisical-dev-nginx
image: nginx
restart: always
ports:
- 8080:80
volumes:
- ./nginx/default.dev.conf:/etc/nginx/conf.d/default.conf:ro
depends_on:
- backend
- frontend

db:
image: postgres:14-alpine
ports:
- "5432:5432"
volumes:
- postgres-data:/var/lib/postgresql/data
environment:
POSTGRES_PASSWORD: infisical
POSTGRES_USER: infisical
POSTGRES_DB: infisical

redis:
image: redis
container_name: infisical-dev-redis
environment:
- ALLOW_EMPTY_PASSWORD=yes
ports:
- 6379:6379
volumes:
- redis_data:/data

redis-commander:
container_name: infisical-dev-redis-commander
image: rediscommander/redis-commander
restart: always
depends_on:
- redis
environment:
- REDIS_HOSTS=local:redis:6379
ports:
- "8085:8081"

db-test:
profiles: ["test"]
image: postgres:14-alpine
ports:
- "5430:5432"
environment:
POSTGRES_PASSWORD: infisical
POSTGRES_USER: infisical
POSTGRES_DB: infisical-test

db-migration:
container_name: infisical-db-migration
depends_on:
- db
build:
context: ./backend
dockerfile: Dockerfile.dev
env_file: .env
environment:
- DB_CONNECTION_URI=postgres://infisical:infisical@db/infisical?sslmode=disable
command: npm run migration:latest
volumes:
- ./backend/src:/app/src

backend:
container_name: infisical-dev-api
build:
context: ./backend
dockerfile: Dockerfile.dev
depends_on:
db:
condition: service_started
redis:
condition: service_started
db-migration:
condition: service_completed_successfully
env_file:
- .env
ports:
- 4000:4000
- 9464:9464 # for OTEL collection of Prometheus metrics
sheensantoscapadngan marked this conversation as resolved.
Show resolved Hide resolved
environment:
- NODE_ENV=development
- DB_CONNECTION_URI=postgres://infisical:infisical@db/infisical?sslmode=disable
- TELEMETRY_ENABLED=false
volumes:
- ./backend/src:/app/src
extra_hosts:
- "host.docker.internal:host-gateway"

frontend:
container_name: infisical-dev-frontend
restart: unless-stopped
depends_on:
- backend
build:
context: ./frontend
dockerfile: Dockerfile.dev
volumes:
- ./frontend/src:/app/src/ # mounted whole src to avoid missing reload on new files
- ./frontend/public:/app/public
env_file: .env
environment:
- NEXT_PUBLIC_ENV=development
- INFISICAL_TELEMETRY_ENABLED=false

pgadmin:
image: dpage/pgadmin4
restart: always
environment:
PGADMIN_DEFAULT_EMAIL: [email protected]
PGADMIN_DEFAULT_PASSWORD: pass
ports:
- 5050:80
depends_on:
- db

smtp-server:
container_name: infisical-dev-smtp-server
image: lytrax/mailhog:latest # https://github.com/mailhog/MailHog/issues/353#issuecomment-821137362
restart: always
logging:
driver: "none" # disable saving logs
ports:
- 1025:1025 # SMTP server
- 8025:8025 # Web UI

openldap: # note: more advanced configuration is available
image: osixia/openldap:1.5.0
restart: always
environment:
LDAP_ORGANISATION: Acme
LDAP_DOMAIN: acme.com
LDAP_ADMIN_PASSWORD: admin
ports:
- 389:389
- 636:636
volumes:
- ldap_data:/var/lib/ldap
- ldap_config:/etc/ldap/slapd.d
profiles: [ldap]

phpldapadmin: # username: cn=admin,dc=acme,dc=com, pass is admin
image: osixia/phpldapadmin:latest
restart: always
environment:
- PHPLDAPADMIN_LDAP_HOSTS=openldap
- PHPLDAPADMIN_HTTPS=false
ports:
- 6433:80
depends_on:
- openldap
profiles: [ldap]

otel-collector:
image: otel/opentelemetry-collector-contrib
volumes:
- ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml
ports:
- 1888:1888 # pprof extension
- 8888:8888 # Prometheus metrics exposed by the Collector
- 8889:8889 # Prometheus exporter metrics
- 13133:13133 # health_check extension
- 4317:4317 # OTLP gRPC receiver
- 4318:4318 # OTLP http receiver
- 55679:55679 # zpages extension

prometheus:
image: prom/prometheus
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
command:
- "--config.file=/etc/prometheus/prometheus.yml"

grafana:
image: grafana/grafana
container_name: grafana
restart: unless-stopped
environment:
- GF_LOG_LEVEL=debug
ports:
- "3000:3000"
volumes:
- "grafana_storage:/var/lib/grafana"
volumes:
postgres-data:
driver: local
redis_data:
driver: local
ldap_data:
ldap_config:
grafana_storage:
35 changes: 35 additions & 0 deletions otel-collector-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
receivers:
otlp:
protocols:
http:
endpoint: 0.0.0.0:4318
prometheus:
config:
scrape_configs:
- job_name: otel-collector
scrape_interval: 30s
static_configs:
- targets: [backend:9464]
metric_relabel_configs:
- action: labeldrop
regex: "service_instance_id|service_name"
processors:
batch:

exporters:
prometheus:
endpoint: "0.0.0.0:8889"
resource_to_telemetry_conversion:
enabled: true
extensions:
health_check:
pprof:
zpages:

service:
extensions: [health_check, pprof, zpages]
pipelines:
metrics:
receivers: [otlp, prometheus]
processors: [batch]
exporters: [prometheus]
5 changes: 5 additions & 0 deletions prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
scrape_configs:
- job_name: "otel-collector"
scrape_interval: 30s
static_configs:
- targets: ["otel-collector:8889"]
Loading