Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion framework/configstore/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ func triggerMigrations(ctx context.Context, db *gorm.DB) error {
return err
}
if err := migrationAddOutputCostPerVideoPerSecond(ctx, db); err != nil {

return err
}
if err := migrationDropEnableGovernanceColumn(ctx, db); err != nil {
Expand Down Expand Up @@ -338,6 +337,9 @@ func triggerMigrations(ctx context.Context, db *gorm.DB) error {
if err := migrationAddFlexTierPricingColumns(ctx, db); err != nil {
return err
}
if err := migrationNormalizeOtelTraceType(ctx, db); err != nil {
return err
}
return nil
}

Expand Down Expand Up @@ -5078,3 +5080,47 @@ func migrationAddWhitelistedRoutesJSONColumn(ctx context.Context, db *gorm.DB) e
}
return nil
}

// migrationNormalizeOtelTraceType rewrites the legacy OTEL plugin trace_type value "otel" to "genai_extension".
//
// The migration is a no-op when no plugin row named "otel" exists, when the row's
// config is not a non-empty map[string]any, or when trace_type is anything other
// than the string "otel". Rollback does nothing.
//
// It returns an error if loading or saving the plugin row fails; the underlying
// error is wrapped so callers can inspect it with errors.Is / errors.As.
func migrationNormalizeOtelTraceType(ctx context.Context, db *gorm.DB) error {
	m := migrator.New(db, migrator.DefaultOptions, []*migrator.Migration{{
		ID: "normalize_otel_trace_type",
		Migrate: func(tx *gorm.DB) error {
			tx = tx.WithContext(ctx)

			var plugin tables.TablePlugin
			err := tx.Where("name = ?", "otel").First(&plugin).Error
			if err != nil {
				// A missing otel plugin row means there is nothing to normalize.
				if err == gorm.ErrRecordNotFound {
					return nil
				}
				return fmt.Errorf("failed to load otel plugin row: %w", err)
			}

			cfgMap, ok := plugin.Config.(map[string]any)
			if !ok || len(cfgMap) == 0 {
				return nil
			}
			// A missing or non-string trace_type yields "" here and is left untouched.
			if tt, _ := cfgMap["trace_type"].(string); tt != "otel" {
				return nil
			}

			cfgMap["trace_type"] = "genai_extension"
			plugin.Config = cfgMap
			// NOTE(review): clearing ConfigJSON and resetting EncryptionStatus presumably
			// makes the table's save hooks re-serialize Config — confirm against TablePlugin.
			plugin.ConfigJSON = ""
			plugin.EncryptionStatus = tables.EncryptionStatusPlainText

			if err := tx.Save(&plugin).Error; err != nil {
				return fmt.Errorf("failed to save normalized otel config: %w", err)
			}
			log.Printf("[Migration] Normalized otel trace_type 'otel' to 'genai_extension'")
			return nil
		},
		Rollback: func(tx *gorm.DB) error { return nil },
	}})
	if err := m.Migrate(); err != nil {
		// Wrap with %w (same message text as before) so the error chain is preserved,
		// matching the inner errors above.
		return fmt.Errorf("error running normalize_otel_trace_type migration: %w", err)
	}
	return nil
}
2 changes: 2 additions & 0 deletions plugins/otel/changelog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- fix: defaults `insecure` to `true` when it is omitted from config.json
- fix: includes fallbacks in otel metrics
129 changes: 76 additions & 53 deletions plugins/otel/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/bytedance/sonic"
"github.com/maximhq/bifrost/core/schemas"
"github.com/maximhq/bifrost/framework/modelcatalog"
"go.opentelemetry.io/otel/attribute"
commonpb "go.opentelemetry.io/proto/otlp/common/v1"
)

Expand Down Expand Up @@ -50,14 +51,36 @@ type Config struct {
TraceType TraceType `json:"trace_type"`
Protocol Protocol `json:"protocol"`
TLSCACert string `json:"tls_ca_cert"`
Insecure bool `json:"insecure"` // Skip TLS when true; ignored if TLSCACert is set
Insecure bool `json:"insecure"` // Skip TLS when true; ignored if TLSCACert is set. Defaults to true when omitted.

// Metrics push configuration
MetricsEnabled bool `json:"metrics_enabled"`
MetricsEndpoint string `json:"metrics_endpoint"`
MetricsPushInterval int `json:"metrics_push_interval"` // in seconds, default 15
}

// UnmarshalJSON decodes data into c and then fills in defaults that a plain
// zero value cannot express: Insecure becomes true when the "insecure" key is
// absent (or null), so http:// collectors work without extra configuration.
// An explicit true/false in the payload is kept as-is.
func (c *Config) UnmarshalJSON(data []byte) error {
	// plain has Config's fields but not its methods, so decoding into it does
	// not re-enter this UnmarshalJSON.
	type plain Config

	// The outer pointer field captures "insecure" so that "absent" (nil) can be
	// told apart from an explicit false; all other keys land in c through the
	// embedded pointer.
	var shadow struct {
		Insecure *bool `json:"insecure"`
		*plain
	}
	shadow.plain = (*plain)(c)

	if err := sonic.Unmarshal(data, &shadow); err != nil {
		return err
	}

	c.Insecure = shadow.Insecure == nil || *shadow.Insecure
	return nil
}
Comment thread
greptile-apps[bot] marked this conversation as resolved.

// OtelPlugin is the plugin for OpenTelemetry.
// It implements the ObservabilityPlugin interface to receive completed traces
// from the tracing middleware and forward them to an OTEL collector.
Expand Down Expand Up @@ -278,7 +301,6 @@ func (p *OtelPlugin) Inject(ctx context.Context, trace *schemas.Trace) error {
}

// Helper functions for type-safe attribute extraction from trace spans

func getStringAttr(attrs map[string]any, key string) string {
if attrs == nil {
return ""
Expand Down Expand Up @@ -319,76 +341,77 @@ func getFloat64Attr(attrs map[string]any, key string) float64 {
return 0
}

// buildSpanAttrs reads the metric dimension values from one span's attribute
// map and passes them to BuildBifrostAttributes. The method dimension comes
// from the "request.type" attribute, falling back to the span name when that
// attribute is empty.
func buildSpanAttrs(span *schemas.Span) []attribute.KeyValue {
	a := span.Attributes

	requestType := getStringAttr(a, "request.type")
	if requestType == "" {
		requestType = span.Name
	}

	provider := getStringAttr(a, schemas.AttrProviderName)
	model := getStringAttr(a, schemas.AttrRequestModel)

	virtualKeyID := getStringAttr(a, schemas.AttrVirtualKeyID)
	virtualKeyName := getStringAttr(a, schemas.AttrVirtualKeyName)
	selectedKeyID := getStringAttr(a, schemas.AttrSelectedKeyID)
	selectedKeyName := getStringAttr(a, schemas.AttrSelectedKeyName)

	retries := getIntAttr(a, schemas.AttrNumberOfRetries)
	fallbackIdx := getIntAttr(a, schemas.AttrFallbackIndex)

	teamID := getStringAttr(a, schemas.AttrTeamID)
	teamName := getStringAttr(a, schemas.AttrTeamName)
	customerID := getStringAttr(a, schemas.AttrCustomerID)
	customerName := getStringAttr(a, schemas.AttrCustomerName)

	return BuildBifrostAttributes(
		provider, model, requestType,
		virtualKeyID, virtualKeyName,
		selectedKeyID, selectedKeyName,
		retries, fallbackIdx,
		teamID, teamName, customerID, customerName,
	)
}

// recordMetricsFromTrace extracts metrics data from a completed trace and records them
// via the OTEL metrics exporter. This is called from Inject after trace emission.
//
// Per-attempt metrics (upstream_requests, errors, success, latency) are recorded once
// per llm.call/retry span so fallback attempts and failed retries are counted with
// their own provider/model/fallback_index labels. Per-trace metrics (tokens, cost,
// TTFT) are recorded once, keyed off the final (latest) attempt span.
func (p *OtelPlugin) recordMetricsFromTrace(ctx context.Context, trace *schemas.Trace) {
if trace == nil || p.metricsExporter == nil {
return
}

// Prefer the last attempt span (LLM call or retry) so metrics reflect the final outcome.
var llmSpan *schemas.Span
var finalSpan *schemas.Span
for _, span := range trace.Spans {
if span.Kind != schemas.SpanKindLLMCall && span.Kind != schemas.SpanKindRetry {
continue
}
if llmSpan == nil || span.EndTime.After(llmSpan.EndTime) {
llmSpan = span
}
}
if llmSpan == nil {
llmSpan = trace.RootSpan
}

if llmSpan == nil {
return
}
spanAttrs := buildSpanAttrs(span)

attrs := llmSpan.Attributes
p.metricsExporter.RecordUpstreamRequest(ctx, spanAttrs...)

// Extract all metric dimensions from span attributes
provider := getStringAttr(attrs, schemas.AttrProviderName)
model := getStringAttr(attrs, schemas.AttrRequestModel)
// Prefer request.type attribute to keep the method stable across retries
method := getStringAttr(attrs, "request.type")
if method == "" {
method = llmSpan.Name
}
virtualKeyID := getStringAttr(attrs, schemas.AttrVirtualKeyID)
virtualKeyName := getStringAttr(attrs, schemas.AttrVirtualKeyName)
selectedKeyID := getStringAttr(attrs, schemas.AttrSelectedKeyID)
selectedKeyName := getStringAttr(attrs, schemas.AttrSelectedKeyName)
numberOfRetries := getIntAttr(attrs, schemas.AttrNumberOfRetries)
fallbackIndex := getIntAttr(attrs, schemas.AttrFallbackIndex)
teamID := getStringAttr(attrs, schemas.AttrTeamID)
teamName := getStringAttr(attrs, schemas.AttrTeamName)
customerID := getStringAttr(attrs, schemas.AttrCustomerID)
customerName := getStringAttr(attrs, schemas.AttrCustomerName)

// Build common attributes for all metrics
otelAttrs := BuildBifrostAttributes(
provider, model, method,
virtualKeyID, virtualKeyName,
selectedKeyID, selectedKeyName,
numberOfRetries, fallbackIndex,
teamID, teamName, customerID, customerName,
)
if !span.StartTime.IsZero() && !span.EndTime.IsZero() {
latencySeconds := span.EndTime.Sub(span.StartTime).Seconds()
p.metricsExporter.RecordUpstreamLatency(ctx, latencySeconds, spanAttrs...)
}

// Record upstream request count
p.metricsExporter.RecordUpstreamRequest(ctx, otelAttrs...)
if span.Status == schemas.SpanStatusError {
p.metricsExporter.RecordErrorRequest(ctx, spanAttrs...)
} else {
p.metricsExporter.RecordSuccessRequest(ctx, spanAttrs...)
}

// Record latency (from span duration)
if !llmSpan.StartTime.IsZero() && !llmSpan.EndTime.IsZero() {
latencySeconds := llmSpan.EndTime.Sub(llmSpan.StartTime).Seconds()
p.metricsExporter.RecordUpstreamLatency(ctx, latencySeconds, otelAttrs...)
if finalSpan == nil || span.EndTime.After(finalSpan.EndTime) {
finalSpan = span
}
}

// Record success or error based on span status
if llmSpan.Status == schemas.SpanStatusError {
p.metricsExporter.RecordErrorRequest(ctx, otelAttrs...)
} else {
p.metricsExporter.RecordSuccessRequest(ctx, otelAttrs...)
if finalSpan == nil {
finalSpan = trace.RootSpan
}
if finalSpan == nil {
return
}

attrs := finalSpan.Attributes
otelAttrs := buildSpanAttrs(finalSpan)

// Record token usage - try both naming conventions
inputTokens := getIntAttr(attrs, schemas.AttrPromptTokens)
Expand Down
2 changes: 1 addition & 1 deletion plugins/otel/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.39.0"
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/credentials/insecure"
)
Expand Down
4 changes: 2 additions & 2 deletions transports/config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1319,7 +1319,7 @@
"type": "string",
"description": "Type of trace to use for the OTEL collector",
"enum": [
"otel"
"genai_extension"
]
},
"protocol": {
Comment thread
sammaji marked this conversation as resolved.
Expand Down Expand Up @@ -3642,4 +3642,4 @@
"additionalProperties": false
}
}
}
}
20 changes: 14 additions & 6 deletions ui/app/workspace/observability/fragments/otelFormFragment.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ interface OtelFormFragmentProps {
service_name?: string;
collector_url?: string;
headers?: Record<string, string>;
trace_type?: "otel" | "genai_extension" | "vercel" | "arize_otel";
trace_type?: "genai_extension" | "vercel" | "open_inference";
protocol?: "http" | "grpc";
// TLS configuration
tls_ca_cert?: string;
Expand All @@ -37,7 +37,13 @@ interface OtelFormFragmentProps {
isLoading?: boolean;
}

export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelete, isDeleting = false, isLoading = false }: OtelFormFragmentProps) {
export function OtelFormFragment({
currentConfig: initialConfig,
onSave,
onDelete,
isDeleting = false,
isLoading = false,
}: OtelFormFragmentProps) {
const hasOtelAccess = useRbac(RbacResource.Observability, RbacOperation.Update);
const [isSaving, setIsSaving] = useState(false);
const form = useForm<OtelFormSchema, any, OtelFormSchema>({
Expand All @@ -50,7 +56,7 @@ export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelet
service_name: initialConfig?.service_name ?? "bifrost",
collector_url: initialConfig?.collector_url ?? "",
headers: initialConfig?.headers ?? {},
trace_type: initialConfig?.trace_type ?? "otel",
trace_type: initialConfig?.trace_type ?? "genai_extension",
protocol: initialConfig?.protocol ?? "http",
tls_ca_cert: initialConfig?.tls_ca_cert ?? "",
insecure: initialConfig?.insecure ?? true,
Expand Down Expand Up @@ -94,7 +100,7 @@ export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelet
service_name: initialConfig?.service_name ?? "bifrost",
collector_url: initialConfig?.collector_url || "",
headers: initialConfig?.headers || {},
trace_type: initialConfig?.trace_type || "otel",
trace_type: initialConfig?.trace_type || "genai_extension",
protocol: initialConfig?.protocol || "http",
tls_ca_cert: initialConfig?.tls_ca_cert ?? "",
insecure: initialConfig?.insecure ?? true,
Expand All @@ -106,7 +112,9 @@ export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelet
}, [form, initialConfig]);

const traceTypeOptions: { value: string; label: string; disabled?: boolean; disabledReason?: string }[] = [
{ value: "otel", label: "OTEL - GenAI Extension" },
{ value: "genai_extension", label: "OTel GenAI Extension (Recommended)" },
{ value: "vercel", label: "Vercel AI SDK", disabled: true, disabledReason: "Coming soon" },
{ value: "open_inference", label: "Arize OpenInference", disabled: true, disabledReason: "Coming soon" },
];
const protocolOptions: { value: string; label: string; disabled?: boolean; disabledReason?: string }[] = [
{ value: "http", label: "HTTP" },
Expand Down Expand Up @@ -406,7 +414,7 @@ export function OtelFormFragment({ currentConfig: initialConfig, onSave, onDelet
service_name: initialConfig?.service_name ?? "bifrost",
collector_url: initialConfig?.collector_url ?? "",
headers: initialConfig?.headers ?? {},
trace_type: initialConfig?.trace_type ?? "otel",
trace_type: initialConfig?.trace_type ?? "genai_extension",
protocol: initialConfig?.protocol ?? "http",
tls_ca_cert: initialConfig?.tls_ca_cert ?? "",
insecure: initialConfig?.insecure ?? true,
Expand Down
14 changes: 7 additions & 7 deletions ui/components/ui/select.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ function SelectContent({ className, children, position = "popper", ...props }: R
<SelectPrimitive.Content
data-slot="select-content"
className={cn(
"bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 fixed z-[9999] max-h-(--radix-select-content-available-height) min-w-[8rem] origin-(--radix-select-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-sm border shadow-md !pointer-events-auto",
"bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 !pointer-events-auto fixed z-[9999] max-h-(--radix-select-content-available-height) min-w-[8rem] origin-(--radix-select-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-sm border shadow-md",
position === "popper" &&
"data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1 w-(--radix-select-trigger-width)",
"w-(--radix-select-trigger-width) data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1",
className,
)}
position={position}
Expand All @@ -61,7 +61,7 @@ function SelectContent({ className, children, position = "popper", ...props }: R
<SelectScrollUpButton />
<SelectPrimitive.Viewport
className={cn(
"p-1 !pointer-events-auto",
"!pointer-events-auto p-1",
position === "popper" && "h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)] scroll-my-1",
)}
>
Expand All @@ -88,11 +88,12 @@ interface SelectItemProps extends React.ComponentProps<typeof SelectPrimitive.It
function SelectItem({ className, children, disabled, disabledReason, icon, ...props }: SelectItemProps) {
return (
<SelectPrimitive.Item
disabled={disabled}
data-disabled={disabled}
data-disabled-reason={disabledReason}
data-slot="select-item"
className={cn(
"hover:bg-accent hover:text-accent-foreground focus:bg-accent focus:text-accent-foreground [&_svg:not([class*='text-'])]:text-muted-foreground data-[disabled]:text-muted-foreground relative flex w-full cursor-pointer items-center gap-2 rounded-sm py-1.5 pl-2 text-sm outline-hidden select-none !pointer-events-auto data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 *:[span]:last:flex *:[span]:last:items-center *:[span]:last:gap-2",
"hover:bg-accent hover:text-accent-foreground focus:bg-accent focus:text-accent-foreground [&_svg:not([class*='text-'])]:text-muted-foreground data-[disabled]:text-muted-foreground !pointer-events-auto relative flex w-full cursor-pointer items-center gap-2 rounded-sm py-1.5 pl-2 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 *:[span]:last:flex *:[span]:last:items-center *:[span]:last:gap-2",
className,
!disabled && "pr-8",
disabled && "pr-1",
Expand Down Expand Up @@ -159,6 +160,5 @@ export {
SelectScrollUpButton,
SelectSeparator,
SelectTrigger,
SelectValue
};

SelectValue,
};
4 changes: 2 additions & 2 deletions ui/lib/types/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -711,10 +711,10 @@ export const otelConfigSchema = z
service_name: z.string().optional(),
collector_url: z.string().default(""),
trace_type: z
.enum(["otel", "genai_extension", "vercel", "arize_otel"], {
.enum(["genai_extension", "vercel", "open_inference"], {
message: "Please select a trace type",
})
.default("otel"),
.default("genai_extension"),
Comment thread
sammaji marked this conversation as resolved.
Comment thread
sammaji marked this conversation as resolved.
headers: z.record(z.string(), z.string()).optional(),
protocol: z
.enum(["http", "grpc"], {
Expand Down
Loading