|  | 
|  | 1 | +// Licensed to the .NET Foundation under one or more agreements. | 
|  | 2 | +// The .NET Foundation licenses this file to you under the MIT license. | 
|  | 3 | + | 
|  | 4 | +using System.Text.Json; | 
|  | 5 | +using Microsoft.Extensions.Diagnostics.HealthChecks; | 
|  | 6 | + | 
|  | 7 | +namespace Aspire.Hosting.OpenAI; | 
|  | 8 | + | 
/// <summary>
/// A health check for OpenAI resources whose behavior depends on the configured endpoint.
/// </summary>
/// <remarks>
/// When the resource uses the default OpenAI endpoint, the public status page is queried.
/// For any other endpoint the check reports healthy without issuing a request.
/// The first result produced (healthy or not) is cached and returned by every later call.
/// </remarks>
/// <param name="httpClientFactory">The HTTP client factory.</param>
/// <param name="resource">The OpenAI resource.</param>
internal sealed class OpenAIHealthCheck(IHttpClientFactory httpClientFactory, OpenAIResource resource) : IHealthCheck
{
    private const string DefaultEndpoint = "https://api.openai.com/v1";

    // Holds the first computed result; once set, no further checks are performed.
    private HealthCheckResult? _result;

    /// <summary>
    /// Checks the health of the OpenAI resource.
    /// </summary>
    /// <param name="context">The health check context (not used by this check).</param>
    /// <param name="cancellationToken">A token that can be used to cancel the check.</param>
    /// <returns>The cached result if one exists; otherwise the newly computed (and now cached) result.</returns>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        if (_result is { } cached)
        {
            return cached;
        }

        try
        {
            // Case 1: Default endpoint - consult the public status page.
            if (resource.Endpoint == DefaultEndpoint)
            {
                return await CheckStatusPageAsync(cancellationToken).ConfigureAwait(false);
            }

            // Case 2: Custom endpoint - there is nothing to probe without a model, so report healthy.
            // The model-level health check will do the actual verification if WithHealthCheck is called.
            return Cache(HealthCheckResult.Healthy("Custom OpenAI endpoint configured"));
        }
        catch (Exception ex)
        {
            // NOTE(review): this also caches failures caused by caller cancellation — confirm that is intended.
            return Cache(HealthCheckResult.Unhealthy($"Failed to check OpenAI resource: {ex.Message}", ex));
        }
    }

    /// <summary>
    /// Stores <paramref name="result"/> as the cached outcome and returns it.
    /// </summary>
    private HealthCheckResult Cache(HealthCheckResult result)
    {
        _result = result;
        return result;
    }

    /// <summary>
    /// Reads a string-valued property from <paramref name="element"/>, or returns an empty
    /// string when the property is absent or is not a JSON string.
    /// </summary>
    private static string GetStringOrEmpty(JsonElement element, string propertyName)
    {
        return element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString() ?? string.Empty
            : string.Empty;
    }

    /// <summary>
    /// Queries the OpenAI status page and maps its <c>status.indicator</c> value to a health result.
    /// </summary>
    /// <param name="cancellationToken">The caller's cancellation token; a 5 second timeout is layered on top of it.</param>
    /// <returns>The computed result, which is also cached.</returns>
    private async Task<HealthCheckResult> CheckStatusPageAsync(CancellationToken cancellationToken)
    {
        var client = httpClientFactory.CreateClient("OpenAIHealthCheck");
        var statusEndpoint = new Uri("https://status.openai.com/api/v2/status.json");
        var timeout = TimeSpan.FromSeconds(5);

        // Linked source: cancels on either the caller's token or the local timeout.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        cts.CancelAfter(timeout);

        using var req = new HttpRequestMessage(HttpMethod.Get, statusEndpoint);
        req.Headers.Accept.ParseAdd("application/json");

        HttpResponseMessage resp;
        try
        {
            resp = await client.SendAsync(req, HttpCompletionOption.ResponseHeadersRead, cts.Token).ConfigureAwait(false);
        }
        catch (OperationCanceledException oce) when (!cancellationToken.IsCancellationRequested)
        {
            // The caller did not cancel, so the cancellation came from the local timeout.
            return Cache(HealthCheckResult.Unhealthy($"StatusPage request timed out after {timeout.TotalSeconds:0.#}s.", oce));
        }
        catch (Exception ex)
        {
            return Cache(HealthCheckResult.Unhealthy("StatusPage request failed.", ex));
        }

        // The response was requested with ResponseHeadersRead, so the body is still pending;
        // dispose the response on every exit path to release it.
        using (resp)
        {
            if (!resp.IsSuccessStatusCode)
            {
                return Cache(HealthCheckResult.Unhealthy($"StatusPage returned {(int)resp.StatusCode} {resp.ReasonPhrase}."));
            }

            try
            {
                using var stream = await resp.Content.ReadAsStreamAsync(cts.Token).ConfigureAwait(false);
                using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: cts.Token).ConfigureAwait(false);

                // TryGetProperty throws on non-object elements, so verify the kinds up front.
                var root = doc.RootElement;
                if (root.ValueKind != JsonValueKind.Object
                    || !root.TryGetProperty("status", out var statusEl)
                    || statusEl.ValueKind != JsonValueKind.Object)
                {
                    return Cache(HealthCheckResult.Unhealthy("Missing 'status' object in StatusPage response."));
                }

                var indicator = GetStringOrEmpty(statusEl, "indicator");
                var description = GetStringOrEmpty(statusEl, "description");

                return Cache(indicator switch
                {
                    "none" => HealthCheckResult.Healthy(description.Length > 0 ? description : "All systems operational."),
                    "minor" => HealthCheckResult.Degraded(description.Length > 0 ? description : "Minor service issues."),
                    "major" => HealthCheckResult.Unhealthy(description.Length > 0 ? description : "Major service outage."),
                    "critical" => HealthCheckResult.Unhealthy(description.Length > 0 ? description : "Critical service outage."),
                    // Diagnostic data is attached only for unrecognized indicators.
                    _ => HealthCheckResult.Unhealthy(
                        $"Unknown indicator '{indicator}'",
                        data: new Dictionary<string, object>
                        {
                            ["indicator"] = indicator,
                            ["description"] = description,
                            ["endpoint"] = statusEndpoint.ToString()
                        })
                });
            }
            catch (JsonException jex)
            {
                return Cache(HealthCheckResult.Unhealthy("Failed to parse StatusPage JSON.", jex));
            }
        }
    }
}
0 commit comments