diff --git a/ci/vale/styles/config/vocabularies/aiq/accept.txt b/ci/vale/styles/config/vocabularies/aiq/accept.txt index f2b0e8b28..fa88e2bb6 100644 --- a/ci/vale/styles/config/vocabularies/aiq/accept.txt +++ b/ci/vale/styles/config/vocabularies/aiq/accept.txt @@ -61,6 +61,8 @@ groundedness isort Jira jsonlines +Langfuse +LangSmith # libcudf isn't styled in the way that cuDF is https://docs.rapids.ai/api/libcudf/stable/ libcudf LLM(s?) diff --git a/docs/source/workflows/observe/index.md b/docs/source/workflows/observe/index.md index 3bdbeea3a..b3942733f 100644 --- a/docs/source/workflows/observe/index.md +++ b/docs/source/workflows/observe/index.md @@ -17,10 +17,10 @@ limitations under the License. # Observe Workflows -The AIQ toolkit Observability Module provides support for configurable telemetry setup to do logging tracing and metrics for AIQ toolkit workflows. -- Enables users to configure telemetry options from a predefined list based on their preferences. -- Listens real-time usage statistics pushed by `IntermediateStepManager`. -- Translates the usage statistics to OpenTelemetry format and push to the configured provider/method. (e.g., phoenix, OTelCollector, console, file) +The AIQ toolkit Observability Module provides support for configuring logging, tracing, and metrics for AIQ toolkit workflows. Users can configure telemetry options from a predefined list based on their preferences. The logging and tracing exporters: + +- Listen for usage statistics pushed by `IntermediateStepManager`. +- Translate the usage statistics to OpenTelemetry format and push them to the configured provider/method (e.g., phoenix, OTelCollector, console, file). These features enable AIQ toolkit developers to test their workflows locally and integrate observability seamlessly. @@ -38,31 +38,10 @@ This will install: ## Configurable Components -Users can set up telemetry configuration within the workflow configuration file. 
- -### **Logging Configuration** -Users can write logs to: -- **Console** (`console`) -- **Temporary file** (`file`) -- **Both** (by specifying both options) - -#### **Configuration Fields** -- **`_type`**: Accepted values → `console`, `file` -- **`level`**: Log level (e.g., `DEBUG`, `INFO`, `WARN`, `ERROR`) -- **`path`** *(for file logging only)*: File path where logs will be stored. - -### **Tracing Configuration** -Users can set up tracing using: -- **Phoenix** (requires `[telemetry]` extra) -- **Custom providers** *(See registration section below.)* - -#### **Configuration Fields** -- **`_type`**: The name of the registered provider. -- **`endpoint`**: The provider's listening endpoint. -- **`project`**: The associated project name. +The observability module is configured using the `general.telemetry` section in the workflow configuration file. This section contains two subsections, `logging` and `tracing`, each of which can contain one or more telemetry providers. +The following sample configuration file illustrates all configurable components. -Sample Configuration: ```yaml general: telemetry: @@ -81,6 +60,81 @@ general: project: simple_calculator ``` +### **Logging Configuration** + +The `logging` section contains one or more logging providers. Each provider has a `_type` and optional configuration fields. The following logging providers are supported by default: + +- `console`: Writes logs to the console. +- `file`: Writes logs to a file. + +To see the complete list of configuration fields for each logging provider, run the `aiq info -t logging` command. 
For example: + +```bash +$ aiq info -t logging + AIQ Toolkit Search Results +┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ package ┃ version ┃ component_type ┃ component_name ┃ description ┃ +┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ aiqtoolkit │ 1.2.0.dev15+g2322037 │ logging │ console │ A logger to write runtime logs to the console. │ +│ │ │ │ │ │ +│ │ │ │ │ Args: │ +│ │ │ │ │ _type (str): The type of the object. │ +│ │ │ │ │ level (str): The logging level of console logger. │ +├────────────┼──────────────────────┼────────────────┼────────────────┼───────────────────────────────────────────────────────────┤ +│ aiqtoolkit │ 1.2.0.dev15+g2322037 │ logging │ file │ A logger to write runtime logs to a file. │ +│ │ │ │ │ │ +│ │ │ │ │ Args: │ +│ │ │ │ │ _type (str): The type of the object. │ +│ │ │ │ │ path (str): The file path to save the logging output. │ +│ │ │ │ │ level (str): The logging level of file logger. │ +└────────────┴──────────────────────┴────────────────┴────────────────┴───────────────────────────────────────────────────────────┘ +``` + +### **Tracing Configuration** + +The `tracing` section contains one or more tracing providers. Each provider has a `_type` and optional configuration fields. 
The following tracing providers are supported by default: + +- [**W&B Weave**](https://wandb.ai/site/weave/) + - Example configuration: + ```yaml + tracing: + weave: + _type: weave + project: "aiqtoolkit-demo" + ``` + - See [Observing with W&B Weave](./observe-workflow-with-weave.md) for more information. +- [**Phoenix**](https://phoenix.arize.com/) + - Example configuration: + ```yaml + tracing: + phoenix: + _type: phoenix + endpoint: http://localhost:6006/v1/traces + project: "aiqtoolkit-demo" + ``` + - See [Observing with Phoenix](./observe-workflow-with-phoenix.md) for more information. +- [**Langfuse**](https://langfuse.com/) + - Example configuration (when `public_key` and `secret_key` are not set here, they are read from the `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` environment variables; both are required): + ```yaml + tracing: + langfuse: + _type: langfuse + endpoint: http://localhost:3000/api/public/otel/v1/traces + ``` +- [**LangSmith**](https://www.langchain.com/langsmith) + - Example configuration (when `api_key` is not set here, it is read from the `LANGSMITH_API_KEY` environment variable; it is required): + ```yaml + tracing: + langsmith: + _type: langsmith + project: default + ``` +- **Custom providers** + - See [Registering a New Telemetry Provider as a Plugin](#registering-a-new-telemetry-provider-as-a-plugin) for more information. + + +To see the complete list of configuration fields for each tracing provider, run the `aiq info -t tracing` command. + ### AIQ Toolkit Observability Components diff --git a/examples/simple_calculator/src/aiq_simple_calculator/configs/config-langfuse.yml b/examples/simple_calculator/src/aiq_simple_calculator/configs/config-langfuse.yml new file mode 100644 index 000000000..a62dca62d --- /dev/null +++ b/examples/simple_calculator/src/aiq_simple_calculator/configs/config-langfuse.yml @@ -0,0 +1,58 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +general: + use_uvloop: true + telemetry: + tracing: + langfuse: + _type: langfuse + endpoint: http://localhost:3000/api/public/otel/v1/traces + +functions: + calculator_multiply: + _type: calculator_multiply + calculator_inequality: + _type: calculator_inequality + calculator_divide: + _type: aiq_simple_calculator/calculator_divide + current_datetime: + _type: current_datetime + calculator_subtract: + _type: calculator_subtract + +llms: + nim_llm: + _type: nim + model_name: meta/llama-3.1-70b-instruct + temperature: 0.0 + max_tokens: 1024 + openai_llm: + _type: openai + model_name: gpt-3.5-turbo + max_tokens: 2000 + +workflow: + _type: react_agent + tool_names: + - calculator_multiply + - calculator_inequality + - current_datetime + - calculator_divide + - calculator_subtract + llm_name: nim_llm + verbose: true + retry_parsing_errors: true + max_retries: 3 diff --git a/examples/simple_calculator/src/aiq_simple_calculator/configs/config-langsmith.yml b/examples/simple_calculator/src/aiq_simple_calculator/configs/config-langsmith.yml new file mode 100644 index 000000000..0571f18f5 --- /dev/null +++ b/examples/simple_calculator/src/aiq_simple_calculator/configs/config-langsmith.yml @@ -0,0 +1,58 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +general: + use_uvloop: true + telemetry: + tracing: + langsmith: + _type: langsmith + project: default + +functions: + calculator_multiply: + _type: calculator_multiply + calculator_inequality: + _type: calculator_inequality + calculator_divide: + _type: aiq_simple_calculator/calculator_divide + current_datetime: + _type: current_datetime + calculator_subtract: + _type: calculator_subtract + +llms: + nim_llm: + _type: nim + model_name: meta/llama-3.1-70b-instruct + temperature: 0.0 + max_tokens: 1024 + openai_llm: + _type: openai + model_name: gpt-3.5-turbo + max_tokens: 2000 + +workflow: + _type: react_agent + tool_names: + - calculator_multiply + - calculator_inequality + - current_datetime + - calculator_divide + - calculator_subtract + llm_name: nim_llm + verbose: true + retry_parsing_errors: true + max_retries: 3 diff --git a/src/aiq/observability/register.py b/src/aiq/observability/register.py index 40f6da7e3..d48249a5a 100644 --- a/src/aiq/observability/register.py +++ b/src/aiq/observability/register.py @@ -14,6 +14,7 @@ # limitations under the License. 
import logging +import os from pydantic import Field @@ -22,6 +23,7 @@ from aiq.cli.register_workflow import register_telemetry_exporter from aiq.data_models.logging import LoggingBaseConfig from aiq.data_models.telemetry_exporter import TelemetryExporterBaseConfig +from aiq.utils.optional_imports import telemetry_optional_import from aiq.utils.optional_imports import try_import_opentelemetry from aiq.utils.optional_imports import try_import_phoenix @@ -42,11 +44,67 @@ async def phoenix_telemetry_exporter(config: PhoenixTelemetryExporter, builder: # If the dependencies are not installed, a TelemetryOptionalImportError will be raised phoenix = try_import_phoenix() # noqa: F841 from phoenix.otel import HTTPSpanExporter + yield HTTPSpanExporter(config.endpoint) except ConnectionError as ex: - logger.warning("Unable to connect to Phoenix at port 6006. Are you sure Phoenix is running?\n %s", - ex, - exc_info=True) + logger.warning( + "Unable to connect to Phoenix at port 6006. Are you sure Phoenix is running?\n %s", + ex, + exc_info=True, + ) + + +class LangfuseTelemetryExporter(TelemetryExporterBaseConfig, name="langfuse"): + """A telemetry exporter to transmit traces to externally hosted langfuse service.""" + + endpoint: str = Field(description="The langfuse OTEL endpoint (/api/public/otel/v1/traces)") + public_key: str = Field(description="The Langfuse public key", default="") + secret_key: str = Field(description="The Langfuse secret key", default="") + + +@register_telemetry_exporter(config_type=LangfuseTelemetryExporter) +async def langfuse_telemetry_exporter(config: LangfuseTelemetryExporter, builder: Builder): + """Create a Langfuse telemetry exporter.""" + + import base64 + + trace_exporter = telemetry_optional_import("opentelemetry.exporter.otlp.proto.http.trace_exporter") + + secret_key = config.secret_key or os.environ.get("LANGFUSE_SECRET_KEY") + public_key = config.public_key or os.environ.get("LANGFUSE_PUBLIC_KEY") + if not secret_key or not public_key: 
+ raise ValueError("secret and public keys are required for langfuse") + + credentials = f"{public_key}:{secret_key}".encode("utf-8") + auth_header = base64.b64encode(credentials).decode("utf-8") + headers = {"Authorization": f"Basic {auth_header}"} + + yield trace_exporter.OTLPSpanExporter(endpoint=config.endpoint, headers=headers) + + +class LangsmithTelemetryExporter(TelemetryExporterBaseConfig, name="langsmith"): + """A telemetry exporter to transmit traces to externally hosted langsmith service.""" + + endpoint: str = Field( + description="The langsmith OTEL endpoint", + default="https://api.smith.langchain.com/otel/v1/traces", + ) + api_key: str = Field(description="The Langsmith API key", default="") + project: str = Field(description="The project name to group the telemetry traces.") + + +@register_telemetry_exporter(config_type=LangsmithTelemetryExporter) +async def langsmith_telemetry_exporter(config: LangsmithTelemetryExporter, builder: Builder): + """Create a Langsmith telemetry exporter.""" + + trace_exporter = telemetry_optional_import("opentelemetry.exporter.otlp.proto.http.trace_exporter") + + api_key = config.api_key or os.environ.get("LANGSMITH_API_KEY") + if not api_key: + raise ValueError("API key is required for langsmith") + + headers = {"x-api-key": api_key, "LANGSMITH_PROJECT": config.project} + yield trace_exporter.OTLPSpanExporter(endpoint=config.endpoint, headers=headers) class OtelCollectorTelemetryExporter(TelemetryExporterBaseConfig, name="otelcollector"): @@ -73,8 +131,8 @@ class ConsoleLoggingMethod(LoggingBaseConfig, name="console"): @register_logging_method(config_type=ConsoleLoggingMethod) async def console_logging_method(config: ConsoleLoggingMethod, builder: Builder): """ - Build and return a StreamHandler for console-based logging. - """ + Build and return a StreamHandler for console-based logging. 
+ """ level = getattr(logging, config.level.upper(), logging.INFO) handler = logging.StreamHandler() handler.setLevel(level) @@ -91,8 +149,8 @@ class FileLoggingMethod(LoggingBaseConfig, name="file"): @register_logging_method(config_type=FileLoggingMethod) async def file_logging_method(config: FileLoggingMethod, builder: Builder): """ - Build and return a FileHandler for file-based logging. - """ + Build and return a FileHandler for file-based logging. + """ level = getattr(logging, config.level.upper(), logging.INFO) handler = logging.FileHandler(filename=config.path, mode="a", encoding="utf-8") handler.setLevel(level)