diff --git a/README.md b/README.md index 01d739844..048a3d697 100644 --- a/README.md +++ b/README.md @@ -328,13 +328,14 @@ The following sets of tools are available (toolsets marked with ✓ in the Defau -| Toolset | Description | Default | -|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| -| config | View and manage the current local Kubernetes configuration (kubeconfig) | ✓ | -| core | Most common tools for Kubernetes management (Pods, Generic Resources, Events, etc.) | ✓ | -| kiali | Most common tools for managing Kiali, check the [Kiali documentation](https://github.com/containers/kubernetes-mcp-server/blob/main/docs/KIALI.md) for more details. | | -| kubevirt | KubeVirt virtual machine management tools | | -| helm | Tools for managing Helm charts and releases | ✓ | +| Toolset | Description | Default | +|---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| +| config | View and manage the current local Kubernetes configuration (kubeconfig) | ✓ | +| core | Most common tools for Kubernetes management (Pods, Generic Resources, Events, etc.) | ✓ | +| ossm | Most common tools for managing OSSM, check the [OSSM documentation](https://github.com/openshift/openshift-mcp-server/blob/main/docs/OSSM.md) for more details. 
| | +| kubevirt | KubeVirt virtual machine management tools | | +| observability | Cluster observability tools for querying Prometheus metrics and Alertmanager alerts | ✓ | +| helm | Tools for managing Helm charts and releases | ✓ | @@ -350,6 +351,8 @@ In case multi-cluster support is enabled (default) and you have access to multip - **configuration_contexts_list** - List all available context names and associated server urls from the kubeconfig file +- **targets_list** - List all available targets + - **configuration_view** - Get the current Kubernetes configuration content as a kubeconfig YAML - `minified` (`boolean`) - Return a minified version of the configuration. If set to true, keeps only the current-context and the relevant pieces of the configuration for that context. If set to false, all contexts, clusters, auth-infos, and users are returned in the configuration. (Optional, default true) @@ -379,9 +382,11 @@ In case multi-cluster support is enabled (default) and you have access to multip - `name` (`string`) - Name of the Node to get the resource consumption from (Optional, all Nodes if not provided) - **pods_list** - List all the Kubernetes pods in the current cluster from all namespaces + - `fieldSelector` (`string`) - Optional Kubernetes field selector to filter pods by field values (e.g. 'status.phase=Running', 'spec.nodeName=node1'). Supported fields: metadata.name, metadata.namespace, spec.nodeName, spec.restartPolicy, spec.schedulerName, spec.serviceAccountName, status.phase (Pending/Running/Succeeded/Failed/Unknown), status.podIP, status.nominatedNodeName. Note: CrashLoopBackOff is a container state, not a pod phase, so it cannot be filtered directly. See https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ - `labelSelector` (`string`) - Optional Kubernetes label selector (e.g. 
'app=myapp,env=prod' or 'app in (myapp,yourapp)'), use this option when you want to filter the pods by label - **pods_list_in_namespace** - List all the Kubernetes pods in the specified namespace in the current cluster + - `fieldSelector` (`string`) - Optional Kubernetes field selector to filter pods by field values (e.g. 'status.phase=Running', 'spec.nodeName=node1'). Supported fields: metadata.name, metadata.namespace, spec.nodeName, spec.restartPolicy, spec.schedulerName, spec.serviceAccountName, status.phase (Pending/Running/Succeeded/Failed/Unknown), status.podIP, status.nominatedNodeName. Note: CrashLoopBackOff is a container state, not a pod phase, so it cannot be filtered directly. See https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ - `labelSelector` (`string`) - Optional Kubernetes label selector (e.g. 'app=myapp,env=prod' or 'app in (myapp,yourapp)'), use this option when you want to filter the pods by label - `namespace` (`string`) **(required)** - Namespace to list pods from @@ -421,8 +426,9 @@ In case multi-cluster support is enabled (default) and you have access to multip - **resources_list** - List Kubernetes resources and objects in the current cluster by providing their apiVersion and kind and optionally the namespace and label selector (common apiVersion and kind include: v1 Pod, v1 Service, v1 Node, apps/v1 Deployment, networking.k8s.io/v1 Ingress, route.openshift.io/v1 Route) - `apiVersion` (`string`) **(required)** - apiVersion of the resources (examples of valid apiVersion are: v1, apps/v1, networking.k8s.io/v1) + - `fieldSelector` (`string`) - Optional Kubernetes field selector to filter resources by field values (e.g. 'status.phase=Running', 'metadata.name=myresource'). Supported fields vary by resource type. 
For Pods: metadata.name, metadata.namespace, spec.nodeName, spec.restartPolicy, spec.schedulerName, spec.serviceAccountName, status.phase (Pending/Running/Succeeded/Failed/Unknown), status.podIP, status.nominatedNodeName. See https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ - `kind` (`string`) **(required)** - kind of the resources (examples of valid kind are: Pod, Service, Deployment, Ingress) - - `labelSelector` (`string`) - Optional Kubernetes label selector (e.g. 'app=myapp,env=prod' or 'app in (myapp,yourapp)'), use this option when you want to filter the pods by label + - `labelSelector` (`string`) - Optional Kubernetes label selector (e.g. 'app=myapp,env=prod' or 'app in (myapp,yourapp)'), use this option when you want to filter the resources by label - `namespace` (`string`) - Optional Namespace to retrieve the namespaced resources from (ignored in case of cluster scoped resources). If not provided, will list resources from all namespaces - **resources_get** - Get a Kubernetes resource in the current cluster by providing its apiVersion, kind, optionally the namespace, and its name @@ -454,15 +460,15 @@ In case multi-cluster support is enabled (default) and you have access to multip
-kiali +ossm -- **kiali_mesh_graph** - Returns the topology of a specific namespaces, health, status of the mesh and namespaces. Includes a mesh health summary overview with aggregated counts of healthy, degraded, and failing apps, workloads, and services. Use this for high-level overviews +- **ossm_mesh_graph** - Returns the topology of specific namespaces, health, status of the mesh and namespaces. Includes a mesh health summary overview with aggregated counts of healthy, degraded, and failing apps, workloads, and services. Use this for high-level overviews - `graphType` (`string`) - Optional type of graph to return: 'versionedApp', 'app', 'service', 'workload', 'mesh' - `namespace` (`string`) - Optional single namespace to include in the graph (alternative to namespaces) - `namespaces` (`string`) - Optional comma-separated list of namespaces to include in the graph - `rateInterval` (`string`) - Optional rate interval for fetching (e.g., '10m', '5m', '1h'). -- **kiali_manage_istio_config** - Manages Istio configuration objects (Gateways, VirtualServices, etc.). Can list (objects and validations), get, create, patch, or delete objects +- **ossm_manage_istio_config** - Manages Istio configuration objects (Gateways, VirtualServices, etc.). 
Can list (objects and validations), get, create, patch, or delete objects - `action` (`string`) **(required)** - Action to perform: list, get, create, patch, or delete - `group` (`string`) - API group of the Istio object (e.g., 'networking.istio.io', 'gateway.networking.k8s.io') - `json_data` (`string`) - JSON data to apply or create the object @@ -471,12 +477,12 @@ In case multi-cluster support is enabled (default) and you have access to multip - `namespace` (`string`) - Namespace containing the Istio object - `version` (`string`) - API version of the Istio object (e.g., 'v1', 'v1beta1') -- **kiali_get_resource_details** - Gets lists or detailed info for Kubernetes resources (services, workloads) within the mesh +- **ossm_get_resource_details** - Gets lists or detailed info for Kubernetes resources (services, workloads) within the mesh - `namespaces` (`string`) - Comma-separated list of namespaces to get services from (e.g. 'bookinfo' or 'bookinfo,default'). If not provided, will list services from all accessible namespaces - `resource_name` (`string`) - Name of the resource to get details for (optional string - if provided, gets details; if empty, lists all). - `resource_type` (`string`) - Type of resource to get details for (service, workload) -- **kiali_get_metrics** - Gets lists or detailed info for Kubernetes resources (services, workloads) within the mesh +- **ossm_get_metrics** - Gets metrics for Kubernetes resources (services, workloads) within the mesh - `byLabels` (`string`) - Comma-separated list of labels to group metrics by (e.g., 'source_workload,destination_service'). Optional - `direction` (`string`) - Traffic direction: 'inbound' or 'outbound'. Optional, defaults to 'outbound' - `duration` (`string`) - Time range to get metrics for (optional string - if provided, gets metrics (e.g., '1m', '5m', '1h'); if empty, get default 30m). 
@@ -489,14 +495,14 @@ In case multi-cluster support is enabled (default) and you have access to multip - `resource_type` (`string`) **(required)** - Type of resource to get details for (service, workload) - `step` (`string`) - Step between data points in seconds (e.g., '15'). Optional, defaults to 15 seconds -- **kiali_workload_logs** - Get logs for a specific workload's pods in a namespace. Only requires namespace and workload name - automatically discovers pods and containers. Optionally filter by container name, time range, and other parameters. Container is auto-detected if not specified. +- **ossm_workload_logs** - Get logs for a specific workload's pods in a namespace. Only requires namespace and workload name - automatically discovers pods and containers. Optionally filter by container name, time range, and other parameters. Container is auto-detected if not specified. - `container` (`string`) - Optional container name to filter logs. If not provided, automatically detects and uses the main application container (excludes istio-proxy and istio-init) - `namespace` (`string`) **(required)** - Namespace containing the workload - `since` (`string`) - Time duration to fetch logs from (e.g., '5m', '1h', '30s'). If not provided, returns recent logs - `tail` (`integer`) - Number of lines to retrieve from the end of logs (default: 100) - `workload` (`string`) **(required)** - Name of the workload to get logs for -- **kiali_get_traces** - Gets traces for a specific resource (app, service, workload) in a namespace, or gets detailed information for a specific trace by its ID. If traceId is provided, it returns detailed trace information and other parameters are not required. +- **ossm_get_traces** - Gets traces for a specific resource (app, service, workload) in a namespace, or gets detailed information for a specific trace by its ID. If traceId is provided, it returns detailed trace information and other parameters are not required. 
- `clusterName` (`string`) - Cluster name for multi-cluster environments (optional, only used when traceId is not provided) - `endMicros` (`string`) - End time for traces in microseconds since epoch (optional, defaults to 10 minutes after startMicros if not provided, only used when traceId is not provided) - `limit` (`integer`) - Maximum number of traces to return (default: 100, only used when traceId is not provided) @@ -534,6 +540,53 @@ In case multi-cluster support is enabled (default) and you have access to multip
+observability + +- **prometheus_query** - Execute an instant PromQL query against the cluster's Thanos Querier. +Returns current metric values at the specified time (or current time if not specified). +Use this for point-in-time metric values. + +Common queries: +- up{job="apiserver"} - Check if API server is up +- sum by(namespace) (container_memory_usage_bytes) - Memory usage by namespace +- rate(container_cpu_usage_seconds_total[5m]) - CPU usage rate +- kube_pod_status_phase{phase="Running"} - Running pods count + - `query` (`string`) **(required)** - PromQL query string (e.g., 'up{job="apiserver"}', 'sum by(namespace) (container_memory_usage_bytes)') + - `time` (`string`) - Optional evaluation timestamp. Accepts RFC3339 format (e.g., '2024-01-01T12:00:00Z') or Unix timestamp. If not provided, uses current time. + +- **prometheus_query_range** - Execute a range PromQL query against the cluster's Thanos Querier. +Returns metric values over a time range with specified resolution. +Use this for time-series data, trends, and historical analysis. + +Supports relative times: +- 'now' for current time +- '-10m', '-1h', '-1d' for relative past times + +Example: Get CPU usage over the last hour with 1-minute resolution. + - `end` (`string`) **(required)** - End time. Accepts RFC3339 timestamp, Unix timestamp, 'now', or relative time + - `query` (`string`) **(required)** - PromQL query string (e.g., 'rate(container_cpu_usage_seconds_total[5m])') + - `start` (`string`) **(required)** - Start time. Accepts RFC3339 timestamp (e.g., '2024-01-01T12:00:00Z'), Unix timestamp, or relative time (e.g., '-1h', '-30m', '-1d') + - `step` (`string`) - Query resolution step width (e.g., '15s', '1m', '5m'). Determines the granularity of returned data points. Default: '1m' + +- **alertmanager_alerts** - Query active and pending alerts from the cluster's Alertmanager. +Useful for monitoring cluster health, detecting issues, and incident response. 
+ +Returns alerts with their labels, annotations, status, and timing information. +Can filter by active/silenced/inhibited state. + +Common use cases: +- Check for critical alerts affecting the cluster +- Monitor for specific alert types (e.g., high CPU, disk pressure) +- Verify alert silences are working correctly + - `active` (`boolean`) - Filter for active (firing) alerts. Default: true + - `filter` (`string`) - Optional filter using Alertmanager filter syntax. Examples: 'alertname=Watchdog', 'severity=critical', 'namespace=openshift-monitoring' + - `inhibited` (`boolean`) - Include inhibited alerts in the results. Default: false + - `silenced` (`boolean`) - Include silenced alerts in the results. Default: false + +
+ +
+ helm - **helm_install** - Install a Helm chart in the current or provided namespace diff --git a/docs/OBSERVABILITY.md b/docs/OBSERVABILITY.md new file mode 100644 index 000000000..71d49372a --- /dev/null +++ b/docs/OBSERVABILITY.md @@ -0,0 +1,247 @@ +# Observability Toolset + +This toolset provides tools for querying OpenShift cluster observability data including Prometheus metrics and Alertmanager alerts. + +## Tools + +### prometheus_query + +Execute instant PromQL queries against the cluster's Thanos Querier. + +**Parameters:** +- `query` (required) - PromQL query string +- `time` (optional) - Evaluation timestamp (RFC3339, Unix timestamp, or relative like `-5m`, `now`) + +**Example:** +``` +Query: up{job="apiserver"} +``` + +### prometheus_query_range + +Execute range PromQL queries for time-series data. + +**Parameters:** +- `query` (required) - PromQL query string +- `start` (required) - Start time (RFC3339, Unix timestamp, or relative like `-1h`) +- `end` (required) - End time (RFC3339, Unix timestamp, or relative like `now`) +- `step` (optional) - Query resolution step (default: `1m`) + +**Example:** +``` +Query: rate(container_cpu_usage_seconds_total[5m]) +Start: -1h +End: now +Step: 1m +``` + +### alertmanager_alerts + +Query alerts from the cluster's Alertmanager. 
+ +**Parameters:** +- `active` (optional) - Include active alerts (default: true) +- `silenced` (optional) - Include silenced alerts (default: false) +- `inhibited` (optional) - Include inhibited alerts (default: false) +- `filter` (optional) - Label filter in PromQL format (e.g., `alertname="Watchdog"`) + +**Example:** +``` +Active: true +Filter: severity="critical" +``` + +## Enable the Observability Toolset + +### Option 1: Command Line + +```bash +kubernetes-mcp-server --toolsets core,config,helm,observability +``` + +### Option 2: Configuration File + +```toml +toolsets = ["core", "config", "helm", "observability"] +``` + +### Option 3: MCP Client Configuration + +```json +{ + "mcpServers": { + "kubernetes": { + "command": "npx", + "args": ["-y", "kubernetes-mcp-server@latest", "--toolsets", "core,config,helm,observability"] + } + } +} +``` + +## Configuration + +The observability toolset supports optional configuration via the config file: + +```toml +[observability] +# Custom monitoring namespace (default: "openshift-monitoring") +monitoring_namespace = "custom-monitoring" +``` + +| Option | Default | Description | +|--------|---------|-------------| +| `monitoring_namespace` | `openshift-monitoring` | Namespace where Prometheus and Alertmanager routes are located | + +## Prerequisites + +The observability tools require: + +1. **OpenShift cluster** - These tools are designed for OpenShift and rely on OpenShift-specific routes +2. **Monitoring stack enabled** - The cluster must have the monitoring stack deployed (default in OpenShift) +3. 
**Proper RBAC** - The user/service account must have permissions to: + - Read routes in `openshift-monitoring` namespace + - Access the Thanos Querier and Alertmanager APIs + +## How It Works + +### Route Discovery + +The tools automatically discover the Prometheus (Thanos Querier) and Alertmanager endpoints by reading OpenShift routes: + +- **Thanos Querier**: `thanos-querier` route in `openshift-monitoring` namespace +- **Alertmanager**: `alertmanager-main` route in `openshift-monitoring` namespace + +### Authentication + +The tools use the bearer token from your Kubernetes configuration to authenticate with the monitoring endpoints. This is the same credential used to access the cluster. + +### Relative Time Support + +Time parameters support multiple formats: + +| Format | Example | Description | +|--------|---------|-------------| +| RFC3339 | `2024-01-15T10:00:00Z` | Absolute timestamp | +| Unix | `1705312800` | Unix timestamp in seconds | +| Relative | `-10m`, `-1h`, `-1d` | Relative to current time | +| Keyword | `now` | Current time | + +## Security Considerations + +### Allowed Prometheus Endpoints + +Only read-only Prometheus API endpoints are allowed: +- `/api/v1/query` - Instant queries +- `/api/v1/query_range` - Range queries +- `/api/v1/series` - Series metadata +- `/api/v1/labels` - Label names +- `/api/v1/label//values` - Label values + +Administrative endpoints (like `/api/v1/admin/*`) are blocked. 
+ +### Allowed Alertmanager Endpoints + +Only alert query endpoints are allowed: +- `/api/v2/alerts` - List alerts +- `/api/v2/silences` - List silences +- `/api/v1/alerts` - Legacy alert endpoint + +### Query Limits + +- Maximum query length: 10,000 characters +- Maximum response size: 10MB + +## Common Use Cases + +### Cluster Health + +**Check if all API servers are up:** +``` +Query: up{job="apiserver"} +``` + +**API server request latency (99th percentile):** +``` +Query: histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket[5m])) by (le, verb)) +``` + +### Node and Pod Metrics + +**Node CPU usage percentage:** +``` +Query: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) +``` + +**Pods in CrashLoopBackOff:** +``` +Query: kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff"} > 0 +``` + +**Container memory usage by namespace:** +``` +Query: sum by(namespace) (container_memory_working_set_bytes{container!=""}) +``` + +### Alerting + +**Get all firing critical alerts:** +``` +Tool: alertmanager_alerts +Active: true +Filter: severity="critical" +``` + +**Count alerts by severity:** +``` +Query: count by(severity) (ALERTS{alertstate="firing"}) +``` + +### Network + +**Network receive rate by pod:** +``` +Query: rate(container_network_receive_bytes_total[5m]) +Start: -1h +End: now +Step: 1m +``` + +### etcd Health + +**etcd leader changes:** +``` +Query: changes(etcd_server_leader_changes_seen_total[1h]) +``` + +**etcd disk sync duration:** +``` +Query: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) +``` + +## Troubleshooting + +### "failed to get route" Error + +The monitoring routes may not exist or the user lacks permissions: +```bash +oc get routes -n openshift-monitoring +``` + +### "no bearer token available" Error + +Ensure your kubeconfig has a valid token: +```bash +oc whoami +oc get pods -n openshift-monitoring +``` + +### Empty Results from Prometheus + 
+Verify the query works in the OpenShift console: +1. Go to **Observe** > **Metrics** +2. Enter your PromQL query +3. Check for results + +### TLS Certificate Errors + +The tools use `InsecureSkipVerify` for route access. If you need strict TLS verification, this would require additional configuration. diff --git a/internal/tools/update-readme/main.go b/internal/tools/update-readme/main.go index 9273d15a5..b2181483c 100644 --- a/internal/tools/update-readme/main.go +++ b/internal/tools/update-readme/main.go @@ -18,6 +18,7 @@ import ( _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/helm" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kiali" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt" + _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/observability" ) type OpenShift struct{} diff --git a/pkg/config/config_default.go b/pkg/config/config_default.go index febea70cf..67c5f82fc 100644 --- a/pkg/config/config_default.go +++ b/pkg/config/config_default.go @@ -9,7 +9,7 @@ import ( func Default() *StaticConfig { defaultConfig := StaticConfig{ ListOutput: "table", - Toolsets: []string{"core", "config", "helm"}, + Toolsets: []string{"core", "config", "helm", "observability"}, } overrides := defaultOverrides() mergedConfig := mergeConfig(defaultConfig, overrides) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 862551773..e3a88f75c 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -247,8 +247,8 @@ func (s *ConfigSuite) TestReadConfigValidPreservesDefaultsForMissingFields() { s.Equalf("table", config.ListOutput, "Expected ListOutput to be table, got %s", config.ListOutput) }) s.Run("toolsets defaulted correctly", func() { - s.Require().Lenf(config.Toolsets, 3, "Expected 3 toolsets, got %d", len(config.Toolsets)) - for _, toolset := range []string{"core", "config", "helm"} { + s.Require().Lenf(config.Toolsets, 4, "Expected 4 toolsets, got %d", len(config.Toolsets)) + for 
_, toolset := range []string{"core", "config", "helm", "observability"} { s.Containsf(config.Toolsets, toolset, "Expected toolsets to contain %s", toolset) } }) @@ -568,7 +568,7 @@ func (s *ConfigSuite) TestStandaloneConfigDirPreservesDefaults() { s.Run("preserves default values", func() { s.Equal("9999", config.Port, "port should be from drop-in") s.Equal("table", config.ListOutput, "list_output should be default") - s.Equal([]string{"core", "config", "helm"}, config.Toolsets, "toolsets should be default") + s.Equal([]string{"core", "config", "helm", "observability"}, config.Toolsets, "toolsets should be default") }) } @@ -585,7 +585,7 @@ func (s *ConfigSuite) TestStandaloneConfigDirEmpty() { s.Run("returns defaults for empty directory", func() { s.Equal("table", config.ListOutput, "list_output should be default") - s.Equal([]string{"core", "config", "helm"}, config.Toolsets, "toolsets should be default") + s.Equal([]string{"core", "config", "helm", "observability"}, config.Toolsets, "toolsets should be default") }) } @@ -914,7 +914,7 @@ func (s *ConfigSuite) TestBothConfigAndConfigDirEmpty() { s.Run("returns default configuration", func() { s.Equal("table", config.ListOutput) - s.Equal([]string{"core", "config", "helm"}, config.Toolsets) + s.Equal([]string{"core", "config", "helm", "observability"}, config.Toolsets) s.Equal(0, config.LogLevel) }) } @@ -1034,7 +1034,7 @@ func (s *ConfigSuite) TestEmptyConfigFile() { s.Equal("9999", config.Port, "port should be from drop-in") // Defaults should still be applied for unset values s.Equal("table", config.ListOutput, "list_output should be default") - s.Equal([]string{"core", "config", "helm"}, config.Toolsets, "toolsets should be default") + s.Equal([]string{"core", "config", "helm", "observability"}, config.Toolsets, "toolsets should be default") }) } diff --git a/pkg/mcp/modules.go b/pkg/mcp/modules.go index 255f42177..7502c2e12 100644 --- a/pkg/mcp/modules.go +++ b/pkg/mcp/modules.go @@ -6,4 +6,5 @@ import ( _ 
"github.com/containers/kubernetes-mcp-server/pkg/toolsets/helm" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kiali" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt" + _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/observability" ) diff --git a/pkg/mcp/testdata/toolsets-full-tools-multicluster-enum.json b/pkg/mcp/testdata/toolsets-full-tools-multicluster-enum.json index da8e244a3..ce0ef9a61 100644 --- a/pkg/mcp/testdata/toolsets-full-tools-multicluster-enum.json +++ b/pkg/mcp/testdata/toolsets-full-tools-multicluster-enum.json @@ -1,4 +1,47 @@ [ + { + "annotations": { + "title": "Alertmanager: Get Alerts", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Query active and pending alerts from the cluster's Alertmanager.\nUseful for monitoring cluster health, detecting issues, and incident response.\n\nReturns alerts with their labels, annotations, status, and timing information.\nCan filter by active/silenced/inhibited state.\n\nCommon use cases:\n- Check for critical alerts affecting the cluster\n- Monitor for specific alert types (e.g., high CPU, disk pressure)\n- Verify alert silences are working correctly", + "inputSchema": { + "type": "object", + "properties": { + "active": { + "default": true, + "description": "Filter for active (firing) alerts. Default: true", + "type": "boolean" + }, + "context": { + "description": "Optional parameter selecting which context to run the tool in. Defaults to fake-context if not set", + "enum": [ + "extra-cluster", + "fake-context" + ], + "type": "string" + }, + "filter": { + "description": "Optional filter using Alertmanager filter syntax. Examples: 'alertname=Watchdog', 'severity=critical', 'namespace=openshift-monitoring'", + "type": "string" + }, + "inhibited": { + "default": false, + "description": "Include inhibited alerts in the results. 
Default: false", + "type": "boolean" + }, + "silenced": { + "default": false, + "description": "Include silenced alerts in the results. Default: false", + "type": "boolean" + } + } + }, + "name": "alertmanager_alerts" + }, { "annotations": { "title": "Configuration: Contexts List", @@ -610,6 +653,87 @@ }, "name": "pods_top" }, + { + "annotations": { + "title": "Prometheus: Instant Query", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Execute an instant PromQL query against the cluster's Thanos Querier.\nReturns current metric values at the specified time (or current time if not specified).\nUse this for point-in-time metric values.\n\nCommon queries:\n- up{job=\"apiserver\"} - Check if API server is up\n- sum by(namespace) (container_memory_usage_bytes) - Memory usage by namespace\n- rate(container_cpu_usage_seconds_total[5m]) - CPU usage rate\n- kube_pod_status_phase{phase=\"Running\"} - Running pods count", + "inputSchema": { + "type": "object", + "properties": { + "context": { + "description": "Optional parameter selecting which context to run the tool in. Defaults to fake-context if not set", + "enum": [ + "extra-cluster", + "fake-context" + ], + "type": "string" + }, + "query": { + "description": "PromQL query string (e.g., 'up{job=\"apiserver\"}', 'sum by(namespace) (container_memory_usage_bytes)')", + "type": "string" + }, + "time": { + "description": "Optional evaluation timestamp. Accepts RFC3339 format (e.g., '2024-01-01T12:00:00Z') or Unix timestamp. 
If not provided, uses current time.", + "type": "string" + } + }, + "required": [ + "query" + ] + }, + "name": "prometheus_query" + }, + { + "annotations": { + "title": "Prometheus: Range Query", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Execute a range PromQL query against the cluster's Thanos Querier.\nReturns metric values over a time range with specified resolution.\nUse this for time-series data, trends, and historical analysis.\n\nSupports relative times:\n- 'now' for current time\n- '-10m', '-1h', '-1d' for relative past times\n\nExample: Get CPU usage over the last hour with 1-minute resolution.", + "inputSchema": { + "type": "object", + "properties": { + "context": { + "description": "Optional parameter selecting which context to run the tool in. Defaults to fake-context if not set", + "enum": [ + "extra-cluster", + "fake-context" + ], + "type": "string" + }, + "end": { + "description": "End time. Accepts RFC3339 timestamp, Unix timestamp, 'now', or relative time", + "type": "string" + }, + "query": { + "description": "PromQL query string (e.g., 'rate(container_cpu_usage_seconds_total[5m])')", + "type": "string" + }, + "start": { + "description": "Start time. Accepts RFC3339 timestamp (e.g., '2024-01-01T12:00:00Z'), Unix timestamp, or relative time (e.g., '-1h', '-30m', '-1d')", + "type": "string" + }, + "step": { + "default": "1m", + "description": "Query resolution step width (e.g., '15s', '1m', '5m'). Determines the granularity of returned data points. 
Default: '1m'", + "type": "string" + } + }, + "required": [ + "query", + "start", + "end" + ] + }, + "name": "prometheus_query_range" + }, { "annotations": { "title": "Resources: Create or Update", diff --git a/pkg/mcp/testdata/toolsets-full-tools-multicluster.json b/pkg/mcp/testdata/toolsets-full-tools-multicluster.json index 691cccaee..559573122 100644 --- a/pkg/mcp/testdata/toolsets-full-tools-multicluster.json +++ b/pkg/mcp/testdata/toolsets-full-tools-multicluster.json @@ -1,4 +1,43 @@ [ + { + "annotations": { + "title": "Alertmanager: Get Alerts", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Query active and pending alerts from the cluster's Alertmanager.\nUseful for monitoring cluster health, detecting issues, and incident response.\n\nReturns alerts with their labels, annotations, status, and timing information.\nCan filter by active/silenced/inhibited state.\n\nCommon use cases:\n- Check for critical alerts affecting the cluster\n- Monitor for specific alert types (e.g., high CPU, disk pressure)\n- Verify alert silences are working correctly", + "inputSchema": { + "type": "object", + "properties": { + "active": { + "default": true, + "description": "Filter for active (firing) alerts. Default: true", + "type": "boolean" + }, + "context": { + "description": "Optional parameter selecting which context to run the tool in. Defaults to fake-context if not set", + "type": "string" + }, + "filter": { + "description": "Optional filter using Alertmanager filter syntax. Examples: 'alertname=Watchdog', 'severity=critical', 'namespace=openshift-monitoring'", + "type": "string" + }, + "inhibited": { + "default": false, + "description": "Include inhibited alerts in the results. Default: false", + "type": "boolean" + }, + "silenced": { + "default": false, + "description": "Include silenced alerts in the results. 
Default: false", + "type": "boolean" + } + } + }, + "name": "alertmanager_alerts" + }, { "annotations": { "title": "Configuration: Contexts List", @@ -546,6 +585,79 @@ }, "name": "pods_top" }, + { + "annotations": { + "title": "Prometheus: Instant Query", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Execute an instant PromQL query against the cluster's Thanos Querier.\nReturns current metric values at the specified time (or current time if not specified).\nUse this for point-in-time metric values.\n\nCommon queries:\n- up{job=\"apiserver\"} - Check if API server is up\n- sum by(namespace) (container_memory_usage_bytes) - Memory usage by namespace\n- rate(container_cpu_usage_seconds_total[5m]) - CPU usage rate\n- kube_pod_status_phase{phase=\"Running\"} - Running pods count", + "inputSchema": { + "type": "object", + "properties": { + "context": { + "description": "Optional parameter selecting which context to run the tool in. Defaults to fake-context if not set", + "type": "string" + }, + "query": { + "description": "PromQL query string (e.g., 'up{job=\"apiserver\"}', 'sum by(namespace) (container_memory_usage_bytes)')", + "type": "string" + }, + "time": { + "description": "Optional evaluation timestamp. Accepts RFC3339 format (e.g., '2024-01-01T12:00:00Z') or Unix timestamp. 
If not provided, uses current time.", + "type": "string" + } + }, + "required": [ + "query" + ] + }, + "name": "prometheus_query" + }, + { + "annotations": { + "title": "Prometheus: Range Query", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Execute a range PromQL query against the cluster's Thanos Querier.\nReturns metric values over a time range with specified resolution.\nUse this for time-series data, trends, and historical analysis.\n\nSupports relative times:\n- 'now' for current time\n- '-10m', '-1h', '-1d' for relative past times\n\nExample: Get CPU usage over the last hour with 1-minute resolution.", + "inputSchema": { + "type": "object", + "properties": { + "context": { + "description": "Optional parameter selecting which context to run the tool in. Defaults to fake-context if not set", + "type": "string" + }, + "end": { + "description": "End time. Accepts RFC3339 timestamp, Unix timestamp, 'now', or relative time", + "type": "string" + }, + "query": { + "description": "PromQL query string (e.g., 'rate(container_cpu_usage_seconds_total[5m])')", + "type": "string" + }, + "start": { + "description": "Start time. Accepts RFC3339 timestamp (e.g., '2024-01-01T12:00:00Z'), Unix timestamp, or relative time (e.g., '-1h', '-30m', '-1d')", + "type": "string" + }, + "step": { + "default": "1m", + "description": "Query resolution step width (e.g., '15s', '1m', '5m'). Determines the granularity of returned data points. 
Default: '1m'", + "type": "string" + } + }, + "required": [ + "query", + "start", + "end" + ] + }, + "name": "prometheus_query_range" + }, { "annotations": { "title": "Resources: Create or Update", diff --git a/pkg/mcp/testdata/toolsets-full-tools-openshift.json b/pkg/mcp/testdata/toolsets-full-tools-openshift.json index 21e7fd600..6b5ef3112 100644 --- a/pkg/mcp/testdata/toolsets-full-tools-openshift.json +++ b/pkg/mcp/testdata/toolsets-full-tools-openshift.json @@ -1,4 +1,39 @@ [ + { + "annotations": { + "title": "Alertmanager: Get Alerts", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Query active and pending alerts from the cluster's Alertmanager.\nUseful for monitoring cluster health, detecting issues, and incident response.\n\nReturns alerts with their labels, annotations, status, and timing information.\nCan filter by active/silenced/inhibited state.\n\nCommon use cases:\n- Check for critical alerts affecting the cluster\n- Monitor for specific alert types (e.g., high CPU, disk pressure)\n- Verify alert silences are working correctly", + "inputSchema": { + "type": "object", + "properties": { + "active": { + "default": true, + "description": "Filter for active (firing) alerts. Default: true", + "type": "boolean" + }, + "filter": { + "description": "Optional filter using Alertmanager filter syntax. Examples: 'alertname=Watchdog', 'severity=critical', 'namespace=openshift-monitoring'", + "type": "string" + }, + "inhibited": { + "default": false, + "description": "Include inhibited alerts in the results. Default: false", + "type": "boolean" + }, + "silenced": { + "default": false, + "description": "Include silenced alerts in the results. 
Default: false", + "type": "boolean" + } + } + }, + "name": "alertmanager_alerts" + }, { "annotations": { "title": "Configuration: View", @@ -479,6 +514,71 @@ }, "name": "projects_list" }, + { + "annotations": { + "title": "Prometheus: Instant Query", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Execute an instant PromQL query against the cluster's Thanos Querier.\nReturns current metric values at the specified time (or current time if not specified).\nUse this for point-in-time metric values.\n\nCommon queries:\n- up{job=\"apiserver\"} - Check if API server is up\n- sum by(namespace) (container_memory_usage_bytes) - Memory usage by namespace\n- rate(container_cpu_usage_seconds_total[5m]) - CPU usage rate\n- kube_pod_status_phase{phase=\"Running\"} - Running pods count", + "inputSchema": { + "type": "object", + "properties": { + "query": { + "description": "PromQL query string (e.g., 'up{job=\"apiserver\"}', 'sum by(namespace) (container_memory_usage_bytes)')", + "type": "string" + }, + "time": { + "description": "Optional evaluation timestamp. Accepts RFC3339 format (e.g., '2024-01-01T12:00:00Z') or Unix timestamp. 
If not provided, uses current time.", + "type": "string" + } + }, + "required": [ + "query" + ] + }, + "name": "prometheus_query" + }, + { + "annotations": { + "title": "Prometheus: Range Query", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Execute a range PromQL query against the cluster's Thanos Querier.\nReturns metric values over a time range with specified resolution.\nUse this for time-series data, trends, and historical analysis.\n\nSupports relative times:\n- 'now' for current time\n- '-10m', '-1h', '-1d' for relative past times\n\nExample: Get CPU usage over the last hour with 1-minute resolution.", + "inputSchema": { + "type": "object", + "properties": { + "end": { + "description": "End time. Accepts RFC3339 timestamp, Unix timestamp, 'now', or relative time", + "type": "string" + }, + "query": { + "description": "PromQL query string (e.g., 'rate(container_cpu_usage_seconds_total[5m])')", + "type": "string" + }, + "start": { + "description": "Start time. Accepts RFC3339 timestamp (e.g., '2024-01-01T12:00:00Z'), Unix timestamp, or relative time (e.g., '-1h', '-30m', '-1d')", + "type": "string" + }, + "step": { + "default": "1m", + "description": "Query resolution step width (e.g., '15s', '1m', '5m'). Determines the granularity of returned data points. 
Default: '1m'", + "type": "string" + } + }, + "required": [ + "query", + "start", + "end" + ] + }, + "name": "prometheus_query_range" + }, { "annotations": { "title": "Resources: Create or Update", diff --git a/pkg/mcp/testdata/toolsets-full-tools.json b/pkg/mcp/testdata/toolsets-full-tools.json index fcb890919..73e53dc78 100644 --- a/pkg/mcp/testdata/toolsets-full-tools.json +++ b/pkg/mcp/testdata/toolsets-full-tools.json @@ -1,4 +1,39 @@ [ + { + "annotations": { + "title": "Alertmanager: Get Alerts", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Query active and pending alerts from the cluster's Alertmanager.\nUseful for monitoring cluster health, detecting issues, and incident response.\n\nReturns alerts with their labels, annotations, status, and timing information.\nCan filter by active/silenced/inhibited state.\n\nCommon use cases:\n- Check for critical alerts affecting the cluster\n- Monitor for specific alert types (e.g., high CPU, disk pressure)\n- Verify alert silences are working correctly", + "inputSchema": { + "type": "object", + "properties": { + "active": { + "default": true, + "description": "Filter for active (firing) alerts. Default: true", + "type": "boolean" + }, + "filter": { + "description": "Optional filter using Alertmanager filter syntax. Examples: 'alertname=Watchdog', 'severity=critical', 'namespace=openshift-monitoring'", + "type": "string" + }, + "inhibited": { + "default": false, + "description": "Include inhibited alerts in the results. Default: false", + "type": "boolean" + }, + "silenced": { + "default": false, + "description": "Include silenced alerts in the results. 
Default: false", + "type": "boolean" + } + } + }, + "name": "alertmanager_alerts" + }, { "annotations": { "title": "Configuration: View", @@ -466,6 +501,71 @@ }, "name": "pods_top" }, + { + "annotations": { + "title": "Prometheus: Instant Query", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Execute an instant PromQL query against the cluster's Thanos Querier.\nReturns current metric values at the specified time (or current time if not specified).\nUse this for point-in-time metric values.\n\nCommon queries:\n- up{job=\"apiserver\"} - Check if API server is up\n- sum by(namespace) (container_memory_usage_bytes) - Memory usage by namespace\n- rate(container_cpu_usage_seconds_total[5m]) - CPU usage rate\n- kube_pod_status_phase{phase=\"Running\"} - Running pods count", + "inputSchema": { + "type": "object", + "properties": { + "query": { + "description": "PromQL query string (e.g., 'up{job=\"apiserver\"}', 'sum by(namespace) (container_memory_usage_bytes)')", + "type": "string" + }, + "time": { + "description": "Optional evaluation timestamp. Accepts RFC3339 format (e.g., '2024-01-01T12:00:00Z') or Unix timestamp. 
If not provided, uses current time.", + "type": "string" + } + }, + "required": [ + "query" + ] + }, + "name": "prometheus_query" + }, + { + "annotations": { + "title": "Prometheus: Range Query", + "readOnlyHint": true, + "destructiveHint": false, + "idempotentHint": true, + "openWorldHint": true + }, + "description": "Execute a range PromQL query against the cluster's Thanos Querier.\nReturns metric values over a time range with specified resolution.\nUse this for time-series data, trends, and historical analysis.\n\nSupports relative times:\n- 'now' for current time\n- '-10m', '-1h', '-1d' for relative past times\n\nExample: Get CPU usage over the last hour with 1-minute resolution.", + "inputSchema": { + "type": "object", + "properties": { + "end": { + "description": "End time. Accepts RFC3339 timestamp, Unix timestamp, 'now', or relative time", + "type": "string" + }, + "query": { + "description": "PromQL query string (e.g., 'rate(container_cpu_usage_seconds_total[5m])')", + "type": "string" + }, + "start": { + "description": "Start time. Accepts RFC3339 timestamp (e.g., '2024-01-01T12:00:00Z'), Unix timestamp, or relative time (e.g., '-1h', '-30m', '-1d')", + "type": "string" + }, + "step": { + "default": "1m", + "description": "Query resolution step width (e.g., '15s', '1m', '5m'). Determines the granularity of returned data points. Default: '1m'", + "type": "string" + } + }, + "required": [ + "query", + "start", + "end" + ] + }, + "name": "prometheus_query_range" + }, { "annotations": { "title": "Resources: Create or Update", diff --git a/pkg/prometheus/alertmanager.go b/pkg/prometheus/alertmanager.go new file mode 100644 index 000000000..1b06941e1 --- /dev/null +++ b/pkg/prometheus/alertmanager.go @@ -0,0 +1,40 @@ +package prometheus + +import ( + "context" + "encoding/json" + "fmt" + "net/url" +) + +// buildAlertsParams constructs query parameters for Alertmanager alerts API. 
+func buildAlertsParams(active, silenced, inhibited bool, filter string) url.Values { + params := url.Values{} + params.Set("active", fmt.Sprintf("%t", active)) + params.Set("silenced", fmt.Sprintf("%t", silenced)) + params.Set("inhibited", fmt.Sprintf("%t", inhibited)) + if filter != "" { + params.Add("filter", filter) + } + return params +} + +// GetAlerts retrieves alerts from Alertmanager. +func (c *Client) GetAlerts(ctx context.Context, active, silenced, inhibited bool, filter string) ([]Alert, error) { + body, err := c.executeRequest(ctx, "/api/v2/alerts", buildAlertsParams(active, silenced, inhibited, filter)) + if err != nil { + return nil, err + } + + var alerts []Alert + if err := json.Unmarshal(body, &alerts); err != nil { + return nil, fmt.Errorf("failed to parse alerts response: %w", err) + } + + return alerts, nil +} + +// GetAlertsRaw retrieves raw JSON alerts from Alertmanager. +func (c *Client) GetAlertsRaw(ctx context.Context, active, silenced, inhibited bool, filter string) ([]byte, error) { + return c.executeRequest(ctx, "/api/v2/alerts", buildAlertsParams(active, silenced, inhibited, filter)) +} diff --git a/pkg/prometheus/client.go b/pkg/prometheus/client.go new file mode 100644 index 000000000..10ecbf2c4 --- /dev/null +++ b/pkg/prometheus/client.go @@ -0,0 +1,155 @@ +package prometheus + +import ( + "context" + "crypto/tls" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "time" +) + +const ( + // DefaultTimeout is the default HTTP timeout. + DefaultTimeout = 30 * time.Second + + // MaxResponseSize is the maximum response size (10MB). + MaxResponseSize = 10 * 1024 * 1024 +) + +// Client is an HTTP client for Prometheus and Alertmanager APIs. +type Client struct { + baseURL string + bearerToken string + tlsConfig *tls.Config + timeout time.Duration +} + +// NewClient creates a new Prometheus client with the specified base URL and options. 
+func NewClient(baseURL string, opts ...ClientOption) *Client { + c := &Client{ + baseURL: baseURL, + tlsConfig: newDefaultTLSConfig(), + timeout: DefaultTimeout, + } + + for _, opt := range opts { + opt(c) + } + + return c +} + +// Query executes an instant PromQL query at the specified time. +// If timeStr is empty, the current time is used. +func (c *Client) Query(ctx context.Context, query string, timeStr string) (*QueryResult, error) { + params := url.Values{} + params.Set("query", query) + if timeStr != "" { + params.Set("time", timeStr) + } + + body, err := c.executeRequest(ctx, "/api/v1/query", params) + if err != nil { + return nil, err + } + + var result QueryResult + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse query response: %w", err) + } + + return &result, nil +} + +// QueryRange executes a range PromQL query over the specified time range. +func (c *Client) QueryRange(ctx context.Context, query, start, end, step string) (*QueryResult, error) { + params := url.Values{} + params.Set("query", query) + params.Set("start", start) + params.Set("end", end) + params.Set("step", step) + + body, err := c.executeRequest(ctx, "/api/v1/query_range", params) + if err != nil { + return nil, err + } + + var result QueryResult + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse query_range response: %w", err) + } + + return &result, nil +} + +// QueryRaw executes a query and returns the raw JSON response. +func (c *Client) QueryRaw(ctx context.Context, endpoint string, params url.Values) ([]byte, error) { + return c.executeRequest(ctx, endpoint, params) +} + +// executeRequest executes an HTTP GET request with authentication. +func (c *Client) executeRequest(ctx context.Context, endpoint string, params url.Values) ([]byte, error) { + // Build URL + requestURL := c.baseURL + endpoint + if len(params) > 0 { + requestURL += "?" 
+ params.Encode() + } + + // Create request + req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + // Add authentication + if c.bearerToken != "" { + req.Header.Set("Authorization", "Bearer "+c.bearerToken) + } + + // Execute request + client := c.createHTTPClient() + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + // Read response with size limit + limitedReader := io.LimitReader(resp.Body, MaxResponseSize+1) + body, err := io.ReadAll(limitedReader) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + + if len(body) > MaxResponseSize { + return nil, fmt.Errorf("response size exceeds maximum of %d bytes", MaxResponseSize) + } + + // Check HTTP status + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, truncateString(string(body), 200)) + } + + return body, nil +} + +// createHTTPClient creates an HTTP client with the configured TLS and timeout settings. +func (c *Client) createHTTPClient() *http.Client { + return &http.Client{ + Timeout: c.timeout, + Transport: &http.Transport{ + TLSClientConfig: c.tlsConfig, + }, + } +} + +// truncateString truncates a string to the specified length. +func truncateString(s string, maxLen int) string { + if len(s) <= maxLen { + return s + } + return s[:maxLen] + "..." 
+} diff --git a/pkg/prometheus/client_test.go b/pkg/prometheus/client_test.go new file mode 100644 index 000000000..1fed6efd9 --- /dev/null +++ b/pkg/prometheus/client_test.go @@ -0,0 +1,467 @@ +package prometheus + +import ( + "context" + "crypto/tls" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/suite" + "k8s.io/client-go/rest" +) + +type PrometheusSuite struct { + suite.Suite +} + +func (s *PrometheusSuite) TestNewClient() { + s.Run("creates client with defaults", func() { + client := NewClient("https://prometheus.example.com") + + s.Equal("https://prometheus.example.com", client.baseURL) + s.Equal("", client.bearerToken) + s.Equal(DefaultTimeout, client.timeout) + s.NotNil(client.tlsConfig) + }) + + s.Run("applies bearer token option", func() { + client := NewClient("https://prometheus.example.com", + WithBearerToken("test-token"), + ) + + s.Equal("test-token", client.bearerToken) + }) + + s.Run("applies timeout option", func() { + client := NewClient("https://prometheus.example.com", + WithTimeout(60*time.Second), + ) + + s.Equal(60*time.Second, client.timeout) + }) + + s.Run("applies insecure option", func() { + client := NewClient("https://prometheus.example.com", + WithInsecure(true), + ) + + s.True(client.tlsConfig.InsecureSkipVerify) + }) + + s.Run("trims whitespace from bearer token", func() { + client := NewClient("https://prometheus.example.com", + WithBearerToken(" test-token "), + ) + + s.Equal("test-token", client.bearerToken) + }) +} + +func (s *PrometheusSuite) TestWithBearerTokenFromRESTConfig() { + s.Run("uses token from BearerToken field", func() { + config := &rest.Config{ + BearerToken: "direct-token", + } + + client := NewClient("https://prometheus.example.com", + WithBearerTokenFromRESTConfig(config), + ) + + s.Equal("direct-token", client.bearerToken) + }) + + s.Run("handles nil config gracefully", func() { + client := NewClient("https://prometheus.example.com", + 
WithBearerTokenFromRESTConfig(nil), + ) + + s.Equal("", client.bearerToken) + }) +} + +func (s *PrometheusSuite) TestWithTLSFromRESTConfig() { + s.Run("handles nil config gracefully", func() { + client := NewClient("https://prometheus.example.com", + WithTLSFromRESTConfig(nil), + ) + + s.NotNil(client.tlsConfig) + }) + + s.Run("uses CAData when available", func() { + // Create a minimal PEM certificate for testing + caPEM := []byte(`-----BEGIN CERTIFICATE----- +MIIBkTCB+wIJAKHBfpegPjMCMA0GCSqGSIb3DQEBCwUAMBExDzANBgNVBAMMBnRl +c3RjYTAeFw0yNDAxMDEwMDAwMDBaFw0yNTAxMDEwMDAwMDBaMBExDzANBgNVBAMM +BnRlc3RjYTBcMA0GCSqGSIb3DQEBAQUAA0sAMEgCQQC7o96FCFhP2RxnNwj7mVXh +qGYXt9L9BJVjjTpD2hCRVEJgqGYb3bSoGiK4MYpqnLJDt9IBSfJz7JBkjHDvDZLX +AgMBAAGjUzBRMB0GA1UdDgQWBBQS0P3hKf3cG8XKBQMO3F/3GmZ7wjAfBgNVHSME +GDAWgBQS0P3hKf3cG8XKBQMO3F/3GmZ7wjAPBgNVHRMBAf8EBTADAQH/MA0GCSqG +SIb3DQEBCwUAA0EAFHbN1pWPxvCqVTH1gHCJdNlHqY3hg3PA2PIzv1NiaP3qmJk0 +cDq6b5fP0Z3e6Q1OvH5hEYnD6W8fXG5M8CxHjg== +-----END CERTIFICATE-----`) + + config := &rest.Config{ + TLSClientConfig: rest.TLSClientConfig{ + CAData: caPEM, + }, + } + + client := NewClient("https://prometheus.example.com", + WithTLSFromRESTConfig(config), + ) + + s.NotNil(client.tlsConfig.RootCAs) + }) +} + +func (s *PrometheusSuite) TestQuery() { + s.Run("executes instant query", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + s.Equal("/api/v1/query", r.URL.Path) + s.Equal("up", r.URL.Query().Get("query")) + + response := QueryResult{ + Status: "success", + Data: Data{ + ResultType: "vector", + Result: []Result{ + { + Metric: map[string]string{"__name__": "up", "job": "apiserver"}, + Value: []any{1234567890.0, "1"}, + }, + }, + }, + } + _ = json.NewEncoder(w).Encode(response) + })) + defer server.Close() + + client := NewClient(server.URL) + result, err := client.Query(context.Background(), "up", "") + + s.NoError(err) + s.Equal("success", result.Status) + s.Len(result.Data.Result, 1) + 
s.Equal("up", result.Data.Result[0].Metric["__name__"]) + }) + + s.Run("includes time parameter when specified", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + s.Equal("1234567890", r.URL.Query().Get("time")) + + response := QueryResult{Status: "success"} + _ = json.NewEncoder(w).Encode(response) + })) + defer server.Close() + + client := NewClient(server.URL) + _, err := client.Query(context.Background(), "up", "1234567890") + + s.NoError(err) + }) + + s.Run("includes bearer token in request", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + s.Equal("Bearer test-token", r.Header.Get("Authorization")) + + response := QueryResult{Status: "success"} + _ = json.NewEncoder(w).Encode(response) + })) + defer server.Close() + + client := NewClient(server.URL, WithBearerToken("test-token")) + _, err := client.Query(context.Background(), "up", "") + + s.NoError(err) + }) + + s.Run("returns error for HTTP error status", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + })) + defer server.Close() + + client := NewClient(server.URL) + _, err := client.Query(context.Background(), "up", "") + + s.Error(err) + s.Contains(err.Error(), "500") + }) +} + +func (s *PrometheusSuite) TestQueryRange() { + s.Run("executes range query with all parameters", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + s.Equal("/api/v1/query_range", r.URL.Path) + s.Equal("rate(http_requests_total[5m])", r.URL.Query().Get("query")) + s.Equal("2024-01-01T00:00:00Z", r.URL.Query().Get("start")) + s.Equal("2024-01-01T01:00:00Z", r.URL.Query().Get("end")) + s.Equal("1m", r.URL.Query().Get("step")) + + response := QueryResult{ + Status: "success", + Data: Data{ + ResultType: "matrix", + Result: []Result{ + { + 
Metric: map[string]string{"__name__": "http_requests_total"}, + Values: [][]any{ + {1234567890.0, "10"}, + {1234567950.0, "15"}, + }, + }, + }, + }, + } + _ = json.NewEncoder(w).Encode(response) + })) + defer server.Close() + + client := NewClient(server.URL) + result, err := client.QueryRange(context.Background(), + "rate(http_requests_total[5m])", + "2024-01-01T00:00:00Z", + "2024-01-01T01:00:00Z", + "1m", + ) + + s.NoError(err) + s.Equal("success", result.Status) + s.Equal("matrix", result.Data.ResultType) + s.Len(result.Data.Result, 1) + s.Len(result.Data.Result[0].Values, 2) + }) +} + +func (s *PrometheusSuite) TestGetAlerts() { + s.Run("retrieves alerts with parameters", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + s.Equal("/api/v2/alerts", r.URL.Path) + s.Equal("true", r.URL.Query().Get("active")) + s.Equal("false", r.URL.Query().Get("silenced")) + s.Equal("false", r.URL.Query().Get("inhibited")) + + alerts := []Alert{ + { + Labels: map[string]string{"alertname": "HighCPU", "severity": "warning"}, + Annotations: map[string]string{"summary": "CPU usage is high"}, + StartsAt: "2024-01-01T00:00:00Z", + Status: AlertStatus{ + State: "active", + }, + }, + } + _ = json.NewEncoder(w).Encode(alerts) + })) + defer server.Close() + + client := NewClient(server.URL) + alerts, err := client.GetAlerts(context.Background(), true, false, false, "") + + s.NoError(err) + s.Len(alerts, 1) + s.Equal("HighCPU", alerts[0].Labels["alertname"]) + s.Equal("active", alerts[0].Status.State) + }) + + s.Run("includes filter parameter when specified", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + s.Equal("alertname=Watchdog", r.URL.Query().Get("filter")) + + alerts := []Alert{} + _ = json.NewEncoder(w).Encode(alerts) + })) + defer server.Close() + + client := NewClient(server.URL) + _, err := client.GetAlerts(context.Background(), true, false, false, 
"alertname=Watchdog") + + s.NoError(err) + }) +} + +func (s *PrometheusSuite) TestConvertRelativeTime() { + s.Run("handles 'now' keyword", func() { + before := time.Now().UTC() + result, err := ConvertRelativeTime("now") + after := time.Now().UTC() + + s.NoError(err) + s.Contains(result, "T", "Result should be RFC3339 format") + + // Parse and verify it's within the expected time range + parsed, err := time.Parse(time.RFC3339, result) + s.NoError(err) + s.True(parsed.After(before.Add(-time.Second)) && parsed.Before(after.Add(time.Second)), + "Parsed time should be close to current time") + }) + + s.Run("handles RFC3339 timestamp unchanged", func() { + input := "2024-01-01T12:00:00Z" + result, err := ConvertRelativeTime(input) + + s.NoError(err) + s.Equal(input, result, "RFC3339 timestamp should be returned unchanged") + }) + + s.Run("handles Unix timestamp unchanged", func() { + input := "1704110400" + result, err := ConvertRelativeTime(input) + + s.NoError(err) + s.Equal(input, result, "Unix timestamp should be returned unchanged") + }) + + s.Run("handles relative time -10m", func() { + before := time.Now().UTC().Add(-10 * time.Minute) + result, err := ConvertRelativeTime("-10m") + after := time.Now().UTC().Add(-10 * time.Minute) + + s.NoError(err) + s.Contains(result, "T", "Result should be RFC3339 format") + + parsed, err := time.Parse(time.RFC3339, result) + s.NoError(err) + s.True(parsed.After(before.Add(-time.Second)) && parsed.Before(after.Add(time.Second)), + "Parsed time should be approximately 10 minutes ago") + }) + + s.Run("handles relative time -1h", func() { + before := time.Now().UTC().Add(-1 * time.Hour) + result, err := ConvertRelativeTime("-1h") + after := time.Now().UTC().Add(-1 * time.Hour) + + s.NoError(err) + s.Contains(result, "T", "Result should be RFC3339 format") + + parsed, err := time.Parse(time.RFC3339, result) + s.NoError(err) + s.True(parsed.After(before.Add(-time.Second)) && parsed.Before(after.Add(time.Second)), + "Parsed time 
should be approximately 1 hour ago") + }) + + s.Run("handles relative time -1d (days)", func() { + before := time.Now().UTC().Add(-24 * time.Hour) + result, err := ConvertRelativeTime("-1d") + after := time.Now().UTC().Add(-24 * time.Hour) + + s.NoError(err) + s.Contains(result, "T", "Result should be RFC3339 format") + + parsed, err := time.Parse(time.RFC3339, result) + s.NoError(err) + s.True(parsed.After(before.Add(-time.Second)) && parsed.Before(after.Add(time.Second)), + "Parsed time should be approximately 1 day ago") + }) + + s.Run("handles relative time -30s (seconds)", func() { + before := time.Now().UTC().Add(-30 * time.Second) + result, err := ConvertRelativeTime("-30s") + after := time.Now().UTC().Add(-30 * time.Second) + + s.NoError(err) + s.Contains(result, "T", "Result should be RFC3339 format") + + parsed, err := time.Parse(time.RFC3339, result) + s.NoError(err) + s.True(parsed.After(before.Add(-time.Second)) && parsed.Before(after.Add(time.Second)), + "Parsed time should be approximately 30 seconds ago") + }) + + s.Run("handles whitespace around input", func() { + result, err := ConvertRelativeTime(" now ") + + s.NoError(err) + s.Contains(result, "T", "Result should be RFC3339 format") + }) + + s.Run("returns error for invalid format", func() { + _, err := ConvertRelativeTime("invalid") + + s.Error(err) + s.Contains(err.Error(), "invalid time format") + }) + + s.Run("returns error for malformed relative time", func() { + _, err := ConvertRelativeTime("-abc") + + s.Error(err) + s.Contains(err.Error(), "invalid relative time format") + }) +} + +func (s *PrometheusSuite) TestTruncateString() { + s.Run("returns original string if shorter than max", func() { + result := truncateString("hello", 10) + s.Equal("hello", result) + }) + + s.Run("returns original string if equal to max", func() { + result := truncateString("hello", 5) + s.Equal("hello", result) + }) + + s.Run("truncates and adds ellipsis if longer than max", func() { + result := 
truncateString("hello world", 5) + s.Equal("hello...", result) + }) +} + +func (s *PrometheusSuite) TestCreateHTTPClient() { + s.Run("creates client with timeout", func() { + client := NewClient("https://example.com", WithTimeout(60*time.Second)) + httpClient := client.createHTTPClient() + + s.Equal(60*time.Second, httpClient.Timeout) + }) + + s.Run("creates client with TLS config", func() { + client := NewClient("https://example.com", WithInsecure(true)) + httpClient := client.createHTTPClient() + + transport, ok := httpClient.Transport.(*http.Transport) + s.True(ok) + s.True(transport.TLSClientConfig.InsecureSkipVerify) + }) +} + +func (s *PrometheusSuite) TestNewDefaultTLSConfig() { + s.Run("sets minimum TLS version", func() { + config := newDefaultTLSConfig() + s.Equal(uint16(tls.VersionTLS12), config.MinVersion) + }) +} + +func (s *PrometheusSuite) TestParseIntFromString() { + s.Run("returns error for empty string", func() { + _, err := parseIntFromString("") + s.Error(err) + s.Contains(err.Error(), "empty string") + }) + + s.Run("returns error for number too large", func() { + _, err := parseIntFromString("12345678901") // 11 digits + s.Error(err) + s.Contains(err.Error(), "number too large") + }) + + s.Run("parses valid number", func() { + result, err := parseIntFromString("365") + s.NoError(err) + s.Equal(365, result) + }) + + s.Run("parses max allowed digits", func() { + result, err := parseIntFromString("1234567890") // exactly 10 digits + s.NoError(err) + s.Equal(1234567890, result) + }) +} + +func TestPrometheusSuite(t *testing.T) { + suite.Run(t, new(PrometheusSuite)) +} diff --git a/pkg/prometheus/options.go b/pkg/prometheus/options.go new file mode 100644 index 000000000..60f569eca --- /dev/null +++ b/pkg/prometheus/options.go @@ -0,0 +1,163 @@ +package prometheus + +import ( + "crypto/tls" + "crypto/x509" + "os" + "strings" + "time" + + "k8s.io/client-go/rest" + "k8s.io/klog/v2" +) + +// ClientOption is a function that configures a Client. 
+type ClientOption func(*Client) + +// WithBearerToken sets the bearer token for authentication. +func WithBearerToken(token string) ClientOption { + return func(c *Client) { + c.bearerToken = strings.TrimSpace(token) + } +} + +// WithBearerTokenFromRESTConfig extracts and sets the bearer token from a Kubernetes REST config. +// It tries the token directly first, then falls back to reading from a token file. +func WithBearerTokenFromRESTConfig(config *rest.Config) ClientOption { + return func(c *Client) { + if config == nil { + return + } + + // Try bearer token directly + if config.BearerToken != "" { + c.bearerToken = config.BearerToken + return + } + + // Try bearer token file + if config.BearerTokenFile != "" { + token, err := os.ReadFile(config.BearerTokenFile) + if err != nil { + klog.V(2).Infof("Failed to read token file %s: %v", config.BearerTokenFile, err) + return + } + c.bearerToken = strings.TrimSpace(string(token)) + } + } +} + +// WithTLSFromRESTConfig configures TLS using the CA from a Kubernetes REST config. +// It tries CAData first, then CAFile, then system cert pool, and finally falls back to insecure. 
+func WithTLSFromRESTConfig(config *rest.Config) ClientOption { + return func(c *Client) { + if config == nil { + return + } + + // Try to build a cert pool with the cluster CA + var certPool *x509.CertPool + var caLoaded bool + + // First, try to load CA from REST config's CAData + if len(config.CAData) > 0 { + // Start with system cert pool if available + if systemPool, err := x509.SystemCertPool(); err == nil && systemPool != nil { + certPool = systemPool + } else { + certPool = x509.NewCertPool() + } + if ok := certPool.AppendCertsFromPEM(config.CAData); ok { + c.tlsConfig.RootCAs = certPool + caLoaded = true + klog.V(4).Info("Loaded cluster CA from REST config CAData") + } else { + klog.V(2).Info("Failed to parse CA certificates from REST config CAData") + } + } + + // If CAData wasn't available or didn't work, try CAFile + if !caLoaded && config.CAFile != "" { + caPEM, err := os.ReadFile(config.CAFile) + if err != nil { + klog.V(2).Infof("Failed to read CA file %s: %v", config.CAFile, err) + } else { + // Start with system cert pool if available + if systemPool, err := x509.SystemCertPool(); err == nil && systemPool != nil { + certPool = systemPool + } else { + certPool = x509.NewCertPool() + } + if ok := certPool.AppendCertsFromPEM(caPEM); ok { + c.tlsConfig.RootCAs = certPool + caLoaded = true + klog.V(4).Infof("Loaded cluster CA from file %s", config.CAFile) + } else { + klog.V(2).Infof("Failed to parse CA certificates from file %s", config.CAFile) + } + } + } + + // If no CA was loaded, try system cert pool alone (for routes with public CAs) + if !caLoaded { + if systemPool, err := x509.SystemCertPool(); err == nil && systemPool != nil { + c.tlsConfig.RootCAs = systemPool + klog.V(4).Info("Using system certificate pool for TLS verification") + } else { + // Last resort: skip verification with a warning + klog.Warning("No cluster CA available and system cert pool failed; using insecure TLS (skip verification)") + c.tlsConfig.InsecureSkipVerify = true + } + 
} + } +} + +// WithCustomCA configures TLS using a custom CA certificate file. +func WithCustomCA(caFile string) ClientOption { + return func(c *Client) { + caFile = strings.TrimSpace(caFile) + if caFile == "" { + return + } + + caPEM, err := os.ReadFile(caFile) + if err != nil { + klog.Errorf("Failed to read CA certificate from file %s: %v; proceeding without custom CA", caFile, err) + return + } + + // Start with the host system pool when possible so we don't drop system roots + var certPool *x509.CertPool + if systemPool, err := x509.SystemCertPool(); err == nil && systemPool != nil { + certPool = systemPool + } else { + certPool = x509.NewCertPool() + } + if ok := certPool.AppendCertsFromPEM(caPEM); ok { + c.tlsConfig.RootCAs = certPool + } else { + klog.V(0).Infof("Failed to append provided certificate authority; proceeding without custom CA") + } + } +} + +// WithInsecure configures whether to skip TLS verification. +func WithInsecure(insecure bool) ClientOption { + return func(c *Client) { + c.tlsConfig.InsecureSkipVerify = insecure + } +} + +// WithTimeout sets the HTTP client timeout. +func WithTimeout(timeout time.Duration) ClientOption { + return func(c *Client) { + c.timeout = timeout + } +} + +// newDefaultTLSConfig creates a default TLS configuration. +func newDefaultTLSConfig() *tls.Config { + return &tls.Config{ + MinVersion: tls.VersionTLS12, + } +} diff --git a/pkg/prometheus/time.go b/pkg/prometheus/time.go new file mode 100644 index 000000000..49034d130 --- /dev/null +++ b/pkg/prometheus/time.go @@ -0,0 +1,80 @@ +package prometheus + +import ( + "fmt" + "strings" + "time" +) + +// ConvertRelativeTime converts relative time strings to RFC3339 timestamps. +// Supports: "now", "-10m", "-1h", "-1d", or passthrough for RFC3339/Unix timestamps. 
func ConvertRelativeTime(timeStr string) (string, error) {
	timeStr = strings.TrimSpace(timeStr)

	// Already absolute: RFC3339 contains 'T'; bare digits are a Unix timestamp.
	// Both are passed through unchanged for the Prometheus API to interpret.
	if strings.Contains(timeStr, "T") || isNumeric(timeStr) {
		return timeStr, nil
	}

	if timeStr == "now" {
		return time.Now().UTC().Format(time.RFC3339), nil
	}

	// Relative offsets into the past: '-30s', '-10m', '-1h', '-1d'.
	if strings.HasPrefix(timeStr, "-") {
		durationStr := timeStr[1:] // drop leading '-'

		// Go's time.ParseDuration has no day unit, so handle 'd' ourselves.
		if strings.HasSuffix(durationStr, "d") {
			days, err := parseDays(strings.TrimSuffix(durationStr, "d"))
			if err != nil {
				return "", fmt.Errorf("invalid relative time format: %s", timeStr)
			}
			// AddDate in UTC is exactly days*24h earlier and, unlike
			// -time.Duration(days)*24*time.Hour, cannot silently overflow
			// int64 nanoseconds for large day counts.
			return time.Now().UTC().AddDate(0, 0, -days).Format(time.RFC3339), nil
		}

		// Standard units (s, m, h, including compounds like '1h30m').
		duration, err := time.ParseDuration(durationStr)
		if err != nil {
			return "", fmt.Errorf("invalid relative time format: %s", timeStr)
		}
		return time.Now().UTC().Add(-duration).Format(time.RFC3339), nil
	}

	return "", fmt.Errorf("invalid time format: %s; expected 'now', relative time like '-10m', '-1h', '-1d', or RFC3339 timestamp", timeStr)
}

// maxLookbackDays bounds relative day offsets to the span representable by
// time.Duration (~292 years). Larger counts previously wrapped int64
// nanoseconds and produced a garbage timestamp with no error.
const maxLookbackDays = 106751

// parseDays parses a non-negative, digits-only day count, rejecting empty
// input, non-digit characters, and counts above maxLookbackDays.
func parseDays(s string) (int, error) {
	if s == "" {
		return 0, fmt.Errorf("empty day count")
	}
	days := 0
	for _, c := range s {
		if c < '0' || c > '9' {
			return 0, fmt.Errorf("invalid day count: %s", s)
		}
		days = days*10 + int(c-'0')
		if days > maxLookbackDays {
			return 0, fmt.Errorf("day count too large: %s", s)
		}
	}
	return days, nil
}

// isNumeric checks if a string is non-empty and contains only ASCII digits.
func isNumeric(s string) bool {
	if len(s) == 0 {
		return false
	}
	for _, c := range s {
		if c < '0' || c > '9' {
			return false
		}
	}
	return true
}

// parseIntFromString parses an integer from a string with overflow protection.
+func parseIntFromString(s string) (int, error) {
+	if len(s) == 0 {
+		return 0, fmt.Errorf("empty string")
+	}
+	if len(s) > 10 { // cap digit count so the value fits in int64; NOTE(review): 10-digit values can still exceed int32 max (2147483647), so this guards overflow only where int is 64-bit — confirm 32-bit targets are out of scope
+		return 0, fmt.Errorf("number too large: %s", s)
+	}
+	var result int
+	for _, c := range s {
+		if c < '0' || c > '9' { // digits only: signs, spaces, and separators are rejected
+			return 0, fmt.Errorf("invalid number: %s", s)
+		}
+		result = result*10 + int(c-'0')
+	}
+	return result, nil
+}
diff --git a/pkg/prometheus/types.go b/pkg/prometheus/types.go
new file mode 100644
index 000000000..2ce405d6d
--- /dev/null
+++ b/pkg/prometheus/types.go
@@ -0,0 +1,54 @@
+// Package prometheus provides a shared HTTP client for Prometheus and Alertmanager APIs.
+// It supports flexible authentication (bearer token), TLS configuration (REST config CA,
+// custom CA file, or insecure mode), and can be used by multiple toolsets with different
+// URL discovery mechanisms.
+package prometheus
+
+// QueryResult represents a Prometheus API query response.
+type QueryResult struct {
+	Status    string   `json:"status"`
+	Data      Data     `json:"data"`
+	ErrorType string   `json:"errorType,omitempty"`
+	Error     string   `json:"error,omitempty"`
+	Warnings  []string `json:"warnings,omitempty"`
+}
+
+// Data contains the query result data.
+type Data struct {
+	ResultType string   `json:"resultType"`
+	Result     []Result `json:"result"`
+}
+
+// Result represents a single result in a query response. Exactly one of
+// Value or Values is populated, depending on the query type.
+type Result struct {
+	Metric map[string]string `json:"metric"`
+	// Value is used for instant queries - [timestamp, value]
+	Value []any `json:"value,omitempty"`
+	// Values is used for range queries - [[timestamp, value], ...]
+	Values [][]any `json:"values,omitempty"`
+}
+
+// Alert represents an Alertmanager alert.
+type Alert struct {
+	// NOTE(review): timestamp fields appear to be RFC3339 strings as emitted by the Alertmanager v2 API — confirm against the API schema
+	Annotations  map[string]string `json:"annotations"`
+	EndsAt       string            `json:"endsAt"`
+	Fingerprint  string            `json:"fingerprint"`
+	Receivers    []Receiver        `json:"receivers"`
+	StartsAt     string            `json:"startsAt"`
+	Status       AlertStatus       `json:"status"`
+	UpdatedAt    string            `json:"updatedAt"`
+	GeneratorURL string            `json:"generatorURL,omitempty"`
+	Labels       map[string]string `json:"labels"`
+}
+
+// Receiver represents an Alertmanager receiver.
+type Receiver struct {
+	Name string `json:"name"`
+}
+
+// AlertStatus represents the status of an alert.
+type AlertStatus struct {
+	InhibitedBy []string `json:"inhibitedBy"`
+	SilencedBy  []string `json:"silencedBy"`
+	State       string   `json:"state"`
+}
diff --git a/pkg/toolsets/observability/alertmanager.go b/pkg/toolsets/observability/alertmanager.go
new file mode 100644
index 000000000..47e68fc7b
--- /dev/null
+++ b/pkg/toolsets/observability/alertmanager.go
@@ -0,0 +1,126 @@
+package observability
+
+import (
+	"fmt"
+
+	"github.com/google/jsonschema-go/jsonschema"
+	"k8s.io/utils/ptr"
+
+	"github.com/containers/kubernetes-mcp-server/pkg/api"
+)
+
+// initAlertmanager returns the Alertmanager tools contributed by the observability toolset.
+func initAlertmanager() []api.ServerTool {
+	return []api.ServerTool{
+		initAlertmanagerAlerts(),
+	}
+}
+
+// initAlertmanagerAlerts creates the alertmanager_alerts tool definition: its input schema, read-only annotations, and handler wiring.
+func initAlertmanagerAlerts() api.ServerTool {
+	return api.ServerTool{
+		Tool: api.Tool{
+			Name: "alertmanager_alerts",
+			Description: `Query active and pending alerts from the cluster's Alertmanager.
+Useful for monitoring cluster health, detecting issues, and incident response.
+
+Returns alerts with their labels, annotations, status, and timing information.
+Can filter by active/silenced/inhibited state.
+
+Common use cases:
+- Check for critical alerts affecting the cluster
+- Monitor for specific alert types (e.g., high CPU, disk pressure)
+- Verify alert silences are working correctly`,
+			InputSchema: &jsonschema.Schema{
+				Type: "object",
+				Properties: map[string]*jsonschema.Schema{
+					"active": {
+						Type:        "boolean",
+						Description: "Filter for active (firing) alerts. Default: true",
+						Default:     api.ToRawMessage(true),
+					},
+					"silenced": {
+						Type:        "boolean",
+						Description: "Include silenced alerts in the results. Default: false",
+						Default:     api.ToRawMessage(false),
+					},
+					"inhibited": {
+						Type:        "boolean",
+						Description: "Include inhibited alerts in the results. Default: false",
+						Default:     api.ToRawMessage(false),
+					},
+					"filter": {
+						Type:        "string",
+						Description: "Optional filter using Alertmanager filter syntax. Examples: 'alertname=Watchdog', 'severity=critical', 'namespace=openshift-monitoring'",
+					},
+				},
+			},
+			// Read-only, idempotent query; OpenWorldHint because it reaches an external Alertmanager endpoint.
+			Annotations: api.ToolAnnotations{
+				Title:           "Alertmanager: Get Alerts",
+				ReadOnlyHint:    ptr.To(true),
+				DestructiveHint: ptr.To(false),
+				IdempotentHint:  ptr.To(true),
+				OpenWorldHint:   ptr.To(true),
+			},
+		},
+		Handler: alertmanagerAlertsHandler,
+	}
+}
+
+// alertmanagerAlertsHandler handles Alertmanager alerts queries.
+func alertmanagerAlertsHandler(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
+	// Validate endpoint (security check). The path is a constant here, so this is
+	// defense in depth; NOTE(review): validateAlertmanagerEndpoint is defined elsewhere — confirm it allow-lists /api/v2/alerts
+	endpoint := "/api/v2/alerts"
+	if err := validateAlertmanagerEndpoint(endpoint); err != nil {
+		return api.NewToolCallResult("", err), nil
+	}
+
+	// Get Alertmanager URL. Tool-level failures are embedded in the result with a
+	// nil Go error, matching the pattern used throughout this handler.
+	baseURL, err := getRouteURL(params.Context, params, alertmanagerRoute, getMonitoringNamespace(params))
+	if err != nil {
+		return api.NewToolCallResult("", fmt.Errorf("failed to get Alertmanager route: %w", err)), nil
+	}
+
+	// Handle active parameter (default: true). Non-boolean values fall back to the default.
+	active := true
+	if v, ok := params.GetArguments()["active"].(bool); ok {
+		active = v
+	}
+
+	// Handle silenced parameter (default: false)
+	silenced := false
+	if v, ok := params.GetArguments()["silenced"].(bool); ok {
+		silenced = v
+	}
+
+	// Handle inhibited parameter (default: false)
+	inhibited := false
+	if v, ok := params.GetArguments()["inhibited"].(bool); ok {
+		inhibited = v
+	}
+
+	// Handle optional filter
+	filter := ""
+	if f, ok := params.GetArguments()["filter"].(string); ok && f != "" {
+		// Validate filter length to bound request size (see maxQueryLength)
+		if len(f) > maxQueryLength {
+			return api.NewToolCallResult("", fmt.Errorf("filter exceeds maximum length of %d characters", maxQueryLength)), nil
+		}
+		filter = f
+	}
+
+	// Create client and execute request
+	client := newPrometheusClient(baseURL, params)
+	body, err := client.GetAlertsRaw(params.Context, active, silenced, inhibited, filter)
+	if err != nil {
+		return api.NewToolCallResult("", fmt.Errorf("alertmanager query failed: %w", err)), nil
+	}
+
+	// Format response: pretty-print when possible, fall back to the raw body on formatting errors
+	result, err := prettyJSON(body)
+	if err != nil {
+		return api.NewToolCallResult(string(body), nil), nil
+	}
+
+	return api.NewToolCallResult(result, nil), nil
+}
diff --git a/pkg/toolsets/observability/config.go b/pkg/toolsets/observability/config.go
new file mode 100644
index 000000000..64653fdd1
--- /dev/null
+++ b/pkg/toolsets/observability/config.go
@@ -0,0 +1,36 @@
+package observability
+
+import (
+	"context"
+
+	"github.com/BurntSushi/toml"
+	"github.com/containers/kubernetes-mcp-server/pkg/api"
+	"github.com/containers/kubernetes-mcp-server/pkg/config"
+)
+
+// Config holds observability toolset configuration
+type Config struct {
+	// MonitoringNamespace is the namespace where monitoring components are deployed.
+	// Defaults to "openshift-monitoring" if not specified.
+	MonitoringNamespace string `toml:"monitoring_namespace,omitempty"`
+}
+
+// Compile-time check that Config satisfies api.ExtendedConfig.
+var _ api.ExtendedConfig = (*Config)(nil)
+
+// Validate checks that the configuration values are valid.
+func (c *Config) Validate() error {
+	// All fields are optional with sensible defaults, no validation required
+	return nil
+}
+
+// observabilityToolsetParser decodes the [toolsets.observability] TOML table into a Config.
+func observabilityToolsetParser(_ context.Context, primitive toml.Primitive, md toml.MetaData) (api.ExtendedConfig, error) {
+	var cfg Config
+	if err := md.PrimitiveDecode(primitive, &cfg); err != nil {
+		return nil, err
+	}
+	return &cfg, nil
+}
+
+// init registers the parser so the config loader recognizes this toolset's section.
+func init() {
+	config.RegisterToolsetConfig("observability", observabilityToolsetParser)
+}
diff --git a/pkg/toolsets/observability/helpers.go b/pkg/toolsets/observability/helpers.go
new file mode 100644
index 000000000..f5420ac69
--- /dev/null
+++ b/pkg/toolsets/observability/helpers.go
@@ -0,0 +1,168 @@
+package observability
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"regexp"
+	"sync"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/klog/v2"
+
+	"github.com/containers/kubernetes-mcp-server/pkg/api"
+	"github.com/containers/kubernetes-mcp-server/pkg/prometheus"
+)
+
+const (
+	// defaultMonitoringNamespace is the default namespace for OpenShift monitoring components
+	defaultMonitoringNamespace = "openshift-monitoring"
+
+	// thanosQuerierRoute is the route name for Thanos Querier
+	thanosQuerierRoute = "thanos-querier"
+
+	// alertmanagerRoute is the route name for Alertmanager
+	alertmanagerRoute = "alertmanager-main"
+
+	// maxQueryLength is the maximum allowed query length to prevent DoS
+	maxQueryLength = 10000
+)
+
+// routeGVR is the GroupVersionResource for OpenShift Routes
+var routeGVR = schema.GroupVersionResource{
+	Group:    "route.openshift.io",
+	Version:  "v1",
+	Resource: "routes",
+}
+
+// routeURLCache caches resolved route URLs for the lifetime of the server process.
+// This avoids repeated Kubernetes API calls since routes rarely change.
+// Key format: "apiServerHost/namespace/routeName", value: URL string.
+// The API server host is included to support multi-cluster (ACM) environments.
+// NOTE(review): cache is unbounded and never invalidated — acceptable only if the clusters×routes key space stays small; confirm
+var routeURLCache sync.Map
+
+// allowedPrometheusEndpoints is a whitelist of allowed Prometheus API endpoints
+var allowedPrometheusEndpoints = map[string]bool{
+	"/api/v1/query":       true,
+	"/api/v1/query_range": true,
+	"/api/v1/series":      true,
+	"/api/v1/labels":      true,
+}
+
+// allowedPrometheusLabelPattern matches /api/v1/label/