Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 43 additions & 20 deletions assisted_service_mcp/src/utils/log_analyzer/log_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import json
import logging
from typing import Dict, List, Any, cast
from typing import Dict, List, Any, cast, Iterator, Tuple

import dateutil.parser
import nestedarchive
Expand Down Expand Up @@ -40,9 +40,7 @@ def metadata(self) -> Dict[str, Any] | None:
raw_metadata = json.loads(cast(str | bytes, metadata_content))

# The metadata file contains cluster information at the root level
# Wrap it in a "cluster" key to match the expected structure
wrapped_metadata = {"cluster": raw_metadata}
self._metadata = self._clean_metadata_json(wrapped_metadata)
self._metadata = self._clean_metadata_json(raw_metadata)
except Exception as e:
logger.error("Failed to load metadata: %s", e)
raise
Expand All @@ -51,20 +49,18 @@ def metadata(self) -> Dict[str, Any] | None:
@staticmethod
def _clean_metadata_json(md: Dict[str, Any]) -> Dict[str, Any]:
"""Clean metadata JSON by separating deleted hosts."""
installation_start_time = dateutil.parser.isoparse(
md["cluster"]["install_started_at"]
)
installation_start_time = dateutil.parser.isoparse(md["install_started_at"])

def host_deleted_before_installation_started(host):
if deleted_at := host.get("deleted_at"):
return dateutil.parser.isoparse(deleted_at) < installation_start_time
return False

all_hosts = md["cluster"]["hosts"]
md["cluster"]["deleted_hosts"] = [
all_hosts = md["hosts"]
md["deleted_hosts"] = [
h for h in all_hosts if host_deleted_before_installation_started(h)
]
md["cluster"]["hosts"] = [
md["hosts"] = [
h for h in all_hosts if not host_deleted_before_installation_started(h)
]

Expand Down Expand Up @@ -181,15 +177,42 @@ def get_controller_logs(self) -> str:
),
)

@staticmethod
def get_hostname(host: Dict[str, Any]) -> str:
"""Extract hostname from host metadata."""
hostname = host.get("requested_hostname")
if hostname:
return hostname
def cluster_is_sno(self) -> bool:
    """
    Determine whether this cluster is a Single Node OpenShift (SNO) cluster.

    Returns:
        True when the cluster metadata reports high_availability_mode == "None",
        False otherwise (including when metadata cannot be loaded).
    """
    try:
        md = self.metadata
        if md is None:
            return False
        return md.get("high_availability_mode") == "None"
    except Exception:
        # Metadata access is best-effort here; any failure means "not SNO".
        return False

def all_host_journal_logs(
    self,
) -> Iterator[Tuple[Dict[str, Any], str]]:
    """
    Iterate over hosts and their journal logs, skipping hosts where
    journal.logs is not found.

    NOTE: this span of the pasted diff interleaved remnants of the deleted
    ``get_hostname`` body; this is the reconstructed added method only.

    Yields:
        Tuple of (host, journal_logs) for each host with available journal logs.
    """
    try:
        cluster = self.metadata
    except Exception:
        # Metadata could not be loaded; yield nothing rather than propagate.
        return

    if cluster is None:
        return

    for host in cluster.get("hosts", []):
        host_id = host["id"]
        try:
            journal_logs = self.get_host_log_file(host_id, "journal.logs")
        except FileNotFoundError:
            # No journal bundle was collected for this host; skip it.
            continue
        yield host, journal_logs
44 changes: 38 additions & 6 deletions assisted_service_mcp/src/utils/log_analyzer/signatures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,44 @@
import inspect

from .base import Signature, ErrorSignature, SignatureResult
from .basic_info import * # noqa
from .error_detection import * # noqa
from .performance import * # noqa
from .networking import * # noqa
from .advanced_analysis import * # noqa
from .platform_specific import * # noqa

# Import all individual signature classes
# These are used dynamically via inspect.getmembers(), so we suppress unused import warnings
from .components_version_signature import ComponentsVersionSignature # noqa: F401
from .sno_hostname_has_etcd import SNOHostnameHasEtcd # noqa: F401
from .api_invalid_certificate_signature import (
ApiInvalidCertificateSignature, # noqa: F401
)
from .api_expired_certificate_signature import (
ApiExpiredCertificateSignature, # noqa: F401
)
from .release_pull_error_signature import ReleasePullErrorSignature # noqa: F401
from .error_on_cleanup_install_device import ErrorOnCleanupInstallDevice # noqa: F401
from .missing_mc import MissingMC # noqa: F401
from .error_creating_read_write_layer import ErrorCreatingReadWriteLayer # noqa: F401
from .sno_machine_cidr_signature import SNOMachineCidrSignature # noqa: F401
from .duplicate_vip import DuplicateVIP # noqa: F401
from .nameserver_in_cluster_network import NameserverInClusterNetwork # noqa: F401
from .networks_mtu_mismatch import NetworksMtuMismatch # noqa: F401
from .dual_stack_bad_route import DualStackBadRoute # noqa: F401
from .dualstackr_dns_bug import DualstackrDNSBug # noqa: F401
from .user_managed_networking_load_balancer import (
UserManagedNetworkingLoadBalancer, # noqa: F401
)
from .slow_image_download_signature import SlowImageDownloadSignature # noqa: F401
from .libvirt_reboot_flag_signature import LibvirtRebootFlagSignature # noqa: F401
from .ip_changed_after_reboot import IpChangedAfterReboot # noqa: F401
from .events_installation_attempts import EventsInstallationAttempts # noqa: F401
from .controller_warnings import ControllerWarnings # noqa: F401
from .user_has_logged_into_cluster import UserHasLoggedIntoCluster # noqa: F401
from .failed_request_triggers_host_timeout import (
FailedRequestTriggersHostTimeout, # noqa: F401
)
from .controller_failed_to_start import ControllerFailedToStart # noqa: F401
from .machine_config_daemon_error_extracting import (
MachineConfigDaemonErrorExtracting, # noqa: F401
)
from .container_crash_analysis import ContainerCrashAnalysis # noqa: F401

# Collect all signatures from all modules
ALL_SIGNATURES = []
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
ApiExpiredCertificateSignature for OpenShift Assisted Installer logs.
"""

import logging
import re
from typing import Optional

from assisted_service_mcp.src.utils.log_analyzer.log_analyzer import (
LOG_BUNDLE_PATH,
)

from .base import ErrorSignature, SignatureResult

logger = logging.getLogger(__name__)


class ApiExpiredCertificateSignature(ErrorSignature):
    """Detect expired or not yet valid certificate in kube-apiserver logs."""

    # Matches the Go x509 error emitted when a certificate's validity window
    # does not cover the current time.
    LOG_PATTERN = re.compile("x509: certificate has expired or is not yet valid.*")

    def analyze(self, log_analyzer) -> Optional[SignatureResult]:
        """Scan the bootstrap kube-apiserver log for expired-certificate errors."""
        apiserver_log_path = f"{LOG_BUNDLE_PATH}/bootstrap/containers/bootstrap-control-plane/kube-apiserver.log"
        try:
            apiserver_logs = log_analyzer.logs_archive.get(apiserver_log_path)
        except FileNotFoundError:
            # No bootstrap kube-apiserver log in this bundle; nothing to report.
            return None

        matches = self.LOG_PATTERN.findall(apiserver_logs)
        if not matches:
            return None

        # Report the first occurrence and summarize how many more were seen.
        content = matches[0]
        extra = len(matches) - 1
        if extra > 0:
            content += f"\nadditional {extra} similar error log lines found"
        return self.create_result(
            title="Expired Certificate",
            content=content,
            severity="error",
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
ApiInvalidCertificateSignature for OpenShift Assisted Installer logs.
"""

import logging
import re
from typing import Optional

from .base import ErrorSignature, SignatureResult

logger = logging.getLogger(__name__)


class ApiInvalidCertificateSignature(ErrorSignature):
"""Detect invalid SAN values on certificate for AI API from controller logs."""

LOG_PATTERN = re.compile(
'time=".*" level=error msg=".*x509: certificate is valid.* not .*'
)
Comment on lines +17 to +19
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

Verify the regex pattern is complete.

The LOG_PATTERN regex appears to be missing a closing quote or pattern marker. The pattern ends with 'time=".*" level=error msg=".*x509: certificate is valid.* not .*' but typically x509 certificate error messages have more structure.

Please verify this pattern matches the actual log format. Compare with similar patterns in the codebase (e.g., ApiExpiredCertificateSignature) to ensure consistency.


🏁 Script executed:

#!/bin/bash
# Search for similar x509 certificate error patterns in signature files
rg -n -A2 -B2 'x509.*certificate' --type=py -g 'signatures/*.py'

Length of output: 148



🏁 Script executed:

#!/bin/bash
# Find the target file and similar signature files
fd 'signature' -e py -p 'log_analyzer' 2>/dev/null | head -20

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Also search more broadly for files containing 'x509' in the codebase
rg -l 'x509' --type=py

Length of output: 183



🏁 Script executed:

#!/bin/bash
# Read the target file
cat assisted_service_mcp/src/utils/log_analyzer/signatures/api_invalid_certificate_signature.py

Length of output: 1243


🏁 Script executed:

#!/bin/bash
# Read the comparison file for similar pattern
cat assisted_service_mcp/src/utils/log_analyzer/signatures/api_expired_certificate_signature.py

Length of output: 1307


The regex pattern is missing a closing quote for the message field.

The pattern 'time=".*" level=error msg=".*x509: certificate is valid.* not .*' opens the message field with msg=" but never closes it. While the Python string syntax is valid, the regex pattern itself is structurally incomplete. It should include a closing quote to properly match the log structure:

LOG_PATTERN = re.compile(
    'time=".*" level=error msg=".*x509: certificate is valid.* not .*"'
)

Compare this to api_expired_certificate_signature.py which uses a simpler pattern; the invalid certificate pattern requires closing the message field quote to properly validate against actual log output.

🤖 Prompt for AI Agents
In
assisted_service_mcp/src/utils/log_analyzer/signatures/api_invalid_certificate_signature.py
around lines 17 to 19, the regex pattern opens the msg=" field but never closes
it; update the LOG_PATTERN string to include the closing double-quote after the
final .* (i.e. end the regex with ... not .*") so the pattern matches the full
msg="..."; ensure the string quoting in Python remains correct (escape if
needed) and run tests to verify it matches the intended log lines.


def analyze(self, log_analyzer) -> Optional[SignatureResult]:
    """Scan controller logs for x509 invalid-SAN certificate errors for the AI API."""
    try:
        controller_logs = log_analyzer.get_controller_logs()
    except FileNotFoundError:
        # Controller logs are missing from the bundle; signature does not apply.
        return None

    matches = self.LOG_PATTERN.findall(controller_logs)
    if not matches:
        return None

    # Cap the report at the first five matching lines and summarize the rest.
    preview, remainder = matches[:5], matches[5:]
    content = "\n".join(preview)
    if remainder:
        content += f"\nadditional {len(remainder)} similar error log lines found"
    return self.create_result(
        title="Invalid SAN values on certificate for AI API",
        content=content,
        severity="error",
    )
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""
Basic information and status signature analysis.
These signatures provide fundamental information about the cluster and installation.
ComponentsVersionSignature for OpenShift Assisted Installer logs.
"""

import logging
Expand All @@ -18,15 +17,14 @@ def analyze(self, log_analyzer) -> Optional[SignatureResult]:
"""Analyze component versions."""
try:
metadata = log_analyzer.metadata
cluster_md = metadata.get("cluster", {})

content_lines = []

release_tag = metadata.get("release_tag") or cluster_md.get("release_tag")
release_tag = metadata.get("release_tag")
if release_tag:
content_lines.append(f"Release tag: {release_tag}")

versions = metadata.get("versions") or cluster_md.get("versions")
versions = metadata.get("versions")
if versions:
if "assisted-installer" in versions:
content_lines.append(
Expand Down
Loading