Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 5 additions & 38 deletions assisted_service_mcp/src/utils/log_analyzer/log_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,15 @@

import json
import logging
from collections import defaultdict
from typing import Dict, List, Any, cast

import dateutil.parser
import nestedarchive

logger = logging.getLogger(__name__)

# Archive path constants for different log bundle formats
NEW_LOG_BUNDLE_PATH = "*_bootstrap_*.tar/*_bootstrap_*.tar.gz/logs_host_*/log-bundle-*.tar.gz/log-bundle-*"
OLD_LOG_BUNDLE_PATH = (
"*_bootstrap_*.tar.gz/logs_host_*/log-bundle-*.tar.gz/log-bundle-*"
)
# Glob-style path that descends through the nested bootstrap archives to the
# log-bundle-* directory. Callers append a bundle-relative suffix (for example
# "/control-plane/<ip>/journals/<file>") and hand the result to logs_archive.get().
LOG_BUNDLE_PATH = "*_bootstrap_*.tar/*_bootstrap_*.tar.gz/logs_host_*/log-bundle-*.tar.gz/log-bundle-*"


class LogAnalyzer:
Expand Down Expand Up @@ -123,14 +119,6 @@ def partition_cluster_events(

return partitions or [[]]

def get_events_by_host(self) -> Dict[str, List[Dict[str, Any]]]:
    """Group the events from ``get_last_install_cluster_events`` by host.

    Events that carry no ``host_id`` key are left out.

    Returns:
        A ``defaultdict(list)`` mapping each host ID to its events, kept in
        the order in which they were produced.
    """
    grouped = defaultdict(list)
    host_events = (
        entry
        for entry in self.get_last_install_cluster_events()
        if "host_id" in entry
    )
    for entry in host_events:
        grouped[entry["host_id"]].append(entry)
    return grouped

def get_host_log_file(self, host_id: str, filename: str) -> str:
"""
Get a specific log file for a host.
Expand Down Expand Up @@ -179,21 +167,9 @@ def get_journal_log(self, host_ip: str, journal_file: str, **kwargs) -> str:
Raises:
FileNotFoundError: If the journal file cannot be found
"""
new_logs_path = (
f"{NEW_LOG_BUNDLE_PATH}/control-plane/{host_ip}/journals/{journal_file}"
)
try:
content = self.logs_archive.get(new_logs_path, **kwargs)
logger.debug("Found journal under new location: %s", new_logs_path)
return cast(str, content)
except FileNotFoundError:
pass

old_logs_path = (
f"{OLD_LOG_BUNDLE_PATH}/control-plane/{host_ip}/journals/{journal_file}"
)
content = self.logs_archive.get(old_logs_path, **kwargs)
logger.debug("Found journal under old location: %s", old_logs_path)
logs_path = f"{LOG_BUNDLE_PATH}/control-plane/{host_ip}/journals/{journal_file}"
content = self.logs_archive.get(logs_path, **kwargs)
logger.debug("Found journal: %s", logs_path)
return cast(str, content)

def get_controller_logs(self) -> str:
Expand All @@ -205,15 +181,6 @@ def get_controller_logs(self) -> str:
),
)

def get_must_gather(self) -> bytes:
    """Return the must-gather tarball nested in the controller logs archive.

    The entry is requested from ``self.logs_archive`` in binary mode.
    """
    must_gather_path = "controller_logs.tar.gz/must-gather.tar.gz"
    payload = self.logs_archive.get(must_gather_path, mode="rb")
    return cast(bytes, payload)

@staticmethod
def get_hostname(host: Dict[str, Any]) -> str:
"""Extract hostname from host metadata."""
Expand Down
14 changes: 0 additions & 14 deletions assisted_service_mcp/src/utils/log_analyzer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,3 @@ async def analyze_cluster(
except Exception as e:
logger.error("Error analyzing cluster %s: %s", cluster_id, e)
raise


def print_results(results: List[SignatureResult]) -> None:
    """Write the analysis results to stdout.

    When ``results`` is empty a single "no issues" line is printed; otherwise
    a banner is printed, followed by each result on its own line.
    """
    if results:
        print("OpenShift Assisted Installer Log Analysis")
        print("=" * 50)
        print()
        for entry in results:
            print(entry)
    else:
        print("No issues found in the cluster logs.")
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
from typing import Any, Generator, Optional, Callable, List, Dict

from assisted_service_mcp.src.utils.log_analyzer.log_analyzer import (
NEW_LOG_BUNDLE_PATH,
OLD_LOG_BUNDLE_PATH,
LOG_BUNDLE_PATH,
)

from .base import Signature, SignatureResult
Expand Down Expand Up @@ -228,48 +227,44 @@ def analyze(self, log_analyzer) -> Optional[SignatureResult]:
return None
if bootstrap[0]["progress"]["current_stage"] != "Waiting for controller":
return None
for base in (NEW_LOG_BUNDLE_PATH, OLD_LOG_BUNDLE_PATH):
path = f"{base}/resources/pods.json"
try:
pods_json = log_analyzer.logs_archive.get(path)
except FileNotFoundError:
continue
try:
pods = json.loads(pods_json)
controller_pod = [
pod
for pod in pods.get("items", [])
if pod.get("metadata", {}).get("namespace") == "assisted-installer"
][0]
except Exception:
continue
try:
ready = [
condition.get("status") == "True"
for condition in controller_pod.get("status", {}).get(
"conditions", {}
)
if condition.get("type") == "Ready"
][0]
except Exception:
ready = False
conditions_tbl = self.generate_table(
controller_pod.get("status", {}).get("conditions", [])
)
containers_tbl = self.generate_table(
controller_pod.get("status", {}).get("containerStatuses", [])
)
content = (
f"The controller pod {'is' if ready else 'is not'} ready.\n"
f"Conditions:\n{conditions_tbl}\n\nContainer Statuses:\n{containers_tbl}"
)
return SignatureResult(
signature_name=self.name,
title="Assisted Installer Controller failed to start",
content=content,
severity="warning",
)
return None
path = f"{LOG_BUNDLE_PATH}/resources/pods.json"
try:
pods_json = log_analyzer.logs_archive.get(path)
except FileNotFoundError:
return None
try:
pods = json.loads(pods_json)
controller_pod = [
pod
for pod in pods.get("items", [])
if pod.get("metadata", {}).get("namespace") == "assisted-installer"
][0]
except Exception:
return None
try:
ready = [
condition.get("status") == "True"
for condition in controller_pod.get("status", {}).get("conditions", {})
if condition.get("type") == "Ready"
][0]
except Exception:
ready = False
conditions_tbl = self.generate_table(
controller_pod.get("status", {}).get("conditions", [])
)
containers_tbl = self.generate_table(
controller_pod.get("status", {}).get("containerStatuses", [])
)
content = (
f"The controller pod {'is' if ready else 'is not'} ready.\n"
f"Conditions:\n{conditions_tbl}\n\nContainer Statuses:\n{containers_tbl}"
)
return SignatureResult(
signature_name=self.name,
title="Assisted Installer Controller failed to start",
content=content,
severity="warning",
)


class MachineConfigDaemonErrorExtracting(Signature):
Expand All @@ -281,23 +276,20 @@ class MachineConfigDaemonErrorExtracting(Signature):
)

def analyze(self, log_analyzer) -> Optional[SignatureResult]:
    """Flag machine-config-daemon firstboot journals that match ``self.mco_error``.

    The journal is looked up under the control-plane section of the log
    bundle, using the single ``LOG_BUNDLE_PATH`` layout.

    Args:
        log_analyzer: Object whose ``logs_archive.get(path)`` returns the log
            text or raises ``FileNotFoundError``.

    Returns:
        A warning ``SignatureResult`` when the pattern is found; ``None`` when
        the journal is missing from the archive or the pattern does not match.
    """
    path = f"{LOG_BUNDLE_PATH}/control-plane/*/journals/machine-config-daemon-firstboot.log"
    try:
        mcd_logs = log_analyzer.logs_archive.get(path)
    except FileNotFoundError:
        # No firstboot journal in this bundle: nothing to report.
        return None
    if not self.mco_error.search(mcd_logs):
        return None
    return SignatureResult(
        signature_name=self.name,
        title="machine-config-daemon could not extract machine-os-content",
        content=(
            "machine-config-daemon-firstboot logs indicate a node may be hitting OCPBUGS-5352"
        ),
        severity="warning",
    )


Expand Down Expand Up @@ -372,18 +364,18 @@ def _get_host_directories(self, log_analyzer) -> List[Dict[str, str]]:
host_dirs.append(
{
"host_id": "bootstrap",
"kubelet_path": f"{NEW_LOG_BUNDLE_PATH}/bootstrap/journals/kubelet.log",
"containers_path": f"{NEW_LOG_BUNDLE_PATH}/bootstrap/containers/",
"kubelet_path": f"{LOG_BUNDLE_PATH}/bootstrap/journals/kubelet.log",
"containers_path": f"{LOG_BUNDLE_PATH}/bootstrap/containers/",
}
)

# Add control-plane directories
try:
control_plane_dir = log_analyzer.logs_archive.get(
f"{NEW_LOG_BUNDLE_PATH}/control-plane/"
f"{LOG_BUNDLE_PATH}/control-plane/"
)
logger.debug(
"Found control-plane directory: %s/control-plane/", NEW_LOG_BUNDLE_PATH
"Found control-plane directory: %s/control-plane/", LOG_BUNDLE_PATH
)

for node_dir in self.archive_dir_contents(control_plane_dir):
Expand All @@ -393,8 +385,8 @@ def _get_host_directories(self, log_analyzer) -> List[Dict[str, str]]:
host_dirs.append(
{
"host_id": node_ip,
"kubelet_path": f"{NEW_LOG_BUNDLE_PATH}/control-plane/{node_ip}/journals/kubelet.log",
"containers_path": f"{NEW_LOG_BUNDLE_PATH}/control-plane/{node_ip}/containers/",
"kubelet_path": f"{LOG_BUNDLE_PATH}/control-plane/{node_ip}/journals/kubelet.log",
"containers_path": f"{LOG_BUNDLE_PATH}/control-plane/{node_ip}/containers/",
}
)
except FileNotFoundError as e:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import logging
from typing import Optional, Any, Sequence

import dateutil.parser
from tabulate import tabulate

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -74,14 +73,6 @@ def generate_table(data: Sequence[dict[str, Any]]) -> str:
return "No data available"
return tabulate(data, headers="keys", tablefmt="grid")

@staticmethod
def format_time(time_str: str) -> str:
    """Render an ISO-8601 timestamp as ``YYYY-MM-DD HH:MM:SS``.

    This is a best-effort display helper: if parsing or formatting fails for
    any reason, the input is returned unchanged.
    """
    try:
        parsed = dateutil.parser.isoparse(time_str)
        formatted = parsed.strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        return time_str
    return formatted

@staticmethod
def archive_dir_contents(archive_dir):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

import yaml
from assisted_service_mcp.src.utils.log_analyzer.log_analyzer import (
NEW_LOG_BUNDLE_PATH,
OLD_LOG_BUNDLE_PATH,
LOG_BUNDLE_PATH,
)

from .base import ErrorSignature, SignatureResult
Expand All @@ -22,17 +21,6 @@
# pylint: disable=duplicate-code


def _search_patterns_in_string(string, patterns):
"""Utility function to search for patterns in a string."""
if isinstance(patterns, str):
patterns = [patterns]

combined_regex = re.compile(
f'({"|".join(fr".*{pattern}.*" for pattern in patterns)})'
)
return combined_regex.findall(string)


class SNOHostnameHasEtcd(ErrorSignature):
"""Looks for etcd in SNO hostname (OCPBUGS-15852)."""

Expand Down Expand Up @@ -96,25 +84,21 @@ class ApiExpiredCertificateSignature(ErrorSignature):
LOG_PATTERN = re.compile("x509: certificate has expired or is not yet valid.*")

def analyze(self, log_analyzer) -> Optional[SignatureResult]:
    """Report ``self.LOG_PATTERN`` matches in the bootstrap kube-apiserver log.

    The log is looked up under the bootstrap-control-plane containers of the
    log bundle, using the single ``LOG_BUNDLE_PATH`` layout.

    Args:
        log_analyzer: Object whose ``logs_archive.get(path)`` returns the log
            text or raises ``FileNotFoundError``.

    Returns:
        An error-severity result whose content is the first matching line,
        plus a count of further matches when there is more than one; ``None``
        when the log is missing or nothing matches.
    """
    path = f"{LOG_BUNDLE_PATH}/bootstrap/containers/bootstrap-control-plane/kube-apiserver.log"
    try:
        logs = log_analyzer.logs_archive.get(path)
    except FileNotFoundError:
        # No kube-apiserver log in this bundle: nothing to report.
        return None
    invalid_api_log_lines = self.LOG_PATTERN.findall(logs)
    if not invalid_api_log_lines:
        return None
    content = invalid_api_log_lines[0]
    if (num_lines := len(invalid_api_log_lines)) > 1:
        # Show one representative line and summarise the rest.
        content += f"\nadditional {num_lines - 1} similar error log lines found"
    return self.create_result(
        title="Expired Certificate",
        content=content,
        severity="error",
    )


Expand Down
Loading