-
Notifications
You must be signed in to change notification settings - Fork 19
MGMT-21844: Enable running Signatures regardless of log bundle availability #128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
867a2a6
312c9e4
f3a3a3e
3b5f0d8
f68510b
d9472bd
1177fa1
52f6df6
b4faa94
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| """ | ||
| Core log analyzer for OpenShift Assisted Installer logs. | ||
| Core analyzer for OpenShift Assisted Installer clusters and their logs. | ||
| """ | ||
|
|
||
| import json | ||
|
|
@@ -19,44 +19,45 @@ | |
| ) | ||
|
|
||
|
|
||
| class LogAnalyzer: | ||
| """Analyzer for OpenShift Assisted Installer logs.""" | ||
|
|
||
| _metadata: dict[str, Any] | None | ||
|
|
||
| def __init__(self, logs_archive: nestedarchive.RemoteNestedArchive): | ||
| """ | ||
| Initialize the log analyzer. | ||
| class ClusterAnalyzer: | ||
| """Analyzer for OpenShift Assisted Installer clusters.""" | ||
|
|
||
| Args: | ||
| logs_archive: RemoteNestedArchive containing the cluster logs | ||
| """ | ||
| self.logs_archive = logs_archive | ||
| def __init__(self): | ||
| self._metadata = None | ||
| self._cluster_events = None | ||
|
|
||
| def set_cluster_metadata(self, metadata: Dict[str, Any]): | ||
| """Set cluster metadata for analyzer.""" | ||
| if not metadata.get("cluster"): | ||
| # Wrap metadata in a "cluster" key to match the expected structure | ||
| metadata = {"cluster": metadata} | ||
| self._metadata = self._clean_metadata_json(metadata) | ||
|
|
||
| def set_cluster_events(self, events: List[Dict[str, Any]]): | ||
| """Set cluster events for analyzer.""" | ||
| self._cluster_events = events | ||
|
|
||
| @property | ||
| def metadata(self) -> Dict[str, Any] | None: | ||
| """Get cluster metadata.""" | ||
| if self._metadata is None: | ||
| try: | ||
| metadata_content = self.logs_archive.get("cluster_metadata.json") | ||
| raw_metadata = json.loads(cast(str | bytes, metadata_content)) | ||
|
|
||
| # The metadata file contains cluster information at the root level | ||
| # Wrap it in a "cluster" key to match the expected structure | ||
| wrapped_metadata = {"cluster": raw_metadata} | ||
| self._metadata = self._clean_metadata_json(wrapped_metadata) | ||
| except Exception as e: | ||
| logger.error("Failed to load metadata: %s", e) | ||
| raise | ||
| return self._metadata | ||
|
|
||
| @property | ||
| def cluster_events(self) -> List[Dict[str, Any]] | None: | ||
| """Get cluster events.""" | ||
| return self._cluster_events | ||
|
|
||
| def get_all_cluster_events(self) -> List[Dict[str, Any]]: | ||
| """Get all the cluster installation events.""" | ||
| if self._cluster_events is None: | ||
| return [] | ||
| return self._cluster_events | ||
|
|
||
| @staticmethod | ||
| def _clean_metadata_json(md: Dict[str, Any]) -> Dict[str, Any]: | ||
| """Clean metadata JSON by separating deleted hosts.""" | ||
| installation_start_time = dateutil.parser.isoparse( | ||
| md["cluster"]["install_started_at"] | ||
| str(md["cluster"]["install_started_at"]) | ||
| ) | ||
|
|
||
| def host_deleted_before_installation_started(host): | ||
|
|
@@ -87,21 +88,6 @@ def get_last_install_cluster_events(self) -> List[Dict[str, Any]]: | |
|
|
||
| return events | ||
|
|
||
| def get_all_cluster_events(self) -> List[Dict[str, Any]]: | ||
| """Get all the cluster installation events.""" | ||
| if self._cluster_events is None: | ||
| try: | ||
| events_content = self.logs_archive.get("cluster_events.json") | ||
| all_events = json.loads(cast(str | bytes, events_content)) | ||
|
|
||
| # Get the last partition (latest installation attempt) | ||
| self._cluster_events = self.partition_cluster_events(all_events)[-1] | ||
| except Exception as e: | ||
| logger.error("Failed to load cluster events: %s", e) | ||
| self._cluster_events = [] | ||
|
|
||
| return self._cluster_events | ||
|
|
||
| @staticmethod | ||
| def partition_cluster_events( | ||
| events: List[Dict[str, Any]], | ||
|
|
@@ -131,6 +117,66 @@ def get_events_by_host(self) -> Dict[str, List[Dict[str, Any]]]: | |
| events_by_host[event["host_id"]].append(event) | ||
| return events_by_host | ||
|
|
||
| @staticmethod | ||
| def get_hostname(host: Dict[str, Any]) -> str: | ||
| """Extract hostname from host metadata.""" | ||
| hostname = host.get("requested_hostname") | ||
| if hostname: | ||
| return hostname | ||
|
|
||
| try: | ||
| inventory = json.loads(host["inventory"]) | ||
| return inventory["hostname"] | ||
| except (KeyError, json.JSONDecodeError): | ||
| return host.get("id", "unknown") | ||
|
|
||
|
|
||
| class LogAnalyzer(ClusterAnalyzer): | ||
| """Analyzer for OpenShift Assisted Installer logs.""" | ||
|
|
||
| _metadata: dict[str, Any] | None | ||
|
|
||
| def __init__(self, logs_archive: nestedarchive.RemoteNestedArchive): | ||
| """ | ||
| Initialize the log analyzer. | ||
|
|
||
| Args: | ||
| logs_archive: RemoteNestedArchive containing the cluster logs | ||
| """ | ||
| super().__init__() | ||
| self.logs_archive = logs_archive | ||
|
|
||
| @property | ||
| def metadata(self) -> Dict[str, Any] | None: | ||
| """Get cluster metadata.""" | ||
| if self._metadata is None: | ||
| try: | ||
| metadata_content = self.logs_archive.get("cluster_metadata.json") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a bit strange to me that we now sometimes get the metadata from the API and other times get it from the log bundle. Did you consider making this consistent (I guess just always getting it from the API)? I could see a situation where if the logs are available we're actually acting on old data where if they are not then we get the data from the live cluster. Also it's a bit odd that the user of the
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Then a follow on from this would be that maybe we don't need separate classes at all. If the behavior is the same for getting the data (cluster and events) the Or even better the signature could check
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes that's exactly it, the data from the live cluster might differ from what's in the log bundle. I've considered making it toggle-able as in either choose to use the api data or the log bundle data if it's available for the cluster metadata/events. However, this would then need to be something that the model decides when calling the tool, and I'm not sure how to do that...
Sure we could do it this way instead too, I was just hesitant to pass in the api client to the analyzer since the client is used outside of it.
Yeah that works too, but it just requires more changes where we'd need to update all of the signatures that do require logs to do this check. |
||
| raw_metadata = json.loads(cast(str | bytes, metadata_content)) | ||
|
|
||
| # The metadata file contains cluster information at the root level | ||
| # Wrap it in a "cluster" key to match the expected structure | ||
| wrapped_metadata = {"cluster": raw_metadata} | ||
| self._metadata = self._clean_metadata_json(wrapped_metadata) | ||
| except Exception as e: | ||
| logger.error("Failed to load metadata: %s", e) | ||
| raise | ||
| return self._metadata | ||
|
|
||
| def get_all_cluster_events(self) -> List[Dict[str, Any]]: | ||
| """Get all the cluster installation events.""" | ||
| if self._cluster_events is None: | ||
| try: | ||
| events_content = self.logs_archive.get("cluster_events.json") | ||
| all_events = json.loads(cast(str | bytes, events_content)) | ||
|
|
||
| self._cluster_events = all_events | ||
| except Exception as e: | ||
| logger.error("Failed to load cluster events: %s", e) | ||
| self._cluster_events = [] | ||
|
|
||
| return self._cluster_events | ||
|
|
||
| def get_host_log_file(self, host_id: str, filename: str) -> str: | ||
| """ | ||
| Get a specific log file for a host. | ||
|
|
@@ -213,16 +259,3 @@ def get_must_gather(self) -> bytes: | |
| "controller_logs.tar.gz/must-gather.tar.gz", mode="rb" | ||
| ), | ||
| ) | ||
|
|
||
| @staticmethod | ||
| def get_hostname(host: Dict[str, Any]) -> str: | ||
| """Extract hostname from host metadata.""" | ||
| hostname = host.get("requested_hostname") | ||
| if hostname: | ||
| return hostname | ||
|
|
||
| try: | ||
| inventory = json.loads(host["inventory"]) | ||
| return inventory["hostname"] | ||
| except (KeyError, json.JSONDecodeError): | ||
| return host.get("id", "unknown") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,12 +3,19 @@ | |
| Main entry point for the OpenShift Assisted Installer Log Analyzer. | ||
| """ | ||
| import logging | ||
| import json | ||
| from typing import List, Optional | ||
|
|
||
| from assisted_service_mcp.src.service_client.assisted_service_api import InventoryClient | ||
|
|
||
| from .log_analyzer import LogAnalyzer | ||
| from .signatures import ALL_SIGNATURES, SignatureResult | ||
| from assisted_service_mcp.src.utils.log_analyzer.log_analyzer import ( | ||
| ClusterAnalyzer, | ||
| LogAnalyzer, | ||
| ) | ||
| from assisted_service_mcp.src.utils.log_analyzer.signatures import ( | ||
| ALL_SIGNATURES, | ||
| SignatureResult, | ||
| Signature, | ||
| ) | ||
|
|
||
|
|
||
| async def analyze_cluster( | ||
|
|
@@ -17,11 +24,11 @@ async def analyze_cluster( | |
| specific_signatures: Optional[List[str]] = None, | ||
| ) -> List[SignatureResult]: | ||
| """ | ||
| Analyze a cluster's logs. | ||
| Analyze a cluster by using a signature analysis tool to detect common issues and errors. | ||
|
|
||
| Args: | ||
| cluster_id: UUID of the cluster to analyze | ||
| api_client: Client to fetch log files with | ||
| api_client: Client to fetch the cluster and its log files with | ||
| specific_signatures: List of specific signature names to run (None for all) | ||
|
|
||
| Returns: | ||
|
|
@@ -33,30 +40,52 @@ async def analyze_cluster( | |
| logger.info("Analyzing cluster: %s", cluster_id) | ||
|
|
||
| try: | ||
| # Download logs | ||
| logs_archive = await api_client.get_cluster_logs(cluster_id) | ||
| # first call the api to get the cluster and check if logs are available | ||
| cluster = await api_client.get_cluster(cluster_id) | ||
|
|
||
| if cluster.logs_info != "completed": | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's change this to actually check if we can download the logs. I say this because when you reset a cluster the |
||
| logger.info( | ||
| "Logs are not available for cluster: %s\nDefaulting to signatures that don't require logs", | ||
| cluster_id, | ||
| ) | ||
|
|
||
| analyzer = ClusterAnalyzer() | ||
|
|
||
| # Call events API to get the events and set the events in the analyzer | ||
| events = await api_client.get_events(cluster_id) | ||
| analyzer.set_cluster_events(json.loads(events)) | ||
|
|
||
| # Set the cluster metadata in the analyzer | ||
| analyzer.set_cluster_metadata(cluster.to_dict()) | ||
|
|
||
| # Select signatures that don't require logs | ||
| signatures_to_run = [ | ||
| sig for sig in ALL_SIGNATURES if sig.logs_required is False | ||
| ] | ||
|
|
||
| # Initialize log analyzer | ||
| log_analyzer = LogAnalyzer(logs_archive) | ||
| else: | ||
| # Download logs | ||
| logs_archive = await api_client.get_cluster_logs(cluster_id) | ||
|
|
||
| # Determine which signatures to run | ||
| signatures_to_run = ALL_SIGNATURES | ||
| # Initialize log analyzer | ||
| analyzer = LogAnalyzer(logs_archive) | ||
|
|
||
| # Add all signatures to the list to run | ||
| signatures_to_run = ALL_SIGNATURES | ||
|
|
||
| # If specific signatures are provided, filter the signatures to run | ||
| if specific_signatures: | ||
| signature_classes = {sig.__name__: sig for sig in ALL_SIGNATURES} | ||
| signatures_to_run = [] | ||
| for sig_name in specific_signatures: | ||
| if sig_name in signature_classes: | ||
| signatures_to_run.append(signature_classes[sig_name]) | ||
| else: | ||
| logger.warning("Unknown signature: %s", sig_name) | ||
| signatures_to_run = filter_signatures( | ||
| signatures_to_run, specific_signatures | ||
| ) | ||
|
Comment on lines
+43
to
+80
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add warning when user-requested signatures are skipped due to log unavailability. When logs are unavailable, the code filters to signatures where Consider adding a warning after line 80: # If specific signatures are provided, filter the signatures to run
if specific_signatures:
+ # Track which signatures were filtered out
+ before_filter = set(sig.__name__ for sig in signatures_to_run)
signatures_to_run = filter_signatures(
signatures_to_run, specific_signatures
)
+ after_filter = set(sig.__name__ for sig in signatures_to_run)
+ skipped = set(specific_signatures) - after_filter
+ if skipped and cluster.logs_info != "completed":
+ logger.warning(
+ "Skipped signatures requiring logs (logs unavailable): %s",
+ ", ".join(sorted(skipped))
+ )🤖 Prompt for AI Agents |
||
|
|
||
| # Run signatures | ||
| results = [] | ||
| for signature_class in signatures_to_run: | ||
| logger.debug("Running signature: %s", signature_class.__name__) | ||
| try: | ||
| signature = signature_class() | ||
| result = signature.analyze(log_analyzer) | ||
| result = signature.analyze(analyzer) | ||
| if result: | ||
| results.append(result) | ||
| except Exception as e: | ||
|
|
@@ -71,6 +100,21 @@ async def analyze_cluster( | |
| raise | ||
|
|
||
|
|
||
| def filter_signatures( | ||
| signatures: List[type[Signature]], specific_signatures: List[str] | ||
| ) -> List[type[Signature]]: | ||
| """Filter signatures to run based on specific signatures.""" | ||
| logger = logging.getLogger(__name__) | ||
| signature_classes = {sig.__name__: sig for sig in signatures} | ||
| filtered_signatures = [] | ||
| for sig_name in specific_signatures: | ||
| if sig_name in signature_classes: | ||
| filtered_signatures.append(signature_classes[sig_name]) | ||
| else: | ||
| logger.warning("Unknown signature: %s", sig_name) | ||
| return filtered_signatures | ||
|
|
||
|
|
||
| def print_results(results: List[SignatureResult]) -> None: | ||
| """Print analysis results to stdout.""" | ||
| if not results: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.