Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prometheus workload loader #266

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4fedd82
Add multiple workload loaders, refactor kubeapi workload loader
LeaveMyYard Apr 22, 2024
c7ad1cd
Moved the logic from #93 for a new refined structure
LeaveMyYard Apr 22, 2024
bf90978
Implement remaining kinds in prometheus workload loader
LeaveMyYard Apr 24, 2024
7cd0c59
Filter Cronjob-created jobs from display
LeaveMyYard Apr 24, 2024
4044b4a
Fix cluster selector
LeaveMyYard Apr 25, 2024
b9a62a0
Minor bug fix
LeaveMyYard Apr 25, 2024
7e8f1f4
BaseClusterLoader, class structure change, not finished
LeaveMyYard Apr 29, 2024
4c1f5c9
Finished structure changes and workload loaders
LeaveMyYard Apr 30, 2024
7124c80
PrometheusClusterLoader.list_clusters implementation
LeaveMyYard Apr 30, 2024
d1ad17d
Minor additional logging improvements
LeaveMyYard Apr 30, 2024
f7d8412
Minor debug comment
LeaveMyYard Apr 30, 2024
09c372b
Merge branch 'main' into prometheus-workload-loader
LeaveMyYard Apr 30, 2024
59cc29d
Fix prometheus auto-discovery
LeaveMyYard Apr 30, 2024
d4adcf8
Merge branch 'prometheus-workload-loader' of https://github.com/robus…
LeaveMyYard Apr 30, 2024
eb84c95
Logging improvement
LeaveMyYard Apr 30, 2024
e350084
Add HPA detection for prometheus mode
LeaveMyYard May 2, 2024
d4e09b0
Fix tests
LeaveMyYard May 2, 2024
7b6be35
Rework cluster selector for prometheus mode
LeaveMyYard May 2, 2024
dce207f
Remove test raise
LeaveMyYard May 2, 2024
43ffb7f
Fix HPAKey
LeaveMyYard May 3, 2024
2403898
One more HPAKey fix
LeaveMyYard May 3, 2024
73eb5f3
Deprecate --prometheus-cluster-label
aantn Jun 14, 2024
24af7f5
Bug fix - thank you @deutschj
aantn Jun 14, 2024
9fda8be
add TODO
aantn Jun 28, 2024
f71abd1
Fix ArgoRollouts (#308)
aantn Jul 1, 2024
66389e6
Prevent single errors from failing scan (#307)
aantn Jul 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ If your Prometheus monitors multiple clusters we require the label you defined f
For example, if your cluster has the Prometheus label `cluster: "my-cluster-name"`, then run this command:

```sh
krr.py simple --prometheus-label cluster -l my-cluster-name
krr.py simple --prometheus-cluster-key cluster -l my-cluster-name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aantn I think this requires a change on the robusta-runner as well
see here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arikalon1 it should be fine. I'm not deprecating --prometheus-label, just adding another option with a name that makes more sense.

I did deprecate --prometheus-cluster-label but the runner doesn't pass that by default.

```

You may also need the `-p` flag to explicitly give Prometheus' URL.
Expand Down
4 changes: 2 additions & 2 deletions examples/custom_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pydantic as pd

import robusta_krr
from robusta_krr.api.models import K8sObjectData, MetricsPodData, ResourceRecommendation, ResourceType, RunResult
from robusta_krr.api.models import K8sWorkload, MetricsPodData, ResourceRecommendation, ResourceType, RunResult
from robusta_krr.api.strategies import BaseStrategy, StrategySettings
from robusta_krr.core.integrations.prometheus.metrics import MaxMemoryLoader, PercentileCPULoader

Expand All @@ -24,7 +24,7 @@ class CustomStrategy(BaseStrategy[CustomStrategySettings]):
rich_console = True # Whether to use rich console for the CLI
metrics = [PercentileCPULoader(90), MaxMemoryLoader] # The metrics to use for the strategy

def run(self, history_data: MetricsPodData, object_data: K8sObjectData) -> RunResult:
def run(self, history_data: MetricsPodData, object_data: K8sWorkload) -> RunResult:
return {
ResourceType.CPU: ResourceRecommendation(request=self.settings.param_1, limit=None),
ResourceType.Memory: ResourceRecommendation(request=self.settings.param_2, limit=self.settings.param_2),
Expand Down
4 changes: 2 additions & 2 deletions robusta_krr/api/models.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from robusta_krr.core.abstract.strategies import MetricsPodData, PodsTimeData, ResourceRecommendation, RunResult
from robusta_krr.core.models.allocations import RecommendationValue, ResourceAllocations, ResourceType
from robusta_krr.core.models.objects import K8sObjectData, PodData
from robusta_krr.core.models.objects import K8sWorkload, PodData
from robusta_krr.core.models.result import ResourceScan, Result
from robusta_krr.core.models.severity import Severity, register_severity_calculator

__all__ = [
"ResourceType",
"ResourceAllocations",
"RecommendationValue",
"K8sObjectData",
"K8sWorkload",
"PodData",
"Result",
"Severity",
Expand Down
46 changes: 46 additions & 0 deletions robusta_krr/core/abstract/cluster_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

import abc
import logging
from typing import Optional, TYPE_CHECKING

from .workload_loader import BaseWorkloadLoader

if TYPE_CHECKING:
from robusta_krr.core.integrations.prometheus.connector import PrometheusConnector


logger = logging.getLogger("krr")


class BaseClusterLoader(abc.ABC):
    """
    Abstract entry point for loading data from multiple clusters.

    Examples: a centralized Prometheus server that can query multiple clusters,
    or a single kubeconfig that defines connections to multiple clusters.
    """

    @abc.abstractmethod
    async def list_clusters(self) -> Optional[list[str]]:
        """
        List the clusters this loader can work with.

        NOTE(review): the return type allows None; its exact meaning is defined
        by the concrete implementations - confirm against them.
        """
        ...

    @abc.abstractmethod
    def get_workload_loader(self, cluster: Optional[str]) -> BaseWorkloadLoader:
        """Return the workload loader to use for the given cluster."""
        ...

    def try_get_workload_loader(self, cluster: Optional[str]) -> Optional[BaseWorkloadLoader]:
        """
        Non-raising variant of get_workload_loader.

        Any exception raised while obtaining the loader is logged as an error
        and None is returned, so the caller can skip that cluster.
        """
        try:
            loader = self.get_workload_loader(cluster)
        except Exception as exc:
            logger.error(f"Could not connect to cluster {cluster} and will skip it: {exc}")
            return None
        else:
            return loader

    @abc.abstractmethod
    def get_prometheus(self, cluster: Optional[str]) -> PrometheusConnector:
        """
        Connect to a Prometheus server and return a PrometheusConnector instance.

        cluster=None means that Prometheus is the only one: either centralized or in-cluster.
        Raises prometrix.PrometheusNotFound if Prometheus is not available.
        """
        ...
4 changes: 2 additions & 2 deletions robusta_krr/core/abstract/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from abc import ABC, abstractmethod

from robusta_krr.core.abstract.strategies import PodsTimeData
from robusta_krr.core.models.objects import K8sObjectData
from robusta_krr.core.models.objects import K8sWorkload


class BaseMetric(ABC):
Expand All @@ -16,6 +16,6 @@ class BaseMetric(ABC):

@abstractmethod
async def load_data(
self, object: K8sObjectData, period: datetime.timedelta, step: datetime.timedelta
self, object: K8sWorkload, period: datetime.timedelta, step: datetime.timedelta
) -> PodsTimeData:
...
8 changes: 4 additions & 4 deletions robusta_krr/core/abstract/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
import pydantic as pd
from numpy.typing import NDArray

from robusta_krr.core.models.result import K8sObjectData, ResourceType
from robusta_krr.core.models.result import K8sWorkload, ResourceType

if TYPE_CHECKING:
from robusta_krr.core.abstract.metrics import BaseMetric # noqa: F401
from robusta_krr.core.integrations.prometheus.metrics import PrometheusMetric

SelfRR = TypeVar("SelfRR", bound="ResourceRecommendation")


# TODO: rename so it isn't the same name as ResourceRecommendation in result.py
class ResourceRecommendation(pd.BaseModel):
"""A class to represent resource recommendation with optional request and limit values.

Expand Down Expand Up @@ -133,7 +133,7 @@ def description(self) -> Optional[str]:
# Abstract method that needs to be implemented by subclass.
# This method is intended to calculate resource recommendation based on history data and kubernetes object data.
@abc.abstractmethod
def run(self, history_data: MetricsPodData, object_data: K8sObjectData) -> RunResult:
def run(self, history_data: MetricsPodData, object_data: K8sWorkload) -> RunResult:
pass

# This method is intended to return a strategy by its name.
Expand Down Expand Up @@ -167,6 +167,6 @@ def get_settings_type(cls) -> type[StrategySettings]:
"StrategySettings",
"PodsTimeData",
"MetricsPodData",
"K8sObjectData",
"K8sWorkload",
"ResourceType",
]
23 changes: 23 additions & 0 deletions robusta_krr/core/abstract/workload_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import abc
import logging

from robusta_krr.core.models.objects import K8sWorkload, PodData


logger = logging.getLogger("krr")


class BaseWorkloadLoader(abc.ABC):
    """Abstract base for workload loaders that operate on a single cluster."""

    @abc.abstractmethod
    async def list_workloads(self) -> list[K8sWorkload]:
        """Return the workloads found by this loader."""
        ...


class IListPodsFallback(abc.ABC):
    """Interface a workload loader can implement to offer a fallback method for listing pods."""

    @abc.abstractmethod
    async def load_pods(self, object: K8sWorkload) -> list[PodData]:
        """Return the pods for the given workload."""
        ...
Loading
Loading