diff --git a/cli/cli/command_framework/highlevel/engine_consuming_kurtosis_command/engine_consuming_kurtosis_command.go b/cli/cli/command_framework/highlevel/engine_consuming_kurtosis_command/engine_consuming_kurtosis_command.go index d17f47e2db..22cc10cf09 100644 --- a/cli/cli/command_framework/highlevel/engine_consuming_kurtosis_command/engine_consuming_kurtosis_command.go +++ b/cli/cli/command_framework/highlevel/engine_consuming_kurtosis_command/engine_consuming_kurtosis_command.go @@ -187,7 +187,7 @@ func (cmd *EngineConsumingKurtosisCommand) getSetupFunc() func(context.Context) kurtosisBackend := engineManager.GetKurtosisBackend() dontRestartAPIContainers := false - engineClient, closeClientFunc, err := engineManager.StartEngineIdempotentlyWithDefaultVersion(ctx, defaults.DefaultEngineLogLevel, defaults.DefaultEngineEnclavePoolSize, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod) + engineClient, closeClientFunc, err := engineManager.StartEngineIdempotentlyWithDefaultVersion(ctx, defaults.DefaultEngineLogLevel, defaults.DefaultEngineEnclavePoolSize, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod, defaults.DefaultLogsSinks) if err != nil { return nil, stacktrace.Propagate(err, "An error occurred creating a new Kurtosis engine client") } diff --git a/cli/cli/command_str_consts/command_str_consts.go b/cli/cli/command_str_consts/command_str_consts.go index b1b9246692..7be1a73565 100644 --- a/cli/cli/command_str_consts/command_str_consts.go +++ b/cli/cli/command_str_consts/command_str_consts.go @@ -81,6 +81,9 @@ const ( GitHubLogoutCmdStr = "logout" GitHubTokenCmdStr = "token" GitHubStatusCmdStr = "status" + GraflokiCmdStr = "grafloki" + GraflokiStartCmdStr = "start" + GraflokiStopCmdStr = "stop" ) // TODO: added constant error message here, can we move to another file later. 
diff --git a/cli/cli/commands/cluster/set/set.go b/cli/cli/commands/cluster/set/set.go index 7227046ea8..73a451c913 100644 --- a/cli/cli/commands/cluster/set/set.go +++ b/cli/cli/commands/cluster/set/set.go @@ -124,7 +124,7 @@ func run(ctx context.Context, flags *flags.ParsedFlags, args *args.ParsedArgs) e // TODO - fix the idempotent starter longer term if engineStatus == engine_manager.EngineStatus_Stopped { dontRestartAPIContainers := false - _, engineClientCloseFunc, err := engineManagerNewCluster.StartEngineIdempotentlyWithDefaultVersion(ctx, defaults.DefaultEngineLogLevel, defaults.DefaultEngineEnclavePoolSize, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod) + _, engineClientCloseFunc, err := engineManagerNewCluster.StartEngineIdempotentlyWithDefaultVersion(ctx, defaults.DefaultEngineLogLevel, defaults.DefaultEngineEnclavePoolSize, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod, defaults.DefaultLogsSinks) if err != nil { return stacktrace.Propagate(err, "Engine could not be started after cluster was updated. 
Its status can be retrieved "+ "running 'kurtosis %s %s' and it can potentially be started running 'kurtosis %s %s'", diff --git a/cli/cli/commands/enclave/add/add.go b/cli/cli/commands/enclave/add/add.go index 7c51077de8..de00471a74 100644 --- a/cli/cli/commands/enclave/add/add.go +++ b/cli/cli/commands/enclave/add/add.go @@ -121,7 +121,7 @@ func run( return stacktrace.Propagate(err, "An error occurred creating an engine manager.") } - engineClient, closeClientFunc, err := engineManager.StartEngineIdempotentlyWithDefaultVersion(ctx, defaults.DefaultEngineLogLevel, defaults.DefaultEngineEnclavePoolSize, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod) + engineClient, closeClientFunc, err := engineManager.StartEngineIdempotentlyWithDefaultVersion(ctx, defaults.DefaultEngineLogLevel, defaults.DefaultEngineEnclavePoolSize, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod, defaults.DefaultLogsSinks) if err != nil { return stacktrace.Propagate(err, "An error occurred creating a new Kurtosis engine client") } diff --git a/cli/cli/commands/engine/restart/restart.go b/cli/cli/commands/engine/restart/restart.go index 2e0a97de57..b785008267 100644 --- a/cli/cli/commands/engine/restart/restart.go +++ b/cli/cli/commands/engine/restart/restart.go @@ -178,7 +178,7 @@ func run(_ context.Context, flags *flags.ParsedFlags, _ *args.ParsedArgs) error var engineClientCloseFunc func() error var restartEngineErr error - _, engineClientCloseFunc, restartEngineErr = engineManager.RestartEngineIdempotently(ctx, logLevel, engineVersion, restartEngineOnSameVersionIfAnyRunning, enclavePoolSize, shouldStartInDebugMode, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr) + _, engineClientCloseFunc, restartEngineErr = engineManager.RestartEngineIdempotently(ctx, logLevel, engineVersion, 
restartEngineOnSameVersionIfAnyRunning, enclavePoolSize, shouldStartInDebugMode, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr, defaults.DefaultLogsSinks) if restartEngineErr != nil { return stacktrace.Propagate(restartEngineErr, "An error occurred restarting the Kurtosis engine") } diff --git a/cli/cli/commands/engine/start/start.go b/cli/cli/commands/engine/start/start.go index a7369eda11..381439b45b 100644 --- a/cli/cli/commands/engine/start/start.go +++ b/cli/cli/commands/engine/start/start.go @@ -175,13 +175,13 @@ func run(_ context.Context, flags *flags.ParsedFlags, _ *args.ParsedArgs) error if engineVersion == defaultEngineVersion && isDebugMode { engineDebugVersion := fmt.Sprintf("%s-%s", kurtosis_version.KurtosisVersion, defaults.DefaultKurtosisContainerDebugImageNameSuffix) logrus.Infof("Starting Kurtosis engine in debug mode from image '%v%v%v'...", kurtosisTechEngineImagePrefix, imageVersionDelimiter, engineDebugVersion) - _, engineClientCloseFunc, startEngineErr = engineManager.StartEngineIdempotentlyWithCustomVersion(ctx, engineDebugVersion, logLevel, enclavePoolSize, true, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr) + _, engineClientCloseFunc, startEngineErr = engineManager.StartEngineIdempotentlyWithCustomVersion(ctx, engineDebugVersion, logLevel, enclavePoolSize, true, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr, defaults.DefaultLogsSinks) } else if engineVersion == defaultEngineVersion { logrus.Infof("Starting Kurtosis engine from image '%v%v%v'...", kurtosisTechEngineImagePrefix, imageVersionDelimiter, kurtosis_version.KurtosisVersion) - _, engineClientCloseFunc, startEngineErr = engineManager.StartEngineIdempotentlyWithDefaultVersion(ctx, logLevel, enclavePoolSize, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr) + _, engineClientCloseFunc, startEngineErr = 
engineManager.StartEngineIdempotentlyWithDefaultVersion(ctx, logLevel, enclavePoolSize, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr, defaults.DefaultLogsSinks) } else { logrus.Infof("Starting Kurtosis engine from image '%v%v%v'...", kurtosisTechEngineImagePrefix, imageVersionDelimiter, engineVersion) - _, engineClientCloseFunc, startEngineErr = engineManager.StartEngineIdempotentlyWithCustomVersion(ctx, engineVersion, logLevel, enclavePoolSize, defaults.DefaultEnableDebugMode, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr) + _, engineClientCloseFunc, startEngineErr = engineManager.StartEngineIdempotentlyWithCustomVersion(ctx, engineVersion, logLevel, enclavePoolSize, defaults.DefaultEnableDebugMode, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr, defaults.DefaultLogsSinks) } if startEngineErr != nil { return stacktrace.Propagate(startEngineErr, "An error occurred starting the Kurtosis engine") diff --git a/cli/cli/commands/github/login/login.go b/cli/cli/commands/github/login/login.go index 6ad37e2024..6c65bedfcf 100644 --- a/cli/cli/commands/github/login/login.go +++ b/cli/cli/commands/github/login/login.go @@ -71,7 +71,7 @@ func RestartEngineAfterGitHubAuth(ctx context.Context) error { var engineClientCloseFunc func() error var restartEngineErr error dontRestartAPIContainers := false - _, engineClientCloseFunc, restartEngineErr = engineManager.RestartEngineIdempotently(ctx, defaults.DefaultEngineLogLevel, defaultEngineVersion, restartEngineOnSameVersionIfAnyRunning, defaults.DefaultEngineEnclavePoolSize, defaults.DefaultEnableDebugMode, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod) + _, engineClientCloseFunc, restartEngineErr = engineManager.RestartEngineIdempotently(ctx, defaults.DefaultEngineLogLevel, defaultEngineVersion, restartEngineOnSameVersionIfAnyRunning, 
defaults.DefaultEngineEnclavePoolSize, defaults.DefaultEnableDebugMode, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod, defaults.DefaultLogsSinks) if restartEngineErr != nil { return stacktrace.Propagate(restartEngineErr, "An error occurred restarting the Kurtosis engine") } diff --git a/cli/cli/commands/grafloki/grafloki.go b/cli/cli/commands/grafloki/grafloki.go new file mode 100644 index 0000000000..d010e8868f --- /dev/null +++ b/cli/cli/commands/grafloki/grafloki.go @@ -0,0 +1,21 @@ +package grafloki + +import ( + "github.com/kurtosis-tech/kurtosis/cli/cli/command_str_consts" + "github.com/kurtosis-tech/kurtosis/cli/cli/commands/grafloki/start" + "github.com/kurtosis-tech/kurtosis/cli/cli/commands/grafloki/stop" + "github.com/spf13/cobra" +) + +// GraflokiCmd suppressing exhaustruct requirement because this struct has ~40 properties +// nolint: exhaustruct +var GraflokiCmd = &cobra.Command{ + Use: command_str_consts.GraflokiCmdStr, + Short: "Start Grafana/Loki for log collection", + RunE: nil, +} + +func init() { + GraflokiCmd.AddCommand(start.GraflokiStartCmd.MustGetCobraCommand()) + GraflokiCmd.AddCommand(stop.GraflokiStopCmd.MustGetCobraCommand()) +} diff --git a/cli/cli/commands/grafloki/start/start.go b/cli/cli/commands/grafloki/start/start.go new file mode 100644 index 0000000000..82d1e4643c --- /dev/null +++ b/cli/cli/commands/grafloki/start/start.go @@ -0,0 +1,124 @@ +package start + +import ( + "context" + "fmt" + "github.com/kurtosis-tech/kurtosis/cli/cli/command_framework/lowlevel" + "github.com/kurtosis-tech/kurtosis/cli/cli/command_framework/lowlevel/args" + "github.com/kurtosis-tech/kurtosis/cli/cli/command_framework/lowlevel/flags" + "github.com/kurtosis-tech/kurtosis/cli/cli/command_str_consts" + "github.com/kurtosis-tech/kurtosis/cli/cli/defaults" + "github.com/kurtosis-tech/kurtosis/cli/cli/helpers/engine_manager" + 
"github.com/kurtosis-tech/kurtosis/cli/cli/helpers/grafloki" + "github.com/kurtosis-tech/kurtosis/cli/cli/helpers/kurtosis_config_getter" + "github.com/kurtosis-tech/kurtosis/cli/cli/kurtosis_config/resolved_config" + "github.com/kurtosis-tech/kurtosis/cli/cli/out" + "github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_interface/objects/logs_aggregator" + "github.com/kurtosis-tech/stacktrace" + "github.com/sirupsen/logrus" +) + +const ( + defaultEngineVersion = "" + restartEngineOnSameVersionIfAnyRunning = true +) + +var GraflokiStartCmd = &lowlevel.LowlevelKurtosisCommand{ + CommandStr: command_str_consts.GraflokiStartCmdStr, + ShortDescription: "Starts a Grafana/Loki instance.", + LongDescription: "Starts a Grafana/Loki instance and configures Kurtosis engine to send logs to it.", + RunFunc: run, + Flags: nil, + Args: nil, + PostValidationAndRunFunc: nil, + PreValidationAndRunFunc: nil, +} + +func run( + ctx context.Context, + _ *flags.ParsedFlags, + _ *args.ParsedArgs, + +) error { + clusterConfig, err := kurtosis_config_getter.GetKurtosisClusterConfig() + if err != nil { + return stacktrace.Propagate(err, "An error occurred getting Kurtosis cluster config.") + } + + // NOTE(tedi 04/03/25): If you're wondering why the grafana / loki instance is being started by the CLI (and not in container-engine-lib via KurtosisBackend as with LogsCollector and LogsAggregator), here's why: + // 1. now that Kurtosis is purely OSS, it's important to reduce maintenance surface / complexity inside Kurtosis core (Engine, APIContainer, KurtosisBackend, Starlark Engine) + // wherever possible infra not essential to Kurtosis core should be built outside of it or at the edges (e.g. client) + // 2. the export logs feature was built in service of leveraging existing logging solutions/not rebuilding logging in Kurtosis + // 3. 
having grafloki started/managed by the CLI lets us build on the export logs feature + // In other words + // putting it in the CLI is saying - “You could set up Grafana and Loki yourself, and then restart the engine to point to it, Kurtosis CLI will do that for you to save you a step” + // putting it in Kurtosis core is saying - “Grafana and Loki are a core, necessary part of the Kurtosis platform and support the Kurtosis abstraction/value prop” - which is not the case + // https://drawpaintacademy.com/the-bull/ + var lokiHost string + var grafanaUrl string + switch clusterConfig.GetClusterType() { + case resolved_config.KurtosisClusterType_Docker: + lokiHost, grafanaUrl, err = grafloki.StartGrafLokiInDocker(ctx) + if err != nil { + return stacktrace.Propagate(err, "An error occurred starting Grafana and Loki in Docker.") + } + case resolved_config.KurtosisClusterType_Kubernetes: + lokiHost, grafanaUrl, err = grafloki.StartGrafLokiInKubernetes(ctx) + if err != nil { + return stacktrace.Propagate(err, "An error occurred starting Grafana and Loki in Kubernetes.") + } + default: + return stacktrace.NewError("Unsupported cluster type: %v", clusterConfig.GetClusterType().String()) + } + + // This matches the exact configurations here: https://vector.dev/docs/reference/configuration/sinks/loki/ + lokiSink := map[string]map[string]interface{}{ + "loki": { + "type": "loki", + "endpoint": lokiHost, + "encoding": map[string]string{ + "codec": "json", + }, + "labels": map[string]string{ + "job": "kurtosis", + }, + }, + } + + logrus.Infof("Configuring engine to send logs to Loki...") + err = restartEngineWithLogsSink(ctx, lokiSink) + if err != nil { + return stacktrace.Propagate(err, "An error occurred restarting engine to be configured to send logs to Loki.") + } + + out.PrintOutLn(fmt.Sprintf("Grafana running at %v", grafanaUrl)) + return nil +} + +func restartEngineWithLogsSink(ctx context.Context, sink logs_aggregator.Sinks) error { + engineManager, err := 
engine_manager.NewEngineManager(ctx) + if err != nil { + return stacktrace.Propagate(err, "An error occurred creating an engine manager.") + } + dontRestartAPIContainers := false + _, engineClientCloseFunc, restartEngineErr := engineManager.RestartEngineIdempotently(ctx, + defaults.DefaultEngineLogLevel, + defaultEngineVersion, + restartEngineOnSameVersionIfAnyRunning, + defaults.DefaultEngineEnclavePoolSize, + defaults.DefaultEnableDebugMode, + defaults.DefaultGitHubAuthTokenOverride, + dontRestartAPIContainers, + defaults.DefaultDomain, + defaults.DefaultLogRetentionPeriod, + sink) + if restartEngineErr != nil { + return stacktrace.Propagate(restartEngineErr, "An error occurred restarting the Kurtosis engine") + } + defer func() { + if err = engineClientCloseFunc(); err != nil { + logrus.Warnf("Error closing the engine client:\n'%v'", err) + } + }() + return nil +} diff --git a/cli/cli/commands/grafloki/stop/stop.go b/cli/cli/commands/grafloki/stop/stop.go new file mode 100644 index 0000000000..ca21c1d3da --- /dev/null +++ b/cli/cli/commands/grafloki/stop/stop.go @@ -0,0 +1,54 @@ +package stop + +import ( + "context" + "github.com/kurtosis-tech/kurtosis/cli/cli/command_framework/lowlevel" + "github.com/kurtosis-tech/kurtosis/cli/cli/command_framework/lowlevel/args" + "github.com/kurtosis-tech/kurtosis/cli/cli/command_framework/lowlevel/flags" + "github.com/kurtosis-tech/kurtosis/cli/cli/command_str_consts" + "github.com/kurtosis-tech/kurtosis/cli/cli/helpers/grafloki" + "github.com/kurtosis-tech/kurtosis/cli/cli/helpers/kurtosis_config_getter" + "github.com/kurtosis-tech/kurtosis/cli/cli/kurtosis_config/resolved_config" + "github.com/kurtosis-tech/kurtosis/cli/cli/out" + "github.com/kurtosis-tech/stacktrace" +) + +var GraflokiStopCmd = &lowlevel.LowlevelKurtosisCommand{ + CommandStr: command_str_consts.GraflokiStopCmdStr, + ShortDescription: "Stops a grafana/loki instance.", + LongDescription: "Stop a grafana/loki instance if one already exists.", + RunFunc: run, 
+ Flags: nil, + Args: nil, + PreValidationAndRunFunc: nil, + PostValidationAndRunFunc: nil, +} + +func run( + ctx context.Context, + _ *flags.ParsedFlags, + _ *args.ParsedArgs, +) error { + clusterConfig, err := kurtosis_config_getter.GetKurtosisClusterConfig() + if err != nil { + return stacktrace.Propagate(err, "An error occurred getting Kurtosis cluster config.") + } + + switch clusterConfig.GetClusterType() { + case resolved_config.KurtosisClusterType_Docker: + err := grafloki.StopGrafLokiInDocker(ctx) + if err != nil { + return stacktrace.Propagate(err, "An error occurred stopping Grafana and Loki containers in Docker.") + } + case resolved_config.KurtosisClusterType_Kubernetes: + err := grafloki.StopGrafLokiInKubernetes(ctx) + if err != nil { + return stacktrace.Propagate(err, "An error occurred stopping Grafana and Loki containers in Kubernetes.") + } + default: + return stacktrace.NewError("Unsupported cluster type: %v", clusterConfig.GetClusterType().String()) + } + + out.PrintOutLn("Successfully stopped Grafana and Loki containers.") + return nil +} diff --git a/cli/cli/commands/kurtosis_context/set/set.go b/cli/cli/commands/kurtosis_context/set/set.go index ff13451ad8..5e7e468011 100644 --- a/cli/cli/commands/kurtosis_context/set/set.go +++ b/cli/cli/commands/kurtosis_context/set/set.go @@ -148,7 +148,7 @@ func SetContext( } dontRestartAPIContainers := false - _, engineClientCloseFunc, startEngineErr := engineManager.StartEngineIdempotentlyWithDefaultVersion(ctx, logrus.InfoLevel, defaults.DefaultEngineEnclavePoolSize, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod) + _, engineClientCloseFunc, startEngineErr := engineManager.StartEngineIdempotentlyWithDefaultVersion(ctx, logrus.InfoLevel, defaults.DefaultEngineEnclavePoolSize, defaults.DefaultGitHubAuthTokenOverride, dontRestartAPIContainers, defaults.DefaultDomain, defaults.DefaultLogRetentionPeriod, 
defaults.DefaultLogsSinks) if startEngineErr != nil { logrus.Warnf("The context was successfully set to '%s' but Kurtosis failed to start an engine in "+ "this new context. A new engine should be started manually with '%s %s %s'. The error was:\n%v", diff --git a/cli/cli/commands/root.go b/cli/cli/commands/root.go index 79d4e3d21d..668e67096b 100644 --- a/cli/cli/commands/root.go +++ b/cli/cli/commands/root.go @@ -8,6 +8,7 @@ package commands import ( "encoding/json" "fmt" + "github.com/kurtosis-tech/kurtosis/cli/cli/commands/grafloki" "io" "net/http" "os" @@ -141,6 +142,7 @@ func init() { RootCmd.AddCommand(web.WebCmd.MustGetCobraCommand()) RootCmd.AddCommand(_package.PackageCmd) RootCmd.AddCommand(github.GitHubCmd) + RootCmd.AddCommand(grafloki.GraflokiCmd) } // ==================================================================================================== diff --git a/cli/cli/defaults/defaults.go b/cli/cli/defaults/defaults.go index 081138c00b..5b3dafb2e9 100644 --- a/cli/cli/defaults/defaults.go +++ b/cli/cli/defaults/defaults.go @@ -6,6 +6,7 @@ package defaults import ( + "github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_interface/objects/logs_aggregator" "github.com/sirupsen/logrus" ) @@ -30,5 +31,6 @@ const ( DefaultLogRetentionPeriod = "168h" ) +var DefaultLogsSinks = logs_aggregator.Sinks{} var DefaultApiContainerLogLevel = logrus.DebugLevel var DefaultEngineLogLevel = logrus.DebugLevel diff --git a/cli/cli/go.mod b/cli/cli/go.mod index 76d9f5928c..a4912d1320 100644 --- a/cli/cli/go.mod +++ b/cli/cli/go.mod @@ -51,6 +51,8 @@ require ( github.com/cli/go-gh/v2 v2.4.1-0.20231120145612-d32c104a9a25 github.com/cli/oauth v1.0.1 github.com/compose-spec/compose-go v1.17.0 + github.com/docker/docker v24.0.9+incompatible + github.com/docker/go-connections v0.4.0 github.com/fatih/color v1.13.0 github.com/go-git/go-git/v5 v5.14.0 github.com/google/go-github/v50 v50.2.0 @@ -95,8 +97,6 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect 
github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f // indirect github.com/distribution/distribution/v3 v3.0.0-20230214150026-36d8c594d7aa // indirect - github.com/docker/docker v24.0.9+incompatible // indirect - github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-metrics v0.0.1 // indirect github.com/docker/go-units v0.5.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect diff --git a/cli/cli/helpers/engine_manager/engine_manager.go b/cli/cli/helpers/engine_manager/engine_manager.go index 09d9dad897..a46bfb3bdf 100644 --- a/cli/cli/helpers/engine_manager/engine_manager.go +++ b/cli/cli/helpers/engine_manager/engine_manager.go @@ -2,6 +2,7 @@ package engine_manager import ( "context" + "github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_interface/objects/logs_aggregator" "strings" "time" @@ -184,13 +185,18 @@ func (manager *EngineManager) StartEngineIdempotentlyWithDefaultVersion( restartAPIContainers bool, domain string, logRetentionPeriodStr string, -) (kurtosis_engine_rpc_api_bindings.EngineServiceClient, func() error, error) { + additionalSinks logs_aggregator.Sinks, +) ( + kurtosis_engine_rpc_api_bindings.EngineServiceClient, + func() error, error, +) { status, maybeHostMachinePortBinding, engineVersion, err := manager.GetEngineStatus(ctx) if err != nil { return nil, nil, stacktrace.Propagate(err, "An error occurred retrieving the Kurtosis engine status, which is necessary for creating a connection to the engine") } logrus.Debugf("Engine status: '%v'", status) clusterType := manager.clusterConfig.GetClusterType() + engineGuarantor := newEngineExistenceGuarantorWithDefaultVersion( ctx, maybeHostMachinePortBinding, @@ -209,7 +215,7 @@ func (manager *EngineManager) StartEngineIdempotentlyWithDefaultVersion( restartAPIContainers, domain, logRetentionPeriodStr, - manager.clusterConfig.GetLogsAggregatorConfig().Sinks, + combineAdditionalSinksAndConfigSinks(additionalSinks, 
manager.clusterConfig.GetLogsAggregatorConfig().Sinks), ) // TODO Need to handle the Kubernetes case, where a gateway needs to be started after the engine is started but // before we can return an EngineClient @@ -221,8 +227,7 @@ func (manager *EngineManager) StartEngineIdempotentlyWithDefaultVersion( } // StartEngineIdempotentlyWithCustomVersion Starts an engine if one doesn't exist already, and returns a client to it TokenOverride string) (kurtosis_engine_rpc_api_bindings.EngineServiceClient, func() error, error) { -func (manager *EngineManager) StartEngineIdempotentlyWithCustomVersion( - ctx context.Context, +func (manager *EngineManager) StartEngineIdempotentlyWithCustomVersion(ctx context.Context, engineImageVersionTag string, logLevel logrus.Level, poolSize uint8, @@ -231,7 +236,11 @@ func (manager *EngineManager) StartEngineIdempotentlyWithCustomVersion( restartAPIContainers bool, domain string, logRetentionPeriodStr string, -) (kurtosis_engine_rpc_api_bindings.EngineServiceClient, func() error, error) { + additionalSinks logs_aggregator.Sinks, +) ( + kurtosis_engine_rpc_api_bindings.EngineServiceClient, + func() error, error, +) { status, maybeHostMachinePortBinding, engineVersion, err := manager.GetEngineStatus(ctx) if err != nil { return nil, nil, stacktrace.Propagate(err, "An error occurred retrieving the Kurtosis engine status, which is necessary for creating a connection to the engine") @@ -257,7 +266,7 @@ func (manager *EngineManager) StartEngineIdempotentlyWithCustomVersion( restartAPIContainers, domain, logRetentionPeriodStr, - manager.clusterConfig.GetLogsAggregatorConfig().Sinks, + combineAdditionalSinksAndConfigSinks(additionalSinks, manager.clusterConfig.GetLogsAggregatorConfig().Sinks), ) engineClient, engineClientCloseFunc, err := manager.startEngineWithGuarantor(ctx, status, engineGuarantor) if err != nil { @@ -349,8 +358,7 @@ func (manager *EngineManager) StopEngineIdempotently(ctx context.Context) error // If no optionalVersionToUse is 
passed, then the new engine will take the default version, unless // restartEngineOnSameVersionIfAnyRunning is set to true in which case it will take the version of the currently // running engine -func (manager *EngineManager) RestartEngineIdempotently( - ctx context.Context, +func (manager *EngineManager) RestartEngineIdempotently(ctx context.Context, logLevel logrus.Level, optionalVersionToUse string, restartEngineOnSameVersionIfAnyRunning bool, @@ -360,7 +368,11 @@ func (manager *EngineManager) RestartEngineIdempotently( shouldRestartAPIContainers bool, domain string, logRetentionPeriodStr string, -) (kurtosis_engine_rpc_api_bindings.EngineServiceClient, func() error, error) { + additionalSinks logs_aggregator.Sinks, +) ( + kurtosis_engine_rpc_api_bindings.EngineServiceClient, + func() error, error, +) { var versionOfNewEngine string // We try to do our best to restart an engine on the same version the current on is on _, _, currentEngineVersion, err := manager.GetEngineStatus(ctx) @@ -385,9 +397,9 @@ func (manager *EngineManager) RestartEngineIdempotently( var engineClientCloseFunc func() error var restartEngineErr error if versionOfNewEngine != defaultEngineVersion { - _, engineClientCloseFunc, restartEngineErr = manager.StartEngineIdempotentlyWithCustomVersion(ctx, versionOfNewEngine, logLevel, poolSize, shouldStartInDebugMode, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr) + _, engineClientCloseFunc, restartEngineErr = manager.StartEngineIdempotentlyWithCustomVersion(ctx, versionOfNewEngine, logLevel, poolSize, shouldStartInDebugMode, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr, additionalSinks) } else { - _, engineClientCloseFunc, restartEngineErr = manager.StartEngineIdempotentlyWithDefaultVersion(ctx, logLevel, poolSize, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr) + _, engineClientCloseFunc, restartEngineErr = 
manager.StartEngineIdempotentlyWithDefaultVersion(ctx, logLevel, poolSize, githubAuthTokenOverride, shouldRestartAPIContainers, domain, logRetentionPeriodStr, additionalSinks) } if restartEngineErr != nil { return nil, nil, stacktrace.Propagate(restartEngineErr, "An error occurred starting a new engine") @@ -515,3 +527,16 @@ func (manager *EngineManager) waitUntilEngineStoppedOrError(ctx context.Context) } return stacktrace.NewError("Engine did not report stopped status, last status reported was '%v'", status) } + +// combineAdditionalSinksAndConfigSinks will combine additionalSinks and configSinks +// note: additionalSinks will override configSinks in case of an id clash +func combineAdditionalSinksAndConfigSinks(additionalSinks logs_aggregator.Sinks, configSinks logs_aggregator.Sinks) logs_aggregator.Sinks { + combinedSinks := logs_aggregator.Sinks{} + for sinkId, sink := range configSinks { + combinedSinks[sinkId] = sink + } + for sinkId, sink := range additionalSinks { + combinedSinks[sinkId] = sink + } + return combinedSinks +} diff --git a/cli/cli/helpers/grafloki/docker_grafloki.go b/cli/cli/helpers/grafloki/docker_grafloki.go new file mode 100644 index 0000000000..42303ec1ae --- /dev/null +++ b/cli/cli/helpers/grafloki/docker_grafloki.go @@ -0,0 +1,244 @@ +package grafloki + +import ( + "context" + "fmt" + "github.com/docker/docker/client" + "github.com/docker/go-connections/nat" + "github.com/go-yaml/yaml" + "github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_impls/docker/docker_manager" + "github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_impls/docker/docker_manager/types" + "github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_impls/docker/object_attributes_provider/docker_label_key" + "github.com/kurtosis-tech/stacktrace" + "github.com/sirupsen/logrus" + "net/http" + "os" + "strconv" + "time" +) + +const ( + LokiContainerName = "kurtosis-loki" + GrafanaContainerName = "kurtosis-grafana" + lokiReadinessPath 
= "/ready" + + bridgeNetworkName = "bridge" + localhostAddr = "127.0.0.1" +) + +var EmptyDockerClientOpts = []client.Opt{} +var lokiContainerLabels = map[string]string{ + docker_label_key.ContainerTypeDockerLabelKey.GetString(): LokiContainerName, +} +var grafanaContainerLabels = map[string]string{ + docker_label_key.ContainerTypeDockerLabelKey.GetString(): GrafanaContainerName, +} + +func StartGrafLokiInDocker(ctx context.Context) (string, string, error) { + dockerManager, err := docker_manager.CreateDockerManager(EmptyDockerClientOpts) + if err != nil { + return "", "", stacktrace.Propagate(err, "An error occurred creating the docker manager to start grafana and loki.") + } + + var lokiHost string + doesGrafanaAndLokiExist, lokiHost, err := checkGrafanaAndLokiContainerExistence(ctx, dockerManager, lokiContainerLabels, grafanaContainerLabels) + if err != nil { + return "", "", stacktrace.Propagate(err, "An error occurred checking if Grafana and Loki exist.") + } + + if !doesGrafanaAndLokiExist { + logrus.Infof("No running Grafana and Loki containers found. Creating them...") + lokiHost, err = createGrafanaAndLokiContainers(ctx, dockerManager) + if err != nil { + return "", "", stacktrace.Propagate(err, "An error occurred creating Grafana and Loki containers.") + } + } + + grafanaUrl := fmt.Sprintf("http://%v:%v", localhostAddr, grafanaPort) + return lokiHost, grafanaUrl, nil +} + +func createGrafanaAndLokiContainers(ctx context.Context, dockerManager *docker_manager.DockerManager) (string, error) { + lokiNatPort := nat.Port(strconv.Itoa(lokiPort) + "/tcp") + grafanaNatPort := nat.Port(strconv.Itoa(grafanaPort) + "/tcp") + + bridgeNetworkId, err := dockerManager.GetNetworkIdByName(ctx, bridgeNetworkName) + if err != nil { + return "", stacktrace.Propagate(err, "An error occurred getting Docker network id by Name: %v", bridgeNetworkName) + } + + lokiArgs := docker_manager.NewCreateAndStartContainerArgsBuilder(lokiImage, LokiContainerName, bridgeNetworkId). 
+ WithUsedPorts(map[nat.Port]docker_manager.PortPublishSpec{ + lokiNatPort: docker_manager.NewManualPublishingSpec(lokiPort), + }). + WithFetchingLatestImageIfMissing(). + WithRestartPolicy(docker_manager.RestartOnFailure). + WithNetworkMode(bridgeNetworkName). + WithLabels(lokiContainerLabels). + Build() + lokiContainerId, _, err := dockerManager.CreateAndStartContainer(ctx, lokiArgs) + if err != nil { + return "", stacktrace.Propagate(err, "An error occurred creating '%v' container.", LokiContainerName) + } + shouldDestroyLokiContainer := true + defer func() { + if shouldDestroyLokiContainer { + err := dockerManager.RemoveContainer(ctx, lokiContainerId) + if err != nil { + logrus.Warnf("Attempted to remove Loki container after an error occurred creating it but an error occurred removing it.") + logrus.Warnf("Manually remove Loki container with id: %v", lokiContainerId) + } + } + }() + logrus.Infof("Loki container started.") + + lokiBridgeNetworkIpAddr, err := dockerManager.GetContainerIPOnNetwork(ctx, lokiContainerId, bridgeNetworkName) + if err != nil { + return "", stacktrace.Propagate(err, "An error occurred getting container '%v' ip address on network '%v'.", lokiContainerId, bridgeNetworkName) + } + + lokiBridgeNetworkIpAddress := fmt.Sprintf("http://%v:%v", lokiBridgeNetworkIpAddr, lokiPort) + lokiHostNetworkIpAddress := fmt.Sprintf("http://%v:%v", localhostAddr, lokiPort) + if err := waitForLokiReadiness(lokiHostNetworkIpAddress, lokiReadinessPath); err != nil { + return "", stacktrace.Propagate(err, "An error occurred waiting for Loki container to become ready.") + } + + grafanaDatasource := &GrafanaDatasources{ + ApiVersion: int64(1), + Datasources: []GrafanaDatasource{ + { + Name: LokiContainerName, + Type_: "loki", + Access: "proxy", + Url: lokiBridgeNetworkIpAddress, + IsDefault: true, + Editable: true, + }, + }} + grafanaDatasourceYaml, err := yaml.Marshal(grafanaDatasource) + if err != nil { + return "", stacktrace.Propagate(err, "An error occurred 
serializing Grafana datasource to yaml: %v", grafanaDatasourceYaml) + } + logrus.Infof("Grafana data source yaml %v", string(grafanaDatasourceYaml)) + + tmpFile, err := os.CreateTemp("", "grafana-datasource-*.yaml") + if err != nil { + return "", stacktrace.Propagate(err, "An error occurred creating temp datasource config.") + } + defer tmpFile.Close() + if _, err := tmpFile.WriteString(string(grafanaDatasourceYaml)); err != nil { + return "", stacktrace.Propagate(err, "An error occurred writing config.") + } + + grafanaArgs := docker_manager.NewCreateAndStartContainerArgsBuilder(grafanaImage, GrafanaContainerName, bridgeNetworkId). + WithUsedPorts(map[nat.Port]docker_manager.PortPublishSpec{ + grafanaNatPort: docker_manager.NewManualPublishingSpec(grafanaPort), + }). + WithEnvironmentVariables(map[string]string{ + grafanaAuthAnonymousEnabledEnvVarKey: grafanaAuthAnonymousEnabledEnvVarVal, + grafanaSecurityAllowEmbeddingEnvVarKey: grafanaSecurityAllowEmbeddingEnvVarVal, + grafanaAuthAnonymousOrgRoleEnvVarKey: grafanaAuthAnonymousOrgRoleEnvVarVal, + }). + WithBindMounts(map[string]string{ + tmpFile.Name(): fmt.Sprintf("%v/loki.yaml", grafanaDatasourcesPath), + }). + WithFetchingLatestImageIfMissing(). + WithRestartPolicy(docker_manager.RestartOnFailure). + WithNetworkMode(bridgeNetworkName). + WithLabels(grafanaContainerLabels). 
+ Build() + grafanaContainerId, _, err := dockerManager.CreateAndStartContainer(ctx, grafanaArgs) + if err != nil { + return "", stacktrace.Propagate(err, "An error occurred creating '%v' container.", GrafanaContainerName) + } + shouldDestroyGrafanaContainer := true + defer func() { + if shouldDestroyGrafanaContainer { + err := dockerManager.RemoveContainer(ctx, grafanaContainerId) + if err != nil { + logrus.Warnf("Attempted to remove Grafana container after an error occurred creating it but an error occurred removing it.") + logrus.Warnf("Manually remove Grafana container with id: %v", grafanaContainerId) + } + } + }() + logrus.Infof("Grafana container started.") + + shouldDestroyLokiContainer = false + shouldDestroyGrafanaContainer = false + return lokiBridgeNetworkIpAddress, nil +} + +func waitForLokiReadiness(lokiHost string, readyPath string) error { + const ( + retryDelay = 1 * time.Second + maxAttempts = 30 + ) + url := lokiHost + readyPath + for i := 0; i < maxAttempts; i++ { + resp, err := http.Get(url) + if err == nil { + resp.Body.Close() + if resp.StatusCode == http.StatusOK { + return nil + } + } + time.Sleep(retryDelay) + } + return stacktrace.NewError("%v did not become ready after %v attempts", lokiHost, maxAttempts) +} + +func checkGrafanaAndLokiContainerExistence(ctx context.Context, dockerManager *docker_manager.DockerManager, lokiContainerLabels map[string]string, grafanaContainerLabels map[string]string) (bool, string, error) { + existsLoki := false + existsGrafana := false + var lokiBridgeNetworkIpAddress string + + lokiContainer, err := getContainerByLabel(ctx, dockerManager, lokiContainerLabels) + if err != nil { + return false, "", stacktrace.Propagate(err, "An error occurred getting Loki container by labels: %v.", lokiContainerLabels) + } + if lokiContainer != nil { + existsLoki = true + lokiBridgeNetworkIpAddress, err = dockerManager.GetContainerIPOnNetwork(ctx, lokiContainer.GetId(), bridgeNetworkName) + if err != nil { + return false, 
"", stacktrace.Propagate(err, "An error occurred getting IP of Loki container on network: %v", bridgeNetworkName) + } + } + + grafanaContainer, err := getContainerByLabel(ctx, dockerManager, grafanaContainerLabels) + if err != nil { + return false, "", stacktrace.Propagate(err, "An error occurred getting Grafana container by labels: %v.", grafanaContainerLabels) + } + if grafanaContainer != nil { + existsGrafana = true + } + + return existsLoki && existsGrafana, fmt.Sprintf("http://%v:%v", lokiBridgeNetworkIpAddress, lokiPort), nil +} + +func getContainerByLabel(ctx context.Context, dockerManager *docker_manager.DockerManager, containerLabels map[string]string) (*types.Container, error) { + containers, err := dockerManager.GetContainersByLabels(ctx, containerLabels, false) + if err != nil { + return nil, stacktrace.Propagate(err, "An error occurred getting container by labels '%+v'.", containerLabels) + } + if len(containers) > 1 { + return nil, stacktrace.NewError("More than one container with labels '%v' found.", containerLabels) + } + if len(containers) == 0 { + return nil, nil + } + return containers[0], nil +} + +func StopGrafLokiInDocker(ctx context.Context) error { + dockerManager, err := docker_manager.CreateDockerManager(EmptyDockerClientOpts) + if err != nil { + return stacktrace.Propagate(err, "An error occurred creating Docker manager.") + } + if err := dockerManager.RemoveContainer(ctx, GrafanaContainerName); err != nil { + return stacktrace.Propagate(err, "An error occurred removing Grafana container '%v'", GrafanaContainerName) + } + if err := dockerManager.RemoveContainer(ctx, LokiContainerName); err != nil { + return stacktrace.Propagate(err, "An error occurred removing Loki container '%v'", LokiContainerName) + } + return nil +} diff --git a/cli/cli/helpers/grafloki/kubernetes_grafloki.go b/cli/cli/helpers/grafloki/kubernetes_grafloki.go new file mode 100644 index 0000000000..1472cfe617 --- /dev/null +++
b/cli/cli/helpers/grafloki/kubernetes_grafloki.go @@ -0,0 +1,529 @@ +package grafloki + +import ( + "context" + "fmt" + "github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_manager" + "github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_impls/kubernetes/object_attributes_provider/kubernetes_label_key" + "github.com/kurtosis-tech/stacktrace" + "github.com/sirupsen/logrus" + "gopkg.in/yaml.v3" + apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + "time" +) + +const ( + grafanaServiceName = "kurtosis-grafana-service" + lokiServiceName = "kurtosis-loki-service" + grafanaDeploymentName = "kurtosis-grafana-deployment" + lokiDeploymentName = "kurtosis-loki-deployment" + grafanaDatasourceConfigMapName = "kurtosis-grafana-datasources" + graflokiNamespace = "kurtosis-grafloki" + grafanaNodePort int32 = 30030 + lokiNodePort int32 = 30031 + lokiProbeInitialDelaySeconds = 5 + lokiProbePeriodSeconds = 10 + lokiProbeTimeoutSeconds = 10 + + // takes around 30 seconds for loki pod to become ready + lokiDeploymentMaxRetries = 40 + lokiDeploymentRetryInterval = 1 * time.Second + defaultStorageClass = "" +) + +var lokiLabels = map[string]string{ + kubernetes_label_key.KurtosisResourceTypeKubernetesLabelKey.GetString(): lokiDeploymentName, +} +var grafanaLabels = map[string]string{ + kubernetes_label_key.KurtosisResourceTypeKubernetesLabelKey.GetString(): grafanaDeploymentName, +} + +var httpApplicationProtocol = "http" + +func StartGrafLokiInKubernetes(ctx context.Context) (string, string, error) { + k8sManager, err := getKubernetesManager() + if err != nil { + return "", "", stacktrace.Propagate(err, "An error occurred getting Kubernetes Manager.") + } + + var lokiHost string + var removeGrafanaAndLokiFunc func() + shouldRemoveGrafanaAndLoki := false + doesGrafanaAndLokiExist, lokiHost := checkGrafanaAndLokiDeploymentExistence(ctx, 
k8sManager) + if !doesGrafanaAndLokiExist { + lokiHost, removeGrafanaAndLokiFunc, err = createGrafanaAndLokiDeployments(ctx, k8sManager) + if err != nil { + return "", "", stacktrace.Propagate(err, "An error occurred creating Grafana and Loki deployments.") + } + shouldRemoveGrafanaAndLoki = true + defer func() { + if shouldRemoveGrafanaAndLoki { + removeGrafanaAndLokiFunc() + } + }() + } + + logrus.Infof("Run `kubectl port-forward -n %v svc/%v %v:%v` to access Grafana service.", graflokiNamespace, grafanaServiceName, grafanaPort, grafanaNodePort) + shouldRemoveGrafanaAndLoki = false + return lokiHost, getGrafanaUrlOnHostMachine(grafanaPort), nil +} + +func createGrafanaAndLokiDeployments(ctx context.Context, k8sManager *kubernetes_manager.KubernetesManager) (string, func(), error) { + graflokilNamespaceObj, err := k8sManager.CreateNamespace(ctx, graflokiNamespace, map[string]string{}, map[string]string{}) + if err != nil { + return "", nil, stacktrace.Propagate(err, "An error occurred creating namespace '%v'", graflokiNamespace) + } + shouldRemoveNamespace := true + removeGraflokiNamespaceFunc := func() { + if err := k8sManager.RemoveNamespace(ctx, graflokilNamespaceObj); err != nil { + logrus.Warnf("Attempted to remove namespace '%v' after an error occurred but an error occurred removing it.", graflokiNamespace) + logrus.Warnf("!! ACTION REQUIRED !! 
Manually remove namespace with Name: %v", graflokilNamespaceObj.Name) + } + } + defer func() { + if shouldRemoveNamespace { + removeGraflokiNamespaceFunc() + } + }() + + lokiDeployment, err := k8sManager.CreateDeployment( + ctx, + graflokiNamespace, + lokiDeploymentName, + lokiLabels, + map[string]string{}, + []apiv1.Container{}, // no init containers + []apiv1.Container{ + { + Name: "loki", + Image: lokiImage, + Ports: []apiv1.ContainerPort{ + { + Name: "", + HostPort: 0, + ContainerPort: lokiPort, + Protocol: "", + HostIP: "", + }, + }, + Command: nil, + Args: nil, + WorkingDir: "", + EnvFrom: nil, + Env: nil, + Resources: apiv1.ResourceRequirements{ + Limits: nil, + Requests: nil, + Claims: nil, + }, + ResizePolicy: nil, + VolumeMounts: nil, + VolumeDevices: nil, + LivenessProbe: nil, + ReadinessProbe: &apiv1.Probe{ + ProbeHandler: apiv1.ProbeHandler{ + Exec: nil, + HTTPGet: &apiv1.HTTPGetAction{ + Path: "/ready", + Port: intstr.FromInt(lokiPort), + Host: "", + Scheme: "", + HTTPHeaders: nil, + }, + TCPSocket: nil, + GRPC: nil, + }, + InitialDelaySeconds: lokiProbeInitialDelaySeconds, + TimeoutSeconds: lokiProbeTimeoutSeconds, + PeriodSeconds: lokiProbePeriodSeconds, + SuccessThreshold: 0, + FailureThreshold: 0, + TerminationGracePeriodSeconds: nil, + }, + StartupProbe: nil, + Lifecycle: nil, + TerminationMessagePath: "", + TerminationMessagePolicy: "", + ImagePullPolicy: "", + SecurityContext: nil, + Stdin: false, + StdinOnce: false, + TTY: false, + }, + }, + []apiv1.Volume{}, + &apiv1.Affinity{ + NodeAffinity: nil, + PodAffinity: nil, + PodAntiAffinity: nil, + }) + if err != nil { + return "", nil, stacktrace.Propagate(err, "An error occurred creating Loki deployment.") + } + shouldRemoveLokiDeployment := true + removeLokiDeploymentFunc := func() { + if err := k8sManager.RemoveDeployment(ctx, graflokiNamespace, lokiDeployment); err != nil { + logrus.Warnf("Attempted to remove Loki deployment after an error occurred but an error occurred removing it.") + 
logrus.Warnf("!! ACTION REQUIRED !! Manually remove Loki deployment with Name: %v", lokiDeployment.Name) + } + } + defer func() { + if shouldRemoveLokiDeployment { + removeLokiDeploymentFunc() + } + }() + logrus.Infof("Waiting for Loki deployment to come online (can take around 30s)... ") + if err := k8sManager.WaitForPodManagedByDeployment(ctx, lokiDeployment, lokiDeploymentMaxRetries, lokiDeploymentRetryInterval); err != nil { + return "", nil, stacktrace.Propagate(err, "An error occurred while waiting for pod managed by Loki deployment '%v' to come online.", lokiDeploymentName) + } + + lokiService, err := k8sManager.CreateService(ctx, + graflokiNamespace, + lokiServiceName, + map[string]string{}, // empty labels + map[string]string{}, // empty annotations + lokiLabels, // match loki deployment pod labels + apiv1.ServiceTypeNodePort, + []apiv1.ServicePort{{ + Name: "logs-listening", + Port: lokiNodePort, + TargetPort: intstr.FromInt(lokiPort), + Protocol: apiv1.ProtocolTCP, + NodePort: lokiNodePort, + AppProtocol: &httpApplicationProtocol, + }}) + if err != nil { + return "", nil, stacktrace.Propagate(err, "An error occurred creating Loki service") + } + shouldRemoveLokiService := true + removeLokiServiceFunc := func() { + if err := k8sManager.RemoveService(ctx, lokiService); err != nil { + logrus.Warnf("Attempted to remove Loki service after an error occurred but an error occurred removing it.") + logrus.Warnf("!! ACTION REQUIRED !! 
Manually remove Loki service with Name: %v", lokiService.Name) + } + } + defer func() { + if shouldRemoveLokiService { + removeLokiServiceFunc() + } + }() + lokiHost := getLokiUrlInsideK8sCluster(lokiServiceName, graflokiNamespace, lokiNodePort) + + grafanaDatasource := GrafanaDatasources{ + ApiVersion: int64(1), + Datasources: []GrafanaDatasource{ + { + Name: lokiServiceName, + Type_: "loki", + Access: "proxy", + Url: lokiHost, + IsDefault: true, + Editable: true, + }, + }} + grafanaDatasourceYaml, err := yaml.Marshal(grafanaDatasource) + if err != nil { + return "", nil, stacktrace.Propagate(err, "An error occurred serializing Grafana datasource to yaml: %v", grafanaDatasourceYaml) + } + + configMapData := map[string]string{ + "loki-datasource.yaml": string(grafanaDatasourceYaml), + } + grafanaConfigMap, err := k8sManager.CreateConfigMap(ctx, + graflokiNamespace, + grafanaDatasourceConfigMapName, + map[string]string{}, // empty labels + map[string]string{}, // empty annotations + configMapData) + if err != nil { + return "", nil, stacktrace.Propagate(err, "An error occurred creating Grafana datasource configmap.") + } + shouldRemoveGrafanaConfigMap := true + removeGrafanaConfigMapFunc := func() { + if err := k8sManager.RemoveConfigMap(ctx, graflokiNamespace, grafanaConfigMap); err != nil { + logrus.Warnf("Attempted to remove Grafana datasource config map after an error occurred but an error occurred removing it.") + logrus.Warnf("!! ACTION REQUIRED !! 
Manually remove Grafana datasource config map with Name: %v", grafanaConfigMap.Name) + } + } + defer func() { + if shouldRemoveGrafanaConfigMap { + removeGrafanaConfigMapFunc() + } + }() + + grafanaDeployment, err := k8sManager.CreateDeployment( + ctx, + graflokiNamespace, + grafanaDeploymentName, + grafanaLabels, + map[string]string{}, // empty annotations + []apiv1.Container{}, // no init containers + []apiv1.Container{ + { + Name: "grafana", + Image: grafanaImage, + Ports: []apiv1.ContainerPort{ + { + Name: "", + ContainerPort: grafanaPort, + HostPort: 0, + Protocol: "", + HostIP: "", + }, + }, + Env: []apiv1.EnvVar{ + { + Name: grafanaAuthAnonymousEnabledEnvVarKey, + Value: grafanaAuthAnonymousEnabledEnvVarVal, + ValueFrom: nil, + }, + { + Name: grafanaAuthAnonymousOrgRoleEnvVarKey, + Value: grafanaAuthAnonymousOrgRoleEnvVarVal, + ValueFrom: nil, + }, + { + Name: grafanaSecurityAllowEmbeddingEnvVarKey, + Value: grafanaSecurityAllowEmbeddingEnvVarVal, + ValueFrom: nil, + }, + }, + VolumeMounts: []apiv1.VolumeMount{ + { + Name: grafanaDatasourcesKey, + MountPath: grafanaDatasourcesPath, + ReadOnly: false, + SubPath: "", + MountPropagation: nil, + SubPathExpr: "", + }, + }, + Command: nil, + Args: nil, + WorkingDir: "", + EnvFrom: nil, + Resources: apiv1.ResourceRequirements{ + Limits: nil, + Requests: nil, + Claims: nil, + }, + ResizePolicy: nil, + VolumeDevices: nil, + LivenessProbe: nil, + ReadinessProbe: nil, + StartupProbe: nil, + Lifecycle: nil, + TerminationMessagePath: "", + TerminationMessagePolicy: "", + ImagePullPolicy: "", + SecurityContext: nil, + Stdin: false, + StdinOnce: false, + TTY: false, + }, + }, + []apiv1.Volume{{ + Name: grafanaDatasourcesKey, + VolumeSource: apiv1.VolumeSource{ + ConfigMap: &apiv1.ConfigMapVolumeSource{ + LocalObjectReference: apiv1.LocalObjectReference{ + Name: grafanaDatasourceConfigMapName, + }, + Items: nil, + DefaultMode: nil, + Optional: nil, + }, + HostPath: nil, + EmptyDir: nil, + GCEPersistentDisk: nil, + 
AWSElasticBlockStore: nil, + GitRepo: nil, + Secret: nil, + NFS: nil, + ISCSI: nil, + Glusterfs: nil, + PersistentVolumeClaim: nil, + RBD: nil, + FlexVolume: nil, + Cinder: nil, + CephFS: nil, + Flocker: nil, + DownwardAPI: nil, + FC: nil, + AzureFile: nil, + VsphereVolume: nil, + Quobyte: nil, + AzureDisk: nil, + PhotonPersistentDisk: nil, + Projected: nil, + PortworxVolume: nil, + ScaleIO: nil, + StorageOS: nil, + CSI: nil, + Ephemeral: nil, + }, + }}, + &apiv1.Affinity{ + NodeAffinity: nil, + PodAffinity: nil, + PodAntiAffinity: nil, + }) + if err != nil { + return "", nil, stacktrace.Propagate(err, "An error occurred creating Grafana deployment.") + } + shouldRemoveGrafanaDeployment := true + removeGrafanaDeploymentFunc := func() { + if err := k8sManager.RemoveDeployment(ctx, graflokiNamespace, grafanaDeployment); err != nil { + logrus.Warnf("Attempted to remove Grafana deployment after an error occurred but an error occurred removing it.") + logrus.Warnf("!! ACTION REQUIRED !! Manually remove Grafana deployment with Name: %v", grafanaDeployment.Name) + } + } + defer func() { + if shouldRemoveGrafanaDeployment { + removeGrafanaDeploymentFunc() + } + }() + + grafanaService, err := k8sManager.CreateService(ctx, + graflokiNamespace, + grafanaServiceName, + map[string]string{}, // empty labels + nil, // empty annotations + grafanaLabels, // match grafana deployment pod labels + apiv1.ServiceTypeNodePort, + []apiv1.ServicePort{{ + Name: "grafana-dashboard", + Port: grafanaNodePort, + TargetPort: intstr.FromInt(grafanaPort), + Protocol: apiv1.ProtocolTCP, + NodePort: grafanaNodePort, + AppProtocol: &httpApplicationProtocol, + }}) + if err != nil { + return "", nil, stacktrace.Propagate(err, "An error occurred creating Grafana service") + } + shouldRemoveGrafanaService := true + removeGrafanaServiceFunc := func() { + if err := k8sManager.RemoveService(ctx, grafanaService); err != nil { + logrus.Warnf("Attempted to remove Grafana service after an error occurred but an error 
occurred removing it.") + logrus.Warnf("!! ACTION REQUIRED !! Manually remove Grafana service with Name: %v", grafanaService.Name) + } + } + defer func() { + if shouldRemoveGrafanaService { + removeGrafanaServiceFunc() + } + }() + + removeGrafanaAndLokiDeploymentsFunc := func() { + removeGraflokiNamespaceFunc() + removeLokiDeploymentFunc() + removeLokiServiceFunc() + removeGrafanaConfigMapFunc() + removeGrafanaDeploymentFunc() + removeGrafanaServiceFunc() + } + + shouldRemoveLokiDeployment = false + shouldRemoveGrafanaConfigMap = false + shouldRemoveGrafanaDeployment = false + shouldRemoveGrafanaService = false + shouldRemoveNamespace = false + shouldRemoveLokiService = false + return lokiHost, removeGrafanaAndLokiDeploymentsFunc, nil +} + +func checkGrafanaAndLokiDeploymentExistence(ctx context.Context, k8sManager *kubernetes_manager.KubernetesManager) (bool, string) { + existsLoki := false + existsGrafana := false + var lokiHost string + + lokiDeployment, err := k8sManager.GetDeployment(ctx, graflokiNamespace, lokiDeploymentName) + if err == nil && lokiDeployment != nil { + existsLoki = true + lokiHost = getLokiUrlInsideK8sCluster(lokiServiceName, graflokiNamespace, lokiNodePort) + } else { + return existsLoki, "" // loki doesn't exist in this case so eject early + } + + grafanaDeployment, err := k8sManager.GetDeployment(ctx, graflokiNamespace, grafanaDeploymentName) + if err == nil && grafanaDeployment != nil { + existsGrafana = true + } else { + return existsGrafana, "" + } + + return existsLoki && existsGrafana, lokiHost +} + +func StopGrafLokiInKubernetes(ctx context.Context) error { + k8sManager, err := getKubernetesManager() + if err != nil { + return stacktrace.Propagate(err, "An error occurred getting Kubernetes Manager.") + } + graflokiNamespaceObj, err := k8sManager.GetNamespace(ctx, graflokiNamespace) + if err != nil { + return stacktrace.Propagate(err, "An error occurred getting graflokiNamespace '%v'.", graflokiNamespace) + } + err = 
k8sManager.RemoveNamespace(ctx, graflokiNamespaceObj) + if err != nil { + return stacktrace.Propagate(err, "An error occurred removing graflokiNamespace '%v'.", graflokiNamespace) + } + err = waitForNamespaceRemoval(ctx, graflokiNamespace, k8sManager) + if err != nil { + return stacktrace.Propagate(err, "An error occurred while waiting for graflokiNamespace '%v' removal.", graflokiNamespace) + } + return nil +} + +func getKubernetesManager() (*kubernetes_manager.KubernetesManager, error) { + kubernetesConfig, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig( + clientcmd.NewDefaultClientConfigLoadingRules(), + nil, // empty overrides + ).ClientConfig() + if err != nil { + return nil, stacktrace.Propagate(err, "An error occurred creating Kubernetes configuration") + } + clientSet, err := kubernetes.NewForConfig(kubernetesConfig) + if err != nil { + return nil, stacktrace.Propagate(err, "Expected to be able to create Kubernetes client set using Kubernetes config '%+v', instead a non nil error was returned", kubernetesConfig) + } + k8sManager := kubernetes_manager.NewKubernetesManager(clientSet, kubernetesConfig, defaultStorageClass) + return k8sManager, nil +} + +func waitForNamespaceRemoval( + ctx context.Context, + namespace string, + kubernetesManager *kubernetes_manager.KubernetesManager) error { + var ( + maxTriesToWaitForNamespaceRemoval uint = 30 + timeToWaitBetweenNamespaceRemovalChecks = 1 * time.Second + ) + + for i := uint(0); i < maxTriesToWaitForNamespaceRemoval; i++ { + if _, err := kubernetesManager.GetNamespace(ctx, namespace); err != nil { + // if err was returned, graflokiNamespace doesn't exist, or it's been marked for deleted + logrus.Debugf("Error retrieved from getting namespace '%v'. 
If the error is a timeout, the namespace could still exist.\n%v", namespace, err.Error()) + return nil + } + + // Tiny optimization to not sleep if we're not going to run the loop again + if i < maxTriesToWaitForNamespaceRemoval-1 { + time.Sleep(timeToWaitBetweenNamespaceRemovalChecks) + } + } + + return stacktrace.NewError("Attempted to wait for namespace '%v' removal or to be marked for deletion '%v' times but '%v' was not removed.", namespace, maxTriesToWaitForNamespaceRemoval, namespace) +} + +func getLokiUrlInsideK8sCluster(lokiServiceName, namespace string, lokiPort int32) string { + return fmt.Sprintf("http://%v.%v.svc.cluster.local:%v", lokiServiceName, namespace, lokiPort) +} + +func getGrafanaUrlOnHostMachine(grafanaPort int) string { + return fmt.Sprintf("http://127.0.0.1:%v", grafanaPort) +} diff --git a/cli/cli/helpers/grafloki/shared.go b/cli/cli/helpers/grafloki/shared.go new file mode 100644 index 0000000000..0e58f3e150 --- /dev/null +++ b/cli/cli/helpers/grafloki/shared.go @@ -0,0 +1,33 @@ +package grafloki + +const ( + lokiImage = "grafana/loki:3.4.2" + lokiPort = 3100 + + grafanaImage = "grafana/grafana:11.6.0" + grafanaPort = 3000 + + grafanaAuthAnonymousEnabledEnvVarKey = "GF_AUTH_ANONYMOUS_ENABLED" + grafanaAuthAnonymousEnabledEnvVarVal = "true" + grafanaAuthAnonymousOrgRoleEnvVarKey = "GF_AUTH_ANONYMOUS_ORG_ROLE" + grafanaAuthAnonymousOrgRoleEnvVarVal = "Admin" + grafanaSecurityAllowEmbeddingEnvVarKey = "GF_SECURITY_ALLOW_EMBEDDING" + grafanaSecurityAllowEmbeddingEnvVarVal = "true" + + grafanaDatasourcesKey = "datasources" + grafanaDatasourcesPath = "/etc/grafana/provisioning/datasources" +) + +type GrafanaDatasource struct { + Name string `yaml:"name"` + Type_ string `yaml:"type"` + Access string `yaml:"access"` + Url string `yaml:"url"` + IsDefault bool `yaml:"isDefault"` + Editable bool `yaml:"editable"` +} + +type GrafanaDatasources struct { + ApiVersion int64 `yaml:"apiVersion"` + Datasources []GrafanaDatasource `yaml:"datasources"` +} 
diff --git a/container-engine-lib/lib/backend_impls/docker/docker_manager/docker_manager.go b/container-engine-lib/lib/backend_impls/docker/docker_manager/docker_manager.go index 7a847ae8d4..9a6fcdb4a6 100644 --- a/container-engine-lib/lib/backend_impls/docker/docker_manager/docker_manager.go +++ b/container-engine-lib/lib/backend_impls/docker/docker_manager/docker_manager.go @@ -393,6 +393,37 @@ func (manager *DockerManager) GetContainerIdsConnectedToNetwork(context context. return result, nil } +func (manager *DockerManager) GetContainerIPOnNetwork(context context.Context, containerId string, networkId string) (string, error) { + inspectResponse, err := manager.dockerClient.NetworkInspect(context, networkId, types.NetworkInspectOptions{ + Scope: "", + Verbose: false, + }) + if err != nil { + return "", stacktrace.Propagate(err, "Failed to get network information for network with ID '%v'", networkId) + } + for id, c := range inspectResponse.Containers { + if id == containerId { + ip, _, err := net.ParseCIDR(c.IPv4Address) + if err != nil { + return "", stacktrace.Propagate(err, "Failed to parse IPv4 address '%s'", c.IPv4Address) + } + return ip.String(), nil + } + } + return "", stacktrace.NewError("Could not find container '%v' IP on network '%v'.", containerId, networkId) +} + +func (manager *DockerManager) GetNetworkIdByName(ctx context.Context, networkName string) (string, error) { + n, err := manager.dockerClient.NetworkInspect(ctx, networkName, types.NetworkInspectOptions{ + Scope: "", + Verbose: false, + }) + if err != nil { + return "", stacktrace.Propagate(err, "Failed to inspect the '%v' network.", networkName) + } + return n.ID, nil +} + /* RemoveNetwork Removes the Docker network with the given id @@ -539,10 +570,12 @@ func (manager *DockerManager) CreateAndStartContainer( return "", nil, stacktrace.Propagate(err, "An error occurred fetching image '%v'", dockerImage) } - idFilterArgs := filters.NewArgs(filters.KeyValuePair{ - Key: 
networkIdSearchFilterKey, - Value: args.networkId, - }) + idFilterArgs := filters.NewArgs( + filters.KeyValuePair{ + Key: networkIdSearchFilterKey, + Value: args.networkId, + }, + ) networks, err := manager.getNetworksByFilterArgs(ctx, idFilterArgs) if err != nil { return "", nil, stacktrace.Propagate(err, "An error occurred checking for the existence of network with ID %v", args.networkId) @@ -2131,6 +2164,7 @@ func newContainerFromDockerContainer(dockerContainer types.ContainerJSON) (*dock dockerContainer.Config.Entrypoint, dockerContainer.Config.Cmd, containerEnvArgs, + dockerContainer.NetworkSettings.IPAddress, ) return newContainer, nil diff --git a/container-engine-lib/lib/backend_impls/docker/docker_manager/types/container.go b/container-engine-lib/lib/backend_impls/docker/docker_manager/types/container.go index d0845d2283..7535c0122a 100644 --- a/container-engine-lib/lib/backend_impls/docker/docker_manager/types/container.go +++ b/container-engine-lib/lib/backend_impls/docker/docker_manager/types/container.go @@ -14,6 +14,7 @@ type Container struct { entrypointArgs []string cmdArgs []string envVars map[string]string + defaultIpAddress string } func NewContainer( @@ -26,6 +27,7 @@ func NewContainer( entrypointArgs []string, cmdArgs []string, envVars map[string]string, + defaultIpAddress string, ) *Container { return &Container{ id: id, @@ -37,6 +39,7 @@ func NewContainer( entrypointArgs: entrypointArgs, cmdArgs: cmdArgs, envVars: envVars, + defaultIpAddress: defaultIpAddress, } } diff --git a/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_kurtosis_backend/logs_aggregator_functions/implementations/vector/vector_logs_aggregator_deployment.go b/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_kurtosis_backend/logs_aggregator_functions/implementations/vector/vector_logs_aggregator_deployment.go index 0d2d201ff9..dbe1aac7cd 100644 --- 
a/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_kurtosis_backend/logs_aggregator_functions/implementations/vector/vector_logs_aggregator_deployment.go +++ b/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_kurtosis_backend/logs_aggregator_functions/implementations/vector/vector_logs_aggregator_deployment.go @@ -135,7 +135,7 @@ func (logsAggregator *vectorLogsAggregatorResourcesManager) CreateAndStart( } }() - if err = waitForPodManagedByDeployment(ctx, deployment, kubernetesManager); err != nil { + if err = kubernetesManager.WaitForPodManagedByDeployment(ctx, deployment, maxRetries, retryInterval); err != nil { return nil, nil, nil, nil, nil, stacktrace.Propagate(err, "An error occurred waiting for active pod managed by logs aggregator deployment '%v'", deployment.Name) } @@ -374,36 +374,6 @@ func createLogsAggregatorService( return serviceObj, nil } -func waitForPodManagedByDeployment(ctx context.Context, logsAggregatorDeployment *appsv1.Deployment, kubernetesManager *kubernetes_manager.KubernetesManager) error { - timeoutCtx, cancel := context.WithTimeout(ctx, time.Duration(maxRetries)*retryInterval) - defer cancel() - - ticker := time.NewTicker(retryInterval) - defer ticker.Stop() - for attempt := 0; attempt < maxRetries; attempt++ { - select { - case <-timeoutCtx.Done(): - return stacktrace.NewError( - "Timeout waiting for a pod managed by logs aggregator deployment '%s' to come online", - logsAggregatorDeployment.Name, - ) - case <-ticker.C: - pods, err := kubernetesManager.GetPodsManagedByDeployment(ctx, logsAggregatorDeployment) - if err != nil { - return stacktrace.Propagate(err, "An error occurred getting pods managed by logs aggregator deployment'%v'", logsAggregatorDeployment.Name) - } - if len(pods) > 0 && len(pods[0].Status.ContainerStatuses) > 0 && pods[0].Status.ContainerStatuses[0].Ready { - // found a pod with a running vector container - return nil - } - } - } - return stacktrace.NewError( - "Exceeded max retries (%d) 
waiting for a pod managed by deployment '%s' to come online", - maxRetries, logsAggregatorDeployment.Name, - ) -} - func (vector *vectorLogsAggregatorResourcesManager) GetLogsBaseDirPath() string { return kurtosisLogsMountPath } @@ -452,7 +422,7 @@ func (vector *vectorLogsAggregatorResourcesManager) Clean(ctx context.Context, l } // before continuing, ensure logs aggregator is up again - if err := waitForPodManagedByDeployment(ctx, logsAggregatorDeployment, kubernetesManager); err != nil { + if err := kubernetesManager.WaitForPodManagedByDeployment(ctx, logsAggregatorDeployment, maxRetries, retryInterval); err != nil { return stacktrace.Propagate(err, "An error occurred waiting for a pod managed by deployment '%v' to become available.", logsAggregatorDeployment.Name) } diff --git a/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_manager/kubernetes_manager.go b/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_manager/kubernetes_manager.go index ab792a47ad..dcef6230e0 100644 --- a/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_manager/kubernetes_manager.go +++ b/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_manager/kubernetes_manager.go @@ -1587,6 +1587,36 @@ func (manager *KubernetesManager) CreateDeployment( return createdDeployment, nil } +func (manager *KubernetesManager) WaitForPodManagedByDeployment(ctx context.Context, deployment *v1.Deployment, maxRetries int, retryInterval time.Duration) error { + timeoutCtx, cancel := context.WithTimeout(ctx, time.Duration(maxRetries)*retryInterval) + defer cancel() + + ticker := time.NewTicker(retryInterval) + defer ticker.Stop() + for attempt := 0; attempt < maxRetries; attempt++ { + select { + case <-timeoutCtx.Done(): + return stacktrace.NewError( + "Timeout waiting for a pod managed by deployment '%s' to come online", + deployment.Name, + ) + case <-ticker.C: + pods, err := manager.GetPodsManagedByDeployment(ctx, deployment) + if err != nil { + return 
stacktrace.Propagate(err, "An error occurred getting pods managed by deployment'%v'", deployment.Name) + } + if len(pods) > 0 && len(pods[0].Status.ContainerStatuses) > 0 && pods[0].Status.ContainerStatuses[0].Ready { + // found a pod with a running container + return nil + } + } + } + return stacktrace.NewError( + "Exceeded max retries (%d) waiting for a pod managed by deployment '%s' to come online", + maxRetries, deployment.Name, + ) +} + func (manager *KubernetesManager) ScaleDeployment(ctx context.Context, namespace, name string, replicas int32) error { deploymentClient := manager.kubernetesClientSet.AppsV1().Deployments(namespace) diff --git a/core/files_artifacts_expander/main.go b/core/files_artifacts_expander/main.go index b2e6a3854a..54f2a0dedf 100644 --- a/core/files_artifacts_expander/main.go +++ b/core/files_artifacts_expander/main.go @@ -111,7 +111,7 @@ func runMain() error { // NOTE: We don't use stacktrace here because the actual stacktraces we care about are the ones from the threads! return fmt.Errorf( - "The following errors occurred when trying to expand files artifacts:\n%v", + "the following errors occurred when trying to expand files artifacts:\n%v", strings.Join(allIndexedResultErrStrs, "\n\n"), ) } diff --git a/docs/docs/cli-reference/grafloki-start.md b/docs/docs/cli-reference/grafloki-start.md new file mode 100644 index 0000000000..bc1eefd357 --- /dev/null +++ b/docs/docs/cli-reference/grafloki-start.md @@ -0,0 +1,18 @@ +--- +title: grafloki start +sidebar_label: grafloki start +slug: /grafloki-start +--- + +To start a Grafana/Loki instance in Docker or K8s cluster, run: + +```bash +kurtosis grafloki start +``` + +This command starts a local Grafana/Loki instance and restarts the Kurtosis engine with an updated configuration. The new configuration includes a log sink that routes logs to the local Grafana/Loki instance. + +Read more about sinks and how to [export logs][export-logs] from Kurtosis. 
+ + +[export-logs]: ../guides/exporting-logs.md \ No newline at end of file diff --git a/docs/docs/cli-reference/grafloki-stop.md b/docs/docs/cli-reference/grafloki-stop.md new file mode 100644 index 0000000000..644c9b6bee --- /dev/null +++ b/docs/docs/cli-reference/grafloki-stop.md @@ -0,0 +1,17 @@ +--- +title: grafloki stop +sidebar_label: grafloki stop +slug: /grafloki-stop +--- + +To stop a Grafana/Loki instance that was started by [grafloki start][grafloki-start] in Docker or K8s cluster, run: + +```bash +kurtosis grafloki stop +``` + +This will stop a local Grafana/Loki instance, if one is running. + + + +[grafloki-start]: ./grafloki-start.md \ No newline at end of file diff --git a/docs/docs/guides/exporting-logs.md b/docs/docs/guides/exporting-logs.md index df2cd4c8ca..c4a51d2817 100644 --- a/docs/docs/guides/exporting-logs.md +++ b/docs/docs/guides/exporting-logs.md @@ -404,4 +404,12 @@ kurtosis-clusters: codec: "json" ``` +### Grafloki + +For ease of setup, Kurtosis CLI comes with a feature to start a local Grafana and Loki instance that the engine gets configured to send logs to. See [grafloki start][grafloki-start] for more info. + + +[grafloki-start]: ../cli-reference/grafloki-start.md + + diff --git a/engine/server/go.mod b/engine/server/go.mod index c91c3290ea..e823125a5a 100644 --- a/engine/server/go.mod +++ b/engine/server/go.mod @@ -107,6 +107,7 @@ require ( github.com/kurtosis-tech/kurtosis/cloud/api/golang v0.0.0-20230828153722-32770ca96513 // indirect github.com/kurtosis-tech/kurtosis/contexts-config-store v0.0.0 // indirect github.com/kurtosis-tech/kurtosis/enclave-manager/api/golang v0.0.0-20230828153722-32770ca96513 // indirect + github.com/kurtosis-tech/kurtosis/kurtosis_version v0.0.0 // indirect github.com/kurtosis-tech/kurtosis/path-compression v0.0.0-20240307154559-64d2929cd265 // indirect github.com/labstack/gommon v0.4.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect