Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add loong collector monitor #1636

Merged
merged 6 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/application/Application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ void Application::Start() { // GCOVR_EXCL_START
#endif

LogtailAlarm::GetInstance()->Init();
LOG_INFO(sLogger, ("LoongCollectorMonitor", "init"));
Takuka0311 marked this conversation as resolved.
Show resolved Hide resolved
LoongCollectorMonitor::GetInstance()->Init();
LOG_INFO(sLogger, ("LogtailMonitor", "init"));
LogtailMonitor::GetInstance()->Init();

PluginRegistry::GetInstance()->LoadPlugins();
Expand Down Expand Up @@ -356,6 +359,7 @@ void Application::Exit() {
#endif

LogtailMonitor::GetInstance()->Stop();
LoongCollectorMonitor::GetInstance()->Stop();
LogtailAlarm::GetInstance()->Stop();
// from now on, alarm should not be used.

Expand Down
13 changes: 10 additions & 3 deletions core/event_handler/LogInput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ void LogInput::Start() {
initialized = true;

mInteruptFlag = false;

mGlobalOpenFdTotal = LoongCollectorMonitor::GetInstance()->GetGauge(METRIC_GLOBAL_OPEN_FD_TOTAL);
mGlobalRegisterHandlerTotal = LoongCollectorMonitor::GetInstance()->GetGauge(METRIC_GLOBAL_REGISTER_HANDLER_TOTAL);

new Thread([this]() { ProcessLoop(); });
}

Expand Down Expand Up @@ -341,9 +345,12 @@ void LogInput::UpdateCriticalMetric(int32_t curTime) {

LogtailMonitor::GetInstance()->UpdateMetric("event_tps",
1.0 * mEventProcessCount / (curTime - mLastUpdateMetricTime));
LogtailMonitor::GetInstance()->UpdateMetric("open_fd",
GloablFileDescriptorManager::GetInstance()->GetOpenedFilePtrSize());
LogtailMonitor::GetInstance()->UpdateMetric("register_handler", EventDispatcher::GetInstance()->GetHandlerCount());
int32_t openFdTotal = GloablFileDescriptorManager::GetInstance()->GetOpenedFilePtrSize();
LogtailMonitor::GetInstance()->UpdateMetric("open_fd", openFdTotal);
mGlobalOpenFdTotal->Set(openFdTotal);
size_t handlerCount = EventDispatcher::GetInstance()->GetHandlerCount();
LogtailMonitor::GetInstance()->UpdateMetric("register_handler", handlerCount);
mGlobalRegisterHandlerTotal->Set(handlerCount);
LogtailMonitor::GetInstance()->UpdateMetric("reader_count", CheckPointManager::Instance()->GetReaderCount());
LogtailMonitor::GetInstance()->UpdateMetric("multi_config", AppConfig::GetInstance()->IsAcceptMultiConfig());
Takuka0311 marked this conversation as resolved.
Show resolved Hide resolved
mEventProcessCount = 0;
Expand Down
3 changes: 3 additions & 0 deletions core/event_handler/LogInput.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include "common/Lock.h"
#include "common/LogRunnable.h"
#include "monitor/Monitor.h"

namespace logtail {

Expand Down Expand Up @@ -78,6 +79,8 @@ class LogInput : public LogRunnable {
volatile bool mIdleFlag;
int32_t mEventProcessCount;
int32_t mLastUpdateMetricTime;
GaugePtr mGlobalOpenFdTotal;
GaugePtr mGlobalRegisterHandlerTotal;

std::atomic_int mLastReadEventTime{0};
mutable std::mutex mThreadRunningMux;
Expand Down
30 changes: 22 additions & 8 deletions core/monitor/LogtailMetric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Counter::Counter(const std::string& name, uint64_t val = 0) : mName(name), mVal(
}

// Returns the counter's current value using a single atomic load.
uint64_t Counter::GetValue() const {
    return mVal.load();
}

const std::string& Counter::GetName() const {
Expand All @@ -41,14 +41,14 @@ Counter* Counter::Collect() {
}

// Atomically increases the counter by `value`.
void Counter::Add(uint64_t value) {
    // Exactly one atomic add per call, so each increment is counted once.
    mVal.fetch_add(value);
}

// Constructs a gauge called `name` whose initial reading is `val` (0 when omitted).
Gauge::Gauge(const std::string& name, double val = 0) : mName(name), mVal(val) {
}

uint64_t Gauge::GetValue() const {
return mVal;
double Gauge::GetValue() const {
return mVal.load();
}

const std::string& Gauge::GetName() const {
Expand All @@ -59,8 +59,8 @@ Gauge* Gauge::Collect() {
return new Gauge(mName, mVal);
}

// Overwrites the gauge's reading with `value` using a single atomic store.
void Gauge::Set(double value) {
    mVal.store(value);
}

MetricsRecord::MetricsRecord(LabelsPtr labels) : mLabels(labels), mDeleted(false) {
Expand All @@ -87,9 +87,15 @@ bool MetricsRecord::IsDeleted() const {
}

// Returns the labels attached to this record.
// NOTE(review): mLabelsMutex is held only for the duration of this call, but the
// result is a reference to mLabels, so the caller reads it after the lock has been
// released. A concurrent SetLabels() can still race with that read; returning
// LabelsPtr by value would close the gap (the declaration would have to change too).
const LabelsPtr& MetricsRecord::GetLabels() const {
std::lock_guard<std::mutex> lock(mLabelsMutex);
Takuka0311 marked this conversation as resolved.
Show resolved Hide resolved
return mLabels;
}

// Replaces this record's labels with `labels`; the assignment is performed while
// holding mLabelsMutex.
void MetricsRecord::SetLabels(LabelsPtr labels) {
    const std::lock_guard<std::mutex> guard(mLabelsMutex);
    mLabels = labels;
}

const std::vector<CounterPtr>& MetricsRecord::GetCounters() const {
return mCounters;
}
Expand All @@ -99,7 +105,11 @@ const std::vector<GaugePtr>& MetricsRecord::GetGauges() const {
}

MetricsRecord* MetricsRecord::Collect() {
MetricsRecord* metrics = new MetricsRecord(mLabels);
MetricsRecord* metrics;
Takuka0311 marked this conversation as resolved.
Show resolved Hide resolved
{
std::lock_guard<std::mutex> lock(mLabelsMutex);
metrics = new MetricsRecord(mLabels);
}
for (auto& item : mCounters) {
CounterPtr newPtr(item->Collect());
metrics->mCounters.emplace_back(newPtr);
Expand Down Expand Up @@ -129,6 +139,10 @@ void MetricsRecordRef::SetMetricsRecord(MetricsRecord* metricRecord) {
mMetrics = metricRecord;
}

// Forwards `labels` to the MetricsRecord this reference points at.
// mMetrics is dereferenced unconditionally, so it must have been set beforehand.
void MetricsRecordRef::SetLabels(LabelsPtr labels) {
    MetricsRecord* const record = mMetrics;
    record->SetLabels(labels);
}

// Returns the labels of the MetricsRecord this reference points at.
// mMetrics is dereferenced unconditionally, so it must have been set beforehand.
const LabelsPtr& MetricsRecordRef::GetLabels() const {
    MetricsRecord* const record = mMetrics;
    return record->GetLabels();
}
Expand Down
16 changes: 9 additions & 7 deletions core/monitor/LogtailMetric.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ enum class MetricType {
class Counter {
private:
std::string mName;
std::atomic_long mVal;
std::atomic_uint64_t mVal;

public:
Counter(const std::string& name, uint64_t val);
Expand All @@ -47,13 +47,13 @@ using CounterPtr = std::shared_ptr<Counter>;
// A named metric that holds the most recently set reading.
// The reading is kept in a std::atomic<double>, so integral and fractional values
// can both be stored through Set().
class Gauge {
private:
    std::string mName;
    std::atomic<double> mVal;

public:
    // Creates a gauge called `name` with initial reading `val`.
    Gauge(const std::string& name, double val);
    // Current reading.
    double GetValue() const;
    // Name given at construction.
    const std::string& GetName() const;
    // Overwrites the reading with `val`.
    void Set(double val);
    // Returns a newly allocated Gauge with the same name and reading
    // (presumably owned by the caller — confirm at the call sites).
    Gauge* Collect();
};

Expand All @@ -70,12 +70,15 @@ class MetricsRecord {
std::vector<GaugePtr> mGauges;
MetricsRecord* mNext = nullptr;

mutable std::mutex mLabelsMutex;

public:
MetricsRecord(LabelsPtr labels);
MetricsRecord() = default;
void MarkDeleted();
bool IsDeleted() const;
const LabelsPtr& GetLabels() const;
void SetLabels(LabelsPtr labels);
const std::vector<CounterPtr>& GetCounters() const;
const std::vector<GaugePtr>& GetGauges() const;
CounterPtr CreateCounter(const std::string& name);
Expand All @@ -88,8 +91,6 @@ class MetricsRecord {
class MetricsRecordRef {
private:
MetricsRecord* mMetrics = nullptr;
std::vector<CounterPtr> mCounters;
std::vector<GaugePtr> mGauges;

public:
~MetricsRecordRef();
Expand All @@ -99,6 +100,7 @@ class MetricsRecordRef {
MetricsRecordRef(MetricsRecordRef&&) = delete;
MetricsRecordRef& operator=(MetricsRecordRef&&) = delete;
void SetMetricsRecord(MetricsRecord* metricRecord);
void SetLabels(LabelsPtr labels);
const LabelsPtr& GetLabels() const;
CounterPtr CreateCounter(const std::string& name);
GaugePtr CreateGauge(const std::string& name);
Expand Down
38 changes: 35 additions & 3 deletions core/monitor/MetricConstants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,38 @@ const std::string METRIC_TOPIC_FIELD_NAME = "__topic__";
const std::string LABEL_PREFIX = "label.";
const std::string VALUE_PREFIX = "value.";

// global metrics labels

const std::string METRIC_LABEL_ALIUIDS = "aliuids";
const std::string METRIC_LABEL_INSTANCE_ID = "instance_id";
const std::string METRIC_LABEL_IP = "ip";
const std::string METRIC_LABEL_OS = "os";
const std::string METRIC_LABEL_OS_DETAIL = "os_detail";
const std::string METRIC_LABEL_PROJECTS = "projects";
const std::string METRIC_LABEL_USER_DEFINED_ID = "user_defined_id";
const std::string METRIC_LABEL_UUID = "uuid";
const std::string METRIC_LABEL_VERSION = "version";

// global metrics values

const std::string METRIC_GLOBAL_CPU = "global_cpu_used_cores";
const std::string METRIC_GLOBAL_MEMORY = "global_memory_used_mb";
const std::string METRIC_GLOBAL_OPEN_FD_TOTAL = "global_open_fd_total";
const std::string METRIC_GLOBAL_POLLING_DIR_CACHE_SIZE_TOTAL = "global_polling_dir_cache_size_total";
const std::string METRIC_GLOBAL_POLLING_FILE_CACHE_SIZE_TOTAL = "global_polling_file_cache_size_total";
const std::string METRIC_GLOBAL_POLLING_MODIFY_SIZE_TOTAL = "global_polling_modify_size_total";
const std::string METRIC_GLOBAL_REGISTER_HANDLER_TOTAL = "global_register_handler_total";
const std::string METRIC_GLOBAL_CONFIG_TOTAL = "global_config_total";
const std::string METRIC_GLOBAL_ENV_CONFIG_TOTAL = "global_env_config_total";
const std::string METRIC_GLOBAL_CRD_CONFIG_TOTAL = "global_crd_config_total";
const std::string METRIC_GLOBAL_CONSOLE_CONFIG_TOTAL = "global_console_config_total";
const std::string METRIC_GLOBAL_PLUGIN_TOTAL = "global_plugin_total";
const std::string METRIC_GLOBAL_PROCESS_QUEUE_FULL_TOTAL = "global_process_queue_full_total";
const std::string METRIC_GLOBAL_PROCESS_QUEUE_TOTAL = "global_process_queue_total";
const std::string METRIC_GLOBAL_SEND_QUEUE_FULL_TOTAL = "global_send_queue_full_total";
const std::string METRIC_GLOBAL_SEND_QUEUE_TOTAL = "global_send_queue_total";
const std::string METRIC_GLOBAL_USED_SENDING_CONCURRENCY = "global_used_sending_concurrency";

// common plugin labels
const std::string METRIC_LABEL_PROJECT = "project";
const std::string METRIC_LABEL_LOGSTORE = "logstore";
Expand All @@ -40,10 +72,10 @@ const std::string METRIC_LABEL_FILE_INODE = "file_inode";
const std::string METRIC_LABEL_FILE_NAME = "file_name";

// input file metrics
const std::string METRIC_INPUT_RECORDS_TOTAL = "input_records_total";
const std::string METRIC_INPUT_RECORDS_TOTAL = "input_records_total";
const std::string METRIC_INPUT_RECORDS_SIZE_BYTES = "input_records_size_bytes";
const std::string METRIC_INPUT_BATCH_TOTAL = "input_batch_total";
const std::string METRIC_INPUT_READ_TOTAL = "input_read_total";
const std::string METRIC_INPUT_BATCH_TOTAL = "input_batch_total";
const std::string METRIC_INPUT_READ_TOTAL = "input_read_total";
const std::string METRIC_INPUT_FILE_SIZE_BYTES = "input_file_size_bytes";
const std::string METRIC_INPUT_FILE_READ_DELAY_TIME_MS = "input_file_read_delay_time_ms";
const std::string METRIC_INPUT_FILE_OFFSET_BYTES = "input_file_offset_bytes";
Expand Down
38 changes: 35 additions & 3 deletions core/monitor/MetricConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,38 @@ extern const std::string METRIC_TOPIC_FIELD_NAME;
extern const std::string LABEL_PREFIX;
extern const std::string VALUE_PREFIX;

// global metrics labels

extern const std::string METRIC_LABEL_ALIUIDS;
extern const std::string METRIC_LABEL_INSTANCE_ID;
extern const std::string METRIC_LABEL_IP;
extern const std::string METRIC_LABEL_OS;
extern const std::string METRIC_LABEL_OS_DETAIL;
extern const std::string METRIC_LABEL_PROJECTS;
extern const std::string METRIC_LABEL_USER_DEFINED_ID;
extern const std::string METRIC_LABEL_UUID;
extern const std::string METRIC_LABEL_VERSION;

// global metrics values

extern const std::string METRIC_GLOBAL_CPU;
extern const std::string METRIC_GLOBAL_MEMORY;
extern const std::string METRIC_GLOBAL_OPEN_FD_TOTAL;
extern const std::string METRIC_GLOBAL_POLLING_DIR_CACHE_SIZE_TOTAL;
extern const std::string METRIC_GLOBAL_POLLING_FILE_CACHE_SIZE_TOTAL;
extern const std::string METRIC_GLOBAL_POLLING_MODIFY_SIZE_TOTAL;
extern const std::string METRIC_GLOBAL_REGISTER_HANDLER_TOTAL;
extern const std::string METRIC_GLOBAL_CONFIG_TOTAL;
extern const std::string METRIC_GLOBAL_ENV_CONFIG_TOTAL;
extern const std::string METRIC_GLOBAL_CRD_CONFIG_TOTAL;
extern const std::string METRIC_GLOBAL_CONSOLE_CONFIG_TOTAL;
extern const std::string METRIC_GLOBAL_PLUGIN_TOTAL;
extern const std::string METRIC_GLOBAL_PROCESS_QUEUE_FULL_TOTAL;
extern const std::string METRIC_GLOBAL_PROCESS_QUEUE_TOTAL;
extern const std::string METRIC_GLOBAL_SEND_QUEUE_FULL_TOTAL;
extern const std::string METRIC_GLOBAL_SEND_QUEUE_TOTAL;
extern const std::string METRIC_GLOBAL_USED_SENDING_CONCURRENCY;

// common plugin labels
extern const std::string METRIC_LABEL_PROJECT;
extern const std::string METRIC_LABEL_LOGSTORE;
Expand All @@ -42,10 +74,10 @@ extern const std::string METRIC_LABEL_FILE_INODE;
extern const std::string METRIC_LABEL_FILE_NAME;

// input file metrics
extern const std::string METRIC_INPUT_RECORDS_TOTAL;
extern const std::string METRIC_INPUT_RECORDS_TOTAL;
extern const std::string METRIC_INPUT_RECORDS_SIZE_BYTES;
extern const std::string METRIC_INPUT_BATCH_TOTAL;
extern const std::string METRIC_INPUT_READ_TOTAL;
extern const std::string METRIC_INPUT_BATCH_TOTAL;
extern const std::string METRIC_INPUT_READ_TOTAL;
extern const std::string METRIC_INPUT_FILE_SIZE_BYTES;
extern const std::string METRIC_INPUT_FILE_READ_DELAY_TIME_MS;
extern const std::string METRIC_INPUT_FILE_OFFSET_BYTES;
Expand Down
Loading
Loading