From 9fd9a6806067f2295bd8d8c238073ef0b2ea53dc Mon Sep 17 00:00:00 2001 From: Abingcbc Date: Tue, 19 Mar 2024 11:22:53 +0000 Subject: [PATCH 01/15] fix: refactor split state --- core/monitor/MetricConstants.cpp | 7 +- core/monitor/MetricConstants.h | 2 + core/processor/ProcessorSplitRegexNative.cpp | 304 ++++++++---------- core/processor/ProcessorSplitRegexNative.h | 3 + core/reader/LogFileReader.h | 2 - .../ProcessorSplitRegexNativeUnittest.cpp | 82 +---- 6 files changed, 162 insertions(+), 238 deletions(-) diff --git a/core/monitor/MetricConstants.cpp b/core/monitor/MetricConstants.cpp index fc9931269a..82b8027f2a 100644 --- a/core/monitor/MetricConstants.cpp +++ b/core/monitor/MetricConstants.cpp @@ -43,6 +43,11 @@ const std::string METRIC_PROC_PARSE_ERROR_TOTAL = "proc_parse_error_total"; const std::string METRIC_PROC_KEY_COUNT_NOT_MATCH_ERROR_TOTAL = "proc_key_count_not_match_error_total"; const std::string METRIC_PROC_HISTORY_FAILURE_TOTAL = "proc_history_failure_total"; +const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_SPLITTED_RECORDS_TOTAL + = "proc_split_multiline_log_splitted_records_total"; +const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_RECORDS_TOTAL + = "proc_split_multiline_log_unmatched_records_total"; + // processor filter metrics const std::string METRIC_PROC_FILTER_IN_SIZE_BYTES = "proc_filter_in_size_bytes"; const std::string METRIC_PROC_FILTER_OUT_SIZE_BYTES = "proc_filter_out_size_bytes"; @@ -56,4 +61,4 @@ const std::string PLUGIN_PROCESSOR_PARSE_REGEX_NATIVE = "processor_parse_regex_n // processor desensitize metrics const std::string METRIC_PROC_DESENSITIZE_RECORDS_TOTAL = "proc_desensitize_records_total"; -} \ No newline at end of file +} // namespace logtail \ No newline at end of file diff --git a/core/monitor/MetricConstants.h b/core/monitor/MetricConstants.h index 403a6bcf25..bbed4b560c 100644 --- a/core/monitor/MetricConstants.h +++ b/core/monitor/MetricConstants.h @@ -42,6 +42,8 @@ extern const std::string METRIC_PROC_PARSE_OUT_SIZE_BYTES; extern const std::string METRIC_PROC_PARSE_ERROR_TOTAL; extern const std::string METRIC_PROC_KEY_COUNT_NOT_MATCH_ERROR_TOTAL; extern const std::string METRIC_PROC_HISTORY_FAILURE_TOTAL; +extern const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_SPLITTED_RECORDS_TOTAL; +extern const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_RECORDS_TOTAL; // processor filter metrics extern const std::string METRIC_PROC_FILTER_IN_SIZE_BYTES; diff --git a/core/processor/ProcessorSplitRegexNative.cpp b/core/processor/ProcessorSplitRegexNative.cpp index 728141a58a..e37fa8bbda 100644 --- a/core/processor/ProcessorSplitRegexNative.cpp +++ b/core/processor/ProcessorSplitRegexNative.cpp @@ -26,7 +26,6 @@ #include "models/LogEvent.h" #include "monitor/MetricConstants.h" #include "plugin/instance/ProcessorInstance.h" -#include "reader/LogFileReader.h" //SplitState namespace logtail { @@ -68,6 +67,10 @@ bool ProcessorSplitRegexNative::Init(const Json::Value& config) { mFeedLines = &(GetContext().GetProcessProfile().feedLines); mSplitLines = &(GetContext().GetProcessProfile().splitLines); + mProcSplittedEventsCnt + = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_SPLITTED_RECORDS_TOTAL); + mProcUnmatchedEventsCnt + = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_RECORDS_TOTAL); return true; } @@ -81,13 +84,26 @@ void ProcessorSplitRegexNative::Process(PipelineEventGroup& logGroup) { ProcessEvent(logGroup, logPath, std::move(e), newEvents); } *mSplitLines = newEvents.size(); + mProcSplittedEventsCnt->Add(newEvents.size()); logGroup.SwapEvents(newEvents); return; } bool ProcessorSplitRegexNative::IsSupportedEvent(const PipelineEventPtr& e) const { - return e.Is(); + if (e.Is()) { + return true; + } + LOG_ERROR( + mContext->GetLogger(), + ("unexpected error", "some events are not supported")("processor", sName)("config", mContext->GetConfigName())); + mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, + "unexpected error: some events are not supported.\tprocessor: " + sName + + "\tconfig: " + mContext->GetConfigName(), + mContext->GetProjectName(), + mContext->GetLogstoreName(), + mContext->GetRegion()); + return false; } void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, @@ -109,6 +125,7 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, int feedLines = 0; bool splitSuccess = LogSplit(sourceVal.data(), sourceVal.size(), feedLines, logIndex, discardIndex, logPath); *mFeedLines += feedLines; + mProcUnmatchedEventsCnt->Add(discardIndex.size()); if (AppConfig::GetInstance()->IsLogParseAlarmValid() && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { if (!splitSuccess) { // warning if unsplittable @@ -174,155 +191,132 @@ bool ProcessorSplitRegexNative::LogSplit(const char* buffer, const StringView& logPath) { /* | -------------- | -------- \n - multiBeginIndex begIndex endIndex + multiStartIndex startIndex endIndex - multiBeginIndex: used to cache current parsing log. Clear when starting the next log. - begIndex: the begin index of the current line + multiStartIndex: used to cache current parsing log. Clear when starting the next log. + startIndex: the begin index of the current line endIndex: the end index of the current line Supported regex combination: - 1. begin - 2. begin + continue - 3. begin + end + 1. start + 2. start + continue + 3. start + end 4. continue + end 5. end */ - int multiBeginIndex = 0; - int begIndex = 0; + int multiStartIndex = 0; + int startIndex = 0; int endIndex = 0; bool anyMatched = false; lineFeed = 0; std::string exception; - SplitState state = SPLIT_UNMATCH; - while (endIndex <= size) { + bool isPartialLog = false; + if (mMultiline.GetStartPatternReg() == nullptr && mMultiline.GetContinuePatternReg() == nullptr + && mMultiline.GetEndPatternReg() != nullptr) { + // if only end pattern is given, then it will stick to this state + isPartialLog = true; + } + for (; endIndex <= size; endIndex++) { if (endIndex == size || buffer[endIndex] == '\n') { lineFeed++; exception.clear(); - // State machine with three states (SPLIT_UNMATCH, SPLIT_BEGIN, SPLIT_CONTINUE) - switch (state) { - case SPLIT_UNMATCH: - if (!mMultiline.IsMultiline()) { - // Single line log - anyMatched = true; - logIndex.emplace_back(buffer + begIndex, endIndex - begIndex); - multiBeginIndex = endIndex + 1; - break; - } else if (mMultiline.GetStartPatternReg() != nullptr) { - if (BoostRegexMatch( - buffer + begIndex, endIndex - begIndex, *mMultiline.GetStartPatternReg(), exception)) { - // Just clear old cache, task current line as the new cache - if (multiBeginIndex != begIndex) { - anyMatched = true; - logIndex[logIndex.size() - 1] = StringView(logIndex[logIndex.size() - 1].begin(), - logIndex[logIndex.size() - 1].length() - + begIndex - 1 - multiBeginIndex); - multiBeginIndex = begIndex; - } - state = SPLIT_BEGIN; - break; - } - HandleUnmatchLogs(buffer, multiBeginIndex, endIndex, logIndex, discardIndex); - break; - } - // ContinuePatternReg can be matched 0 or multiple times, if not match continue to try EndPatternReg - if (mMultiline.GetContinuePatternReg() != nullptr - && BoostRegexMatch( - buffer + begIndex, endIndex - begIndex, *mMultiline.GetContinuePatternReg(), exception)) { - state = SPLIT_CONTINUE; - break; - } - if (mMultiline.GetEndPatternReg() != nullptr - && BoostRegexMatch( - buffer + begIndex, endIndex - begIndex, *mMultiline.GetEndPatternReg(), exception)) { - // output logs in cache from multiBeginIndex to endIndex - anyMatched = true; - logIndex.emplace_back(buffer + multiBeginIndex, endIndex - multiBeginIndex); - multiBeginIndex = endIndex + 1; - break; - } - HandleUnmatchLogs(buffer, multiBeginIndex, endIndex, logIndex, discardIndex); - break; - - case SPLIT_BEGIN: - // ContinuePatternReg can be matched 0 or multiple times, if not match continue to - // try others. - if (mMultiline.GetContinuePatternReg() != nullptr - && BoostRegexMatch( - buffer + begIndex, endIndex - begIndex, *mMultiline.GetContinuePatternReg(), exception)) { - state = SPLIT_CONTINUE; - break; - } - if (mMultiline.GetEndPatternReg() != nullptr) { - if (BoostRegexMatch( - buffer + begIndex, endIndex - begIndex, *mMultiline.GetEndPatternReg(), exception)) { + if (!isPartialLog) { + // it is impossible to enter this state if only end pattern is given + boost::regex regex; + if (mMultiline.GetStartPatternReg() != nullptr) { + regex = *mMultiline.GetStartPatternReg(); + } else { + regex = *mMultiline.GetContinuePatternReg(); + } + if (BoostRegexMatch(buffer + startIndex, endIndex - startIndex, regex, exception)) { + multiStartIndex = startIndex; + isPartialLog = true; + } else if (mMultiline.GetEndPatternReg() != nullptr && mMultiline.GetStartPatternReg() == nullptr + && mMultiline.GetContinuePatternReg() != nullptr + && BoostRegexMatch( + buffer + startIndex, endIndex - startIndex, *mMultiline.GetEndPatternReg(), exception)) { + // case: continue + end + // output logs in cache from multiStartIndex to endIndex + anyMatched = true; + logIndex.emplace_back(buffer + multiStartIndex, endIndex - multiStartIndex); + multiStartIndex = endIndex + 1; + } else { + HandleUnmatchLogs(buffer, multiStartIndex, endIndex, logIndex, discardIndex); + multiStartIndex = endIndex + 1; + } + } else { + // case: start + continue or continue + end + if (mMultiline.GetContinuePatternReg() != nullptr + && BoostRegexMatch( + buffer + startIndex, endIndex - startIndex, *mMultiline.GetContinuePatternReg(), exception)) { + startIndex = endIndex + 1; + continue; + } + if (mMultiline.GetEndPatternReg() != nullptr) { + // case: start + end or continue + end or end + if (mMultiline.GetContinuePatternReg() != nullptr) { + // current line is not matched against the continue pattern, so the end pattern will decide if + // the current log is a match or not + if (BoostRegexMatch(buffer + startIndex, + endIndex - startIndex, + *mMultiline.GetEndPatternReg(), + exception)) { anyMatched = true; - logIndex.emplace_back(buffer + multiBeginIndex, endIndex - multiBeginIndex); - multiBeginIndex = endIndex + 1; - state = SPLIT_UNMATCH; + logIndex.emplace_back(buffer + multiStartIndex, endIndex - multiStartIndex); + } else { + HandleUnmatchLogs(buffer, multiStartIndex, endIndex, logIndex, discardIndex); } - // for case: begin unmatch end - // so logs cannot be handled as unmatch even if not match LogEngReg - } else if (mMultiline.GetStartPatternReg() != nullptr) { - anyMatched = true; - if (BoostRegexMatch( - buffer + begIndex, endIndex - begIndex, *mMultiline.GetStartPatternReg(), exception)) { - if (multiBeginIndex != begIndex) { - logIndex.emplace_back(buffer + multiBeginIndex, begIndex - 1 - multiBeginIndex); - multiBeginIndex = begIndex; + multiStartIndex = endIndex + 1; + isPartialLog = false; + } else { + // case: start + end or end + if (BoostRegexMatch(buffer + startIndex, + endIndex - startIndex, + *mMultiline.GetEndPatternReg(), + exception)) { + anyMatched = true; + logIndex.emplace_back(buffer + multiStartIndex, endIndex - multiStartIndex); + multiStartIndex = endIndex + 1; + if (mMultiline.GetStartPatternReg() != nullptr) { + isPartialLog = false; } - } else if (mMultiline.GetContinuePatternReg() != nullptr) { - // case: begin+continue, but we meet unmatch log here - logIndex.emplace_back(buffer + multiBeginIndex, begIndex - 1 - multiBeginIndex); - multiBeginIndex = begIndex; - HandleUnmatchLogs(buffer, multiBeginIndex, endIndex, logIndex, discardIndex); - state = SPLIT_UNMATCH; + // if only end pattern is given, start another log automatically } - // else case: begin+end or begin, we should keep unmatch log in the cache + // no continue pattern given, and the current line in not matched against the end pattern, so + // wait for the next line } - break; - - case SPLIT_CONTINUE: - // ContinuePatternReg can be matched 0 or multiple times, if not match continue to try others. - if (mMultiline.GetContinuePatternReg() != nullptr - && BoostRegexMatch( - buffer + begIndex, endIndex - begIndex, *mMultiline.GetContinuePatternReg(), exception)) { - break; - } - if (mMultiline.GetEndPatternReg() != nullptr) { - if (BoostRegexMatch( - buffer + begIndex, endIndex - begIndex, *mMultiline.GetEndPatternReg(), exception)) { - anyMatched = true; - logIndex.emplace_back(buffer + multiBeginIndex, endIndex - multiBeginIndex); - multiBeginIndex = endIndex + 1; - state = SPLIT_UNMATCH; - } else { - HandleUnmatchLogs(buffer, multiBeginIndex, endIndex, logIndex, discardIndex); - state = SPLIT_UNMATCH; + } else { + if (mMultiline.GetContinuePatternReg() == nullptr) { + // case: start + if (BoostRegexMatch(buffer + startIndex, + endIndex - startIndex, + *mMultiline.GetStartPatternReg(), + exception)) { + logIndex.emplace_back(buffer + multiStartIndex, startIndex - 1 - multiStartIndex); + multiStartIndex = startIndex; } - } else if (mMultiline.GetStartPatternReg() != nullptr) { - if (BoostRegexMatch( - buffer + begIndex, endIndex - begIndex, *mMultiline.GetStartPatternReg(), exception)) { - anyMatched = true; - logIndex.emplace_back(buffer + multiBeginIndex, begIndex - 1 - multiBeginIndex); - multiBeginIndex = begIndex; - state = SPLIT_BEGIN; + } else { + // case: start + continue + // continue pattern is given, but current line is not matched against the continue pattern + if (!BoostRegexMatch(buffer + startIndex, + endIndex - startIndex, + *mMultiline.GetStartPatternReg(), + exception)) { + // when no end pattern is given, the only chance to enter unmatched state is when both start + // and continue pattern are given, and the current line is not matched against the start + // pattern + HandleUnmatchLogs(buffer, startIndex, endIndex, logIndex, discardIndex); + multiStartIndex = endIndex + 1; + isPartialLog = false; } else { anyMatched = true; - logIndex.emplace_back(buffer + multiBeginIndex, begIndex - 1 - multiBeginIndex); - multiBeginIndex = begIndex; - HandleUnmatchLogs(buffer, multiBeginIndex, endIndex, logIndex, discardIndex); - state = SPLIT_UNMATCH; + logIndex.emplace_back(buffer + multiStartIndex, startIndex - 1 - multiStartIndex); + multiStartIndex = startIndex; } - } else { - anyMatched = true; - logIndex.emplace_back(buffer + multiBeginIndex, begIndex - 1 - multiBeginIndex); - multiBeginIndex = begIndex; - HandleUnmatchLogs(buffer, multiBeginIndex, endIndex, logIndex, discardIndex); - state = SPLIT_UNMATCH; } - break; + } } - begIndex = endIndex + 1; + startIndex = endIndex + 1; if (!exception.empty()) { if (AppConfig::GetInstance()->IsLogParseAlarmValid()) { if (GetContext().GetAlarm().IsLowLevelAlarmValid()) { @@ -340,67 +334,37 @@ bool ProcessorSplitRegexNative::LogSplit(const char* buffer, } } } - endIndex++; } - // We should clear the log from `multiBeginIndex` to `size`. - if (multiBeginIndex < size) { - if (!mMultiline.IsMultiline()) { - logIndex.emplace_back(buffer + multiBeginIndex, size - multiBeginIndex); + // when in unmatched state, the unmatched log is handled one by one, so there is no need for additional handle here + if (isPartialLog && multiStartIndex < size) { + endIndex = buffer[size - 1] == '\n' ? size - 1 : size; + if (mMultiline.GetEndPatternReg() == nullptr) { + anyMatched = true; + logIndex.emplace_back(buffer + multiStartIndex, endIndex - multiStartIndex); } else { - endIndex = buffer[size - 1] == '\n' ? size - 1 : size; - if (mMultiline.GetStartPatternReg() != NULL && mMultiline.GetEndPatternReg() == NULL) { - anyMatched = true; - // If logs is unmatched, they have been handled immediately. So logs must be matched here. - logIndex.emplace_back(buffer + multiBeginIndex, endIndex - multiBeginIndex); - } else if (mMultiline.GetStartPatternReg() == NULL && mMultiline.GetContinuePatternReg() == NULL - && mMultiline.GetEndPatternReg() != NULL) { - // If there is still logs in cache, it means that there is no end line. We can handle them as unmatched. - if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::DISCARD) { - for (int i = multiBeginIndex; i <= endIndex; i++) { - if (i == endIndex || buffer[i] == '\n') { - discardIndex.emplace_back(buffer + multiBeginIndex, i - multiBeginIndex); - multiBeginIndex = i + 1; - } - } - } else if (mMultiline.mUnmatchedContentTreatment - == MultilineOptions::UnmatchedContentTreatment::SINGLE_LINE) { - for (int i = multiBeginIndex; i <= endIndex; i++) { - if (i == endIndex || buffer[i] == '\n') { - logIndex.emplace_back(buffer + multiBeginIndex, i - multiBeginIndex); - multiBeginIndex = i + 1; - } - } - } - } else { - HandleUnmatchLogs(buffer, multiBeginIndex, endIndex, logIndex, discardIndex); - } + HandleUnmatchLogs(buffer, multiStartIndex, endIndex, logIndex, discardIndex); } } return anyMatched; } void ProcessorSplitRegexNative::HandleUnmatchLogs(const char* buffer, - int& multiBeginIndex, + int& multiStartIndex, int endIndex, std::vector& logIndex, std::vector& discardIndex) { - // Cannot determine where log is unmatched here where there is only EndPatternReg - if (mMultiline.GetStartPatternReg() == nullptr && mMultiline.GetContinuePatternReg() == nullptr - && mMultiline.GetEndPatternReg() != nullptr) { - return; - } if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::DISCARD) { - for (int i = multiBeginIndex; i <= endIndex; i++) { + for (int i = multiStartIndex; i <= endIndex; i++) { if (i == endIndex || buffer[i] == '\n') { - discardIndex.emplace_back(buffer + multiBeginIndex, i - multiBeginIndex); - multiBeginIndex = i + 1; + discardIndex.emplace_back(buffer + multiStartIndex, i - multiStartIndex); + multiStartIndex = i + 1; } } } else if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::SINGLE_LINE) { - for (int i = multiBeginIndex; i <= endIndex; i++) { + for (int i = multiStartIndex; i <= endIndex; i++) { if (i == endIndex || buffer[i] == '\n') { - logIndex.emplace_back(buffer + multiBeginIndex, i - multiBeginIndex); - multiBeginIndex = i + 1; + logIndex.emplace_back(buffer + multiStartIndex, i - multiStartIndex); + multiStartIndex = i + 1; } } } diff --git a/core/processor/ProcessorSplitRegexNative.h b/core/processor/ProcessorSplitRegexNative.h index c5d4b16d5a..63afe5c899 100644 --- a/core/processor/ProcessorSplitRegexNative.h +++ b/core/processor/ProcessorSplitRegexNative.h @@ -61,6 +61,9 @@ class ProcessorSplitRegexNative : public Processor { int* mFeedLines = nullptr; int* mSplitLines = nullptr; + CounterPtr mProcSplittedEventsCnt; + CounterPtr mProcUnmatchedEventsCnt; + #ifdef APSARA_UNIT_TEST_MAIN friend class ProcessorSplitRegexNativeUnittest; friend class ProcessorSplitRegexDisacardUnmatchUnittest; diff --git a/core/reader/LogFileReader.h b/core/reader/LogFileReader.h index e242737522..9ed95de06c 100644 --- a/core/reader/LogFileReader.h +++ b/core/reader/LogFileReader.h @@ -47,8 +47,6 @@ class DevInode; typedef std::shared_ptr LogFileReaderPtr; typedef std::deque LogFileReaderPtrArray; -enum SplitState { SPLIT_UNMATCH, SPLIT_BEGIN, SPLIT_CONTINUE }; - // Only get the currently written log file, it will choose the last modified file to read. There are several condition // to choose the lastmodify file: // 1. if the last read file don't exist diff --git a/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp b/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp index b6b52ee934..636ba400c5 100644 --- a/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp @@ -36,7 +36,6 @@ class ProcessorSplitRegexNativeUnittest : public ::testing::Test { void SetUp() override { mContext.SetConfigName("project##config_0"); } void TestInit(); - void TestProcessEventSingleLine(); void TestProcessEventMultiline(); void TestProcessEventMultilineKeepUnmatch(); void TestProcessEventMultilineDiscardUnmatch(); @@ -48,7 +47,6 @@ class ProcessorSplitRegexNativeUnittest : public ::testing::Test { }; UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestInit); -UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcessEventSingleLine); UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcessEventMultiline); UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcessEventMultilineKeepUnmatch); UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcessEventMultilineDiscardUnmatch); @@ -64,73 +62,11 @@ void ProcessorSplitRegexNativeUnittest::TestInit() { config["AppendingLogPositionMeta"] = false; ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); } -void ProcessorSplitRegexNativeUnittest::TestProcessEventSingleLine() { - // make config - Json::Value config; - config["StartPattern"] = ".*"; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitRegexNative processor; - processor.SetContext(mContext); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - // make eventGroup - auto sourceBuffer = std::make_shared(); - PipelineEventGroup eventGroup(sourceBuffer); - std::string inJson = R"({ - "events" : - [ - { - "contents" : - { - "__file_offset__": "0", - "content" : "line1\nline2" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - eventGroup.FromJsonString(inJson); - std::string logPath("/var/log/message"); - EventsContainer newEvents; - // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); - // judge result - std::string expectJson = R"({ - "events" : - [ - { - "contents" : - { - "content" : "line1" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "content" : "line2" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); -} - void ProcessorSplitRegexNativeUnittest::TestProcessEventMultiline() { // make config Json::Value config; @@ -140,6 +76,7 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultiline() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -211,6 +148,7 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineKeepUnmatch() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -297,6 +235,7 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineDiscardUnmatch( // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -354,6 +293,7 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineAllNotMatchKeep // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -436,6 +376,7 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineAllNotMatchDisc // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -476,6 +417,7 @@ void ProcessorSplitRegexNativeUnittest::TestProcess() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -563,6 +505,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -710,6 +653,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -876,6 +820,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -966,6 +911,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1102,6 +1048,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1211,6 +1158,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1404,6 +1352,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1652,6 +1601,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1768,6 +1718,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1940,6 +1891,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { // make processor ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log From 1a4ba978b9210ea25bfcab0b979ffc693c8eb92e Mon Sep 17 00:00:00 2001 From: abingcbc Date: Wed, 20 Mar 2024 02:01:13 +0000 Subject: [PATCH 02/15] fix --- core/monitor/MetricConstants.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/monitor/MetricConstants.cpp b/core/monitor/MetricConstants.cpp index 82b8027f2a..f343fef1f3 100644 --- a/core/monitor/MetricConstants.cpp +++ b/core/monitor/MetricConstants.cpp @@ -61,4 +61,4 @@ const std::string PLUGIN_PROCESSOR_PARSE_REGEX_NATIVE = "processor_parse_regex_n // processor desensitize metrics const std::string METRIC_PROC_DESENSITIZE_RECORDS_TOTAL = "proc_desensitize_records_total"; -} // namespace logtail \ No newline at end of file +} \ No newline at end of file From a14570b6c0a9f3f0463c6fb0a0745d44f0f735cd Mon Sep 17 00:00:00 2001 From: abingcbc Date: Wed, 20 Mar 2024 12:15:55 +0000 Subject: [PATCH 03/15] fix --- core/file_server/MultilineOptions.cpp | 14 + core/file_server/MultilineOptions.h | 1 + core/processor/ProcessorSplitRegexNative.cpp | 454 +++++++++--------- core/processor/ProcessorSplitRegexNative.h | 30 +- .../ProcessorSplitRegexNativeUnittest.cpp | 1 - 5 files changed, 266 insertions(+), 234 deletions(-) diff --git a/core/file_server/MultilineOptions.cpp b/core/file_server/MultilineOptions.cpp index d476df51e3..434bbb1349 100644 --- a/core/file_server/MultilineOptions.cpp +++ b/core/file_server/MultilineOptions.cpp @@ -181,4 +181,18 @@ bool MultilineOptions::ParseRegex(const string& pattern, shared_ptrGetRegion()); } + // Ignore Warning + if (!GetOptionalBoolParam(config, "IgnoreUnmatchWarning", mIgnoreUnmatchWarning, errorMsg)) { + PARAM_WARNING_DEFAULT(mContext->GetLogger(), + mContext->GetAlarm(), + errorMsg, + mIgnoreUnmatchWarning, + sName, + mContext->GetConfigName(), + mContext->GetProjectName(), + mContext->GetLogstoreName(), + mContext->GetRegion()); + } + if (!mMultiline.Init(config, *mContext, sName)) { return false; } @@ -64,7 +77,6 @@ bool ProcessorSplitRegexNative::Init(const Json::Value& config) { mContext->GetRegion()); } - mFeedLines = &(GetContext().GetProcessProfile().feedLines); mSplitLines = &(GetContext().GetProcessProfile().splitLines); mProcSplittedEventsCnt @@ -86,8 +98,6 @@ void ProcessorSplitRegexNative::Process(PipelineEventGroup& logGroup) { *mSplitLines = newEvents.size(); mProcSplittedEventsCnt->Add(newEvents.size()); logGroup.SwapEvents(newEvents); - - return; } bool ProcessorSplitRegexNative::IsSupportedEvent(const PipelineEventPtr& e) const { @@ -117,98 +127,22 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, const LogEvent& sourceEvent = e.Cast(); if (!sourceEvent.HasContent(mSourceKey)) { newEvents.emplace_back(std::move(e)); + LOG_ERROR(mContext->GetLogger(), + ("unexpected error", "Some events do not have the SourceKey.")("processor", sName)( + "SourceKey", mSourceKey)("config", mContext->GetConfigName())); + mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, + "unexpected error: some events do not have the sourceKey.\tSourceKey: " + + mSourceKey + "\tprocessor: " + sName + + "\tconfig: " + mContext->GetConfigName(), + mContext->GetProjectName(), + mContext->GetLogstoreName(), + mContext->GetRegion()); return; } - StringView sourceVal = sourceEvent.GetContent(mSourceKey); - std::vector logIndex; // all splitted logs - std::vector discardIndex; // used to send warning - int feedLines = 0; - bool splitSuccess = LogSplit(sourceVal.data(), sourceVal.size(), feedLines, logIndex, discardIndex, logPath); - *mFeedLines += feedLines; - mProcUnmatchedEventsCnt->Add(discardIndex.size()); - - if (AppConfig::GetInstance()->IsLogParseAlarmValid() && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { - if (!splitSuccess) { // warning if unsplittable - GetContext().GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, - "split log lines fail, please check log_begin_regex, file:" - + logPath.to_string() - + ", logs:" + sourceVal.substr(0, 1024).to_string(), - GetContext().GetProjectName(), - GetContext().GetLogstoreName(), - GetContext().GetRegion()); - LOG_ERROR(GetContext().GetLogger(), - ("split log lines fail", "please check log_begin_regex")("file_name", logPath)( - "log bytes", sourceVal.size() + 1)("first 1KB log", sourceVal.substr(0, 1024).to_string())); - } - for (auto& discardData : discardIndex) { // warning if data loss - GetContext().GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, - "split log lines discard data, file:" + logPath.to_string() - + ", logs:" + discardData.substr(0, 1024).to_string(), - GetContext().GetProjectName(), - GetContext().GetLogstoreName(), - GetContext().GetRegion()); - LOG_WARNING( - GetContext().GetLogger(), - ("split log lines discard data", "please check log_begin_regex")("file_name", logPath)( - "log bytes", sourceVal.size() + 1)("first 1KB log", discardData.substr(0, 1024).to_string())); - } - } - if (logIndex.size() == 0) { - return; - } - long sourceoffset = 0L; - if (sourceEvent.HasContent(LOG_RESERVED_KEY_FILE_OFFSET)) { - sourceoffset = atol(sourceEvent.GetContent(LOG_RESERVED_KEY_FILE_OFFSET).data()); // use safer method - } - StringBuffer splitKey = logGroup.GetSourceBuffer()->CopyString(mSourceKey); - for (auto& content : logIndex) { - std::unique_ptr targetEvent = logGroup.CreateLogEvent(); - targetEvent->SetTimestamp( - sourceEvent.GetTimestamp(), - sourceEvent.GetTimestampNanosecond()); // it is easy to forget other fields, better solution? - targetEvent->SetContentNoCopy(StringView(splitKey.data, splitKey.size), content); - if (mAppendingLogPositionMeta) { - auto const offset = sourceoffset + (content.data() - sourceVal.data()); - StringBuffer offsetStr = logGroup.GetSourceBuffer()->CopyString(std::to_string(offset)); - targetEvent->SetContentNoCopy(LOG_RESERVED_KEY_FILE_OFFSET, StringView(offsetStr.data, offsetStr.size)); - } - if (sourceEvent.GetContents().size() > 1) { // copy other fields - for (auto& kv : sourceEvent.GetContents()) { - if (kv.first != mSourceKey && kv.first != LOG_RESERVED_KEY_FILE_OFFSET) { - targetEvent->SetContentNoCopy(kv.first, kv.second); - } - } - } - newEvents.emplace_back(std::move(targetEvent)); - } -} - -bool ProcessorSplitRegexNative::LogSplit(const char* buffer, - int32_t size, - int32_t& lineFeed, - std::vector& logIndex, - std::vector& discardIndex, - const StringView& logPath) { - /* - | -------------- | -------- \n - multiStartIndex startIndex endIndex - multiStartIndex: used to cache current parsing log. Clear when starting the next log. - startIndex: the begin index of the current line - endIndex: the end index of the current line - - Supported regex combination: - 1. start - 2. start + continue - 3. start + end - 4. continue + end - 5. end - */ - int multiStartIndex = 0; - int startIndex = 0; - int endIndex = 0; - bool anyMatched = false; - lineFeed = 0; + StringView sourceVal = sourceEvent.GetContent(mSourceKey); + StringBuffer sourceKey = logGroup.GetSourceBuffer()->CopyString(mSourceKey); + const char* multiStartIndex = sourceVal.data(); std::string exception; bool isPartialLog = false; if (mMultiline.GetStartPatternReg() == nullptr && mMultiline.GetContinuePatternReg() == nullptr @@ -216,158 +150,236 @@ bool ProcessorSplitRegexNative::LogSplit(const char* buffer, // if only end pattern is given, then it will stick to this state isPartialLog = true; } - for (; endIndex <= size; endIndex++) { - if (endIndex == size || buffer[endIndex] == '\n') { - lineFeed++; - exception.clear(); - if (!isPartialLog) { - // it is impossible to enter this state if only end pattern is given - boost::regex regex; - if (mMultiline.GetStartPatternReg() != nullptr) { - regex = *mMultiline.GetStartPatternReg(); - } else { - regex = *mMultiline.GetContinuePatternReg(); - } - if (BoostRegexMatch(buffer + startIndex, endIndex - startIndex, regex, exception)) { - multiStartIndex = startIndex; - isPartialLog = true; - } else if (mMultiline.GetEndPatternReg() != nullptr && mMultiline.GetStartPatternReg() == nullptr - && mMultiline.GetContinuePatternReg() != nullptr - && BoostRegexMatch( - buffer + startIndex, endIndex - startIndex, *mMultiline.GetEndPatternReg(), exception)) { - // case: continue + end - // output logs in cache from multiStartIndex to endIndex - anyMatched = true; - logIndex.emplace_back(buffer + multiStartIndex, endIndex - multiStartIndex); - multiStartIndex = endIndex + 1; - } else { - HandleUnmatchLogs(buffer, multiStartIndex, endIndex, logIndex, discardIndex); - multiStartIndex = endIndex + 1; - } + + long sourceOffset = 0L; + if (sourceEvent.HasContent(LOG_RESERVED_KEY_FILE_OFFSET)) { + sourceOffset = atol(sourceEvent.GetContent(LOG_RESERVED_KEY_FILE_OFFSET).data()); // use safer method + } + + size_t begin = 0; + while (begin < sourceVal.size()) { + StringView content = GetNextLine(sourceVal, begin); + if (!isPartialLog) { + // it is impossible to enter this state if only end pattern is given + boost::regex regex; + if (mMultiline.GetStartPatternReg() != nullptr) { + regex = *mMultiline.GetStartPatternReg(); } else { - // case: start + continue or continue + end - if (mMultiline.GetContinuePatternReg() != nullptr - && BoostRegexMatch( - buffer + startIndex, endIndex - startIndex, *mMultiline.GetContinuePatternReg(), exception)) { - startIndex = endIndex + 1; - continue; - } - if (mMultiline.GetEndPatternReg() != nullptr) { - // case: start + end or continue + end or end - if (mMultiline.GetContinuePatternReg() != nullptr) { - // current line is not matched against the continue pattern, so the end pattern will decide if - // the current log is a match or not - if (BoostRegexMatch(buffer + startIndex, - endIndex - startIndex, - *mMultiline.GetEndPatternReg(), - exception)) { - anyMatched = true; - logIndex.emplace_back(buffer + multiStartIndex, endIndex - multiStartIndex); - } else { - HandleUnmatchLogs(buffer, multiStartIndex, endIndex, logIndex, discardIndex); - } - multiStartIndex = endIndex + 1; - isPartialLog = false; + regex = *mMultiline.GetContinuePatternReg(); + } + if (BoostRegexMatch(content.data(), content.size(), regex, exception)) { + multiStartIndex = content.data(); + isPartialLog = true; + } else if (mMultiline.GetEndPatternReg() != nullptr && mMultiline.GetStartPatternReg() == nullptr + && mMultiline.GetContinuePatternReg() != nullptr + && BoostRegexMatch(content.data(), content.size(), *mMultiline.GetEndPatternReg(), exception)) { + // case: continue + end + // output logs in cache from multiStartIndex to endIndex + HandleSplittedLogs(StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); + multiStartIndex = content.data() + content.size() + 1; + } else { + HandleUnmatchLogs(StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents, + logPath); + multiStartIndex = content.data() + content.size() + 1; + } + } else { + // case: start + continue or continue + end + if (mMultiline.GetContinuePatternReg() != nullptr + && BoostRegexMatch(content.data(), content.size(), *mMultiline.GetContinuePatternReg(), exception)) { + begin += content.size() + 1; + continue; + } + if (mMultiline.GetEndPatternReg() != nullptr) { + // case: start + end or continue + end or end + if (mMultiline.GetContinuePatternReg() != nullptr) { + // current line is not matched against the continue pattern, so the end pattern will decide + // if the current log is a match or not + if (BoostRegexMatch(content.data(), content.size(), *mMultiline.GetEndPatternReg(), exception)) { + HandleSplittedLogs( + StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); } else { - // case: start + end or end - if (BoostRegexMatch(buffer + startIndex, - endIndex - startIndex, - *mMultiline.GetEndPatternReg(), - exception)) { - anyMatched = true; - logIndex.emplace_back(buffer + multiStartIndex, endIndex - multiStartIndex); - multiStartIndex = endIndex + 1; - if (mMultiline.GetStartPatternReg() != nullptr) { - isPartialLog = false; - } - // if only end pattern is given, start another log automatically - } - // no continue pattern given, and the current line in not matched against the end pattern, so - // wait for the next line + HandleUnmatchLogs( + StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents, + logPath); } + multiStartIndex = content.data() + content.size() + 1; + isPartialLog = false; } else { - if (mMultiline.GetContinuePatternReg() == nullptr) { - // case: start - if (BoostRegexMatch(buffer + startIndex, - endIndex - startIndex, - *mMultiline.GetStartPatternReg(), - exception)) { - logIndex.emplace_back(buffer + multiStartIndex, startIndex - 1 - multiStartIndex); - multiStartIndex = startIndex; - } - } else { - // case: start + continue - // continue pattern is given, but current line is not matched against the continue pattern - if (!BoostRegexMatch(buffer + startIndex, - endIndex - startIndex, - *mMultiline.GetStartPatternReg(), - exception)) { - // when no end pattern is given, the only chance to enter unmatched state is when both start - // and continue pattern are given, and the current line is not matched against the start - // pattern - HandleUnmatchLogs(buffer, startIndex, endIndex, logIndex, discardIndex); - multiStartIndex = endIndex + 1; + // case: start + end or end + if (BoostRegexMatch(content.data(), content.size(), *mMultiline.GetEndPatternReg(), exception)) { + HandleSplittedLogs( + StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); + multiStartIndex = content.data() + content.size() + 1; + if (mMultiline.GetStartPatternReg() != nullptr) { isPartialLog = false; - } else { - anyMatched = true; - logIndex.emplace_back(buffer + multiStartIndex, startIndex - 1 - multiStartIndex); - multiStartIndex = startIndex; } + // if only end pattern is given, start another log automatically } + // no continue pattern given, and the current line in not matched against the end pattern, + // so wait for the next line } - } - startIndex = endIndex + 1; - if (!exception.empty()) { - if (AppConfig::GetInstance()->IsLogParseAlarmValid()) { - if (GetContext().GetAlarm().IsLowLevelAlarmValid()) { - LOG_ERROR(GetContext().GetLogger(), - ("regex_match in LogSplit fail, exception", exception)("project", - GetContext().GetProjectName())( - "logstore", GetContext().GetLogstoreName())("file", logPath)); + } else { + if (mMultiline.GetContinuePatternReg() == nullptr) { + // case: start + if (BoostRegexMatch(content.data(), content.size(), *mMultiline.GetStartPatternReg(), exception)) { + HandleSplittedLogs(StringView(multiStartIndex, content.data() - 1 - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); + multiStartIndex = content.data(); + } + } else { + // case: start + continue + // continue pattern is given, but current line is not matched against the continue pattern + if (!BoostRegexMatch(content.data(), content.size(), *mMultiline.GetStartPatternReg(), exception)) { + // when no end pattern is given, the only chance to enter unmatched state is when both + // start and continue pattern are given, and the current line is not matched against the + // start pattern + HandleUnmatchLogs( + StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents, + logPath); + multiStartIndex = content.data() + content.size() + 1; + isPartialLog = false; + } else { + HandleSplittedLogs(StringView(multiStartIndex, content.data() - 1 - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); + multiStartIndex = content.data(); } - GetContext().GetAlarm().SendAlarm(REGEX_MATCH_ALARM, - "regex_match in LogSplit fail:" + exception + ", file" - + logPath.to_string(), - GetContext().GetProjectName(), - GetContext().GetLogstoreName(), - GetContext().GetRegion()); } } } + begin += content.size() + 1; } - // when in unmatched state, the unmatched log is handled one by one, so there is no need for additional handle here - if (isPartialLog && multiStartIndex < size) { - endIndex = buffer[size - 1] == '\n' ? size - 1 : size; + // when in unmatched state, the unmatched log is handled one by one, so there is no need for additional handle + // here + if (isPartialLog && multiStartIndex - sourceVal.data() < sourceVal.size()) { + int endIndex = sourceVal[sourceVal.size() - 1] == '\n' ? sourceVal.size() - 1 : sourceVal.size(); if (mMultiline.GetEndPatternReg() == nullptr) { - anyMatched = true; - logIndex.emplace_back(buffer + multiStartIndex, endIndex - multiStartIndex); + HandleSplittedLogs(StringView(multiStartIndex, sourceVal.data() + endIndex - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); } else { - HandleUnmatchLogs(buffer, multiStartIndex, endIndex, logIndex, discardIndex); + HandleUnmatchLogs(StringView(multiStartIndex, sourceVal.data() + endIndex - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents, + logPath); } } - return anyMatched; } -void ProcessorSplitRegexNative::HandleUnmatchLogs(const char* buffer, - int& multiStartIndex, - int endIndex, - std::vector& logIndex, - std::vector& discardIndex) { - if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::DISCARD) { - for (int i = multiStartIndex; i <= endIndex; i++) { - if (i == endIndex || buffer[i] == '\n') { - discardIndex.emplace_back(buffer + multiStartIndex, i - multiStartIndex); - multiStartIndex = i + 1; +void ProcessorSplitRegexNative::HandleSplittedLogs(const StringView& content, + long sourceoffset, + StringBuffer& sourceKey, + const LogEvent& sourceEvent, + PipelineEventGroup& logGroup, + EventsContainer& newEvents) { + StringView sourceVal = sourceEvent.GetContent(mSourceKey); + std::unique_ptr targetEvent = logGroup.CreateLogEvent(); + targetEvent->SetTimestamp( + sourceEvent.GetTimestamp(), + sourceEvent.GetTimestampNanosecond()); // it is easy to forget other fields, better solution? + targetEvent->SetContentNoCopy(StringView(sourceKey.data, sourceKey.size), content); + if (mAppendingLogPositionMeta) { + auto const offset = sourceoffset + (content.data() - sourceVal.data()); + StringBuffer offsetStr = logGroup.GetSourceBuffer()->CopyString(std::to_string(offset)); + targetEvent->SetContentNoCopy(LOG_RESERVED_KEY_FILE_OFFSET, StringView(offsetStr.data, offsetStr.size)); + } + if (sourceEvent.GetContents().size() > 1) { // copy other fields + for (auto& kv : sourceEvent.GetContents()) { + if (kv.first != mSourceKey && kv.first != LOG_RESERVED_KEY_FILE_OFFSET) { + targetEvent->SetContentNoCopy(kv.first, kv.second); } } - } else if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::SINGLE_LINE) { - for (int i = multiStartIndex; i <= endIndex; i++) { - if (i == endIndex || buffer[i] == '\n') { - logIndex.emplace_back(buffer + multiStartIndex, i - multiStartIndex); - multiStartIndex = i + 1; - } + } + newEvents.emplace_back(std::move(targetEvent)); +} + +void ProcessorSplitRegexNative::HandleUnmatchLogs(const StringView& sourceVal, + long sourceoffset, + StringBuffer& sourceKey, + const LogEvent& sourceEvent, + PipelineEventGroup& logGroup, + EventsContainer& newEvents, + StringView logPath) { + size_t begin = 0; + while (begin < sourceVal.size()) { + StringView content = GetNextLine(sourceVal, begin); + mProcUnmatchedEventsCnt->Add(1); + if (!mIgnoreUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + LOG_WARNING( + GetContext().GetLogger(), + ("unmatched log line", "please check regex")("action", mMultiline.UnmatchedContentTreatmentToString())( + "first 1KB", content.substr(0, 1024).to_string())("filepath", logPath.to_string())( + "processor", sName)("config", GetContext().GetConfigName())("log bytes", content.size() + 1)); + GetContext().GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, + "unmatched log line, first 1KB:" + content.substr(0, 1024).to_string() + + "\taction: " + mMultiline.UnmatchedContentTreatmentToString() + + "\tfilepath: " + logPath.to_string() + "\tprocessor: " + sName + + "\tconfig: " + GetContext().GetConfigName(), + GetContext().GetProjectName(), + GetContext().GetLogstoreName(), + GetContext().GetRegion()); + } + if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::SINGLE_LINE) { + HandleSplittedLogs(content, sourceoffset, sourceKey, sourceEvent, logGroup, newEvents); + } + begin += content.size() + 1; + } +} + +StringView ProcessorSplitRegexNative::GetNextLine(StringView log, size_t begin) { + if (begin >= log.size()) { + return StringView(); + } + + for (size_t end = begin; end < log.size(); ++end) { + if (log[end] == mSplitChar) { + return StringView(log.data() + begin, end - begin); } } + return StringView(log.data() + begin, log.size() - begin); } } // namespace logtail diff --git a/core/processor/ProcessorSplitRegexNative.h b/core/processor/ProcessorSplitRegexNative.h index 63afe5c899..5af12a2191 100644 --- a/core/processor/ProcessorSplitRegexNative.h +++ b/core/processor/ProcessorSplitRegexNative.h @@ -31,8 +31,10 @@ class ProcessorSplitRegexNative : public Processor { static const std::string sName; std::string mSourceKey = DEFAULT_CONTENT_KEY; + char mSplitChar = '\n'; MultilineOptions mMultiline; bool mAppendingLogPositionMeta = false; + bool mIgnoreUnmatchWarning = false; const std::string& Name() const override { return sName; } bool Init(const Json::Value& config) override; @@ -46,19 +48,23 @@ class ProcessorSplitRegexNative : public Processor { const StringView& logPath, PipelineEventPtr&& e, EventsContainer& newEvents); - bool LogSplit(const char* buffer, - int32_t size, - int32_t& lineFeed, - std::vector& logIndex, - std::vector& discardIndex, - const StringView& logPath); - void HandleUnmatchLogs(const char* buffer, - int& multiBeginIndex, - int endIndex, - std::vector& logIndex, - std::vector& discardIndex); + void SplitLogByRegex(PipelineEventGroup& logGroup); + void HandleSplittedLogs(const StringView& content, + long sourceoffset, + StringBuffer& sourceKey, + const LogEvent& sourceEvent, + PipelineEventGroup& logGroup, + EventsContainer& newEvents); + void HandleUnmatchLogs(const StringView& sourceVal, + long sourceoffset, + StringBuffer& sourceKey, + const LogEvent& sourceEvent, + PipelineEventGroup& logGroup, + EventsContainer& newEvents, + StringView logPath); + + StringView GetNextLine(StringView log, size_t begin); - int* mFeedLines = nullptr; int* mSplitLines = nullptr; CounterPtr mProcSplittedEventsCnt; diff --git a/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp b/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp index 636ba400c5..6dd59d12fc 100644 --- a/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp @@ -472,7 +472,6 @@ void ProcessorSplitRegexNativeUnittest::TestProcess() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); // check observability - APSARA_TEST_EQUAL_FATAL(4, processor.GetContext().GetProcessProfile().feedLines); APSARA_TEST_EQUAL_FATAL(2, processor.GetContext().GetProcessProfile().splitLines); } From c469908955983f1a65960287612f13f53b09ddbc Mon Sep 17 00:00:00 2001 From: abingcbc Date: Thu, 21 Mar 2024 02:08:17 +0000 Subject: [PATCH 04/15] fix --- .../processor/ProcessorDesensitizeNativeUnittest.cpp | 1 + .../processor/ProcessorParseApsaraNativeUnittest.cpp | 1 + .../processor/ProcessorParseDelimiterNativeUnittest.cpp | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp b/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp index 5e73681ef7..77d7dd08b6 100644 --- a/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp @@ -157,6 +157,7 @@ dbf@@@324 FS2$%pwd,pwd=saf543#$@,," // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processorSplitRegexNative; processorSplitRegexNative.SetContext(mContext); + processorSplitRegexNative.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processorSplitRegexNative.Init(config)); processorSplitRegexNative.Process(eventGroup); diff --git a/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp b/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp index 85e765b2cd..db6cf8b74e 100644 --- a/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp @@ -574,6 +574,7 @@ void ProcessorParseApsaraNativeUnittest::TestMultipleLines() { // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processorSplitRegexNative; processorSplitRegexNative.SetContext(mContext); + processorSplitRegexNative.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processorSplitRegexNative.Init(config)); processorSplitRegexNative.Process(eventGroup); diff --git a/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp b/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp index 3ee14df93c..24893f3e16 100644 --- a/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp @@ -241,6 +241,7 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); @@ -363,6 +364,7 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -514,6 +516,7 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -640,6 +643,7 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -764,6 +768,7 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -885,6 +890,7 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -1011,6 +1017,7 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -1228,6 +1235,7 @@ void ProcessorParseDelimiterNativeUnittest::TestProcessQuote() { // run function ProcessorSplitRegexNative ProcessorSplitRegexNative processor; processor.SetContext(mContext); + processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); From 844c31fd7b7694259ed88c92cd24a458f449cf48 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Thu, 21 Mar 2024 03:05:30 +0000 Subject: [PATCH 05/15] fix unittest --- .../ProcessorDesensitizeNativeUnittest.cpp | 2 +- .../ProcessorParseApsaraNativeUnittest.cpp | 2 +- .../ProcessorParseDelimiterNativeUnittest.cpp | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp b/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp index 77d7dd08b6..1dab8c6f36 100644 --- a/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp @@ -150,7 +150,7 @@ dbf@@@324 FS2$%pwd,pwd=saf543#$@,," // make config Json::Value config = GetCastSensWordConfig("content"); std::string pluginId = "testID"; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; diff --git a/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp b/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp index db6cf8b74e..137c3b268f 100644 --- a/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp @@ -565,7 +565,7 @@ void ProcessorParseApsaraNativeUnittest::TestMultipleLines() { config["KeepingSourceWhenParseSucceed"] = false; config["CopingRawLog"] = false; config["RenamedSourceKey"] = "__raw__"; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; diff --git a/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp b/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp index 24893f3e16..2672ac7cc3 100644 --- a/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp @@ -233,7 +233,7 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { config["CopingRawLog"] = false; config["RenamedSourceKey"] = "__raw__"; config["AllowingShortenedFields"] = false; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; @@ -356,7 +356,7 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { config["CopingRawLog"] = false; config["RenamedSourceKey"] = "__raw__"; config["AllowingShortenedFields"] = true; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; @@ -508,7 +508,7 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { config["CopingRawLog"] = false; config["RenamedSourceKey"] = "__raw__"; config["AllowingShortenedFields"] = false; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; @@ -635,7 +635,7 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { config["CopingRawLog"] = false; config["RenamedSourceKey"] = "__raw__"; config["AllowingShortenedFields"] = false; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; @@ -760,7 +760,7 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { config["CopingRawLog"] = false; config["RenamedSourceKey"] = "__raw__"; config["AllowingShortenedFields"] = false; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; @@ -882,7 +882,7 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { config["CopingRawLog"] = false; config["RenamedSourceKey"] = "__raw__"; config["AllowingShortenedFields"] = false; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; @@ -1009,7 +1009,7 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { config["CopingRawLog"] = false; config["RenamedSourceKey"] = "__raw__"; config["AllowingShortenedFields"] = false; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; @@ -1227,7 +1227,7 @@ void ProcessorParseDelimiterNativeUnittest::TestProcessQuote() { config["CopingRawLog"] = false; config["RenamedSourceKey"] = "__raw__"; config["AllowingShortenedFields"] = true; - config["StartPattern"] = ".*"; + config["StartPattern"] = "[a-zA-Z0-9]*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; From eeb38179e19e99485518e5745ebda3b06878d0f4 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Thu, 21 Mar 2024 06:53:47 +0000 Subject: [PATCH 06/15] fix --- core/file_server/MultilineOptions.cpp | 9 +- core/file_server/MultilineOptions.h | 3 +- core/processor/ProcessorSplitRegexNative.cpp | 132 +++++++----------- core/processor/ProcessorSplitRegexNative.h | 13 +- .../ProcessorDesensitizeNativeUnittest.cpp | 3 +- .../ProcessorParseApsaraNativeUnittest.cpp | 6 +- .../ProcessorParseDelimiterNativeUnittest.cpp | 22 ++- .../ProcessorSplitRegexNativeUnittest.cpp | 99 ++++++++----- 8 files changed, 154 insertions(+), 133 deletions(-) diff --git a/core/file_server/MultilineOptions.cpp b/core/file_server/MultilineOptions.cpp index 434bbb1349..a98c19e3c7 100644 --- a/core/file_server/MultilineOptions.cpp +++ b/core/file_server/MultilineOptions.cpp @@ -181,12 +181,13 @@ bool MultilineOptions::ParseRegex(const string& pattern, shared_ptr; } // namespace logtail diff --git a/core/processor/ProcessorSplitRegexNative.cpp b/core/processor/ProcessorSplitRegexNative.cpp index 18ba4ac987..5f3c91a638 100644 --- a/core/processor/ProcessorSplitRegexNative.cpp +++ b/core/processor/ProcessorSplitRegexNative.cpp @@ -125,11 +125,11 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, return; } const LogEvent& sourceEvent = e.Cast(); - if (!sourceEvent.HasContent(mSourceKey)) { + if (sourceEvent.GetContents().size() != 2 || !sourceEvent.HasContent(mSourceKey)) { newEvents.emplace_back(std::move(e)); LOG_ERROR(mContext->GetLogger(), - ("unexpected error", "Some events do not have the SourceKey.")("processor", sName)( - "SourceKey", mSourceKey)("config", mContext->GetConfigName())); + ("unexpected error", "some events do not have the SourceKey")("SourceKey", mSourceKey)( + "processor", sName)("config", mContext->GetConfigName())); mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, "unexpected error: some events do not have the sourceKey.\tSourceKey: " + mSourceKey + "\tprocessor: " + sName @@ -151,7 +151,7 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, isPartialLog = true; } - long sourceOffset = 0L; + uint32_t sourceOffset = 0; if (sourceEvent.HasContent(LOG_RESERVED_KEY_FILE_OFFSET)) { sourceOffset = atol(sourceEvent.GetContent(LOG_RESERVED_KEY_FILE_OFFSET).data()); // use safer method } @@ -174,23 +174,9 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, && mMultiline.GetContinuePatternReg() != nullptr && BoostRegexMatch(content.data(), content.size(), *mMultiline.GetEndPatternReg(), exception)) { // case: continue + end - // output logs in cache from multiStartIndex to endIndex - HandleSplittedLogs(StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), - sourceOffset, - sourceKey, - sourceEvent, - logGroup, - newEvents); - multiStartIndex = content.data() + content.size() + 1; + CreateNewEvent(content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents); } else { - HandleUnmatchLogs(StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), - sourceOffset, - sourceKey, - sourceEvent, - logGroup, - newEvents, - logPath); - multiStartIndex = content.data() + content.size() + 1; + HandleUnmatchLogs(content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents, logPath); } } else { // case: start + continue or continue + end @@ -205,13 +191,12 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, // current line is not matched against the continue pattern, so the end pattern will decide // if the current log is a match or not if (BoostRegexMatch(content.data(), content.size(), *mMultiline.GetEndPatternReg(), exception)) { - HandleSplittedLogs( - StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), - sourceOffset, - sourceKey, - sourceEvent, - logGroup, - newEvents); + CreateNewEvent(StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); } else { HandleUnmatchLogs( StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), @@ -222,18 +207,16 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, newEvents, logPath); } - multiStartIndex = content.data() + content.size() + 1; isPartialLog = false; } else { // case: start + end or end if (BoostRegexMatch(content.data(), content.size(), *mMultiline.GetEndPatternReg(), exception)) { - HandleSplittedLogs( - StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), - sourceOffset, - sourceKey, - sourceEvent, - logGroup, - newEvents); + CreateNewEvent(StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); multiStartIndex = content.data() + content.size() + 1; if (mMultiline.GetStartPatternReg() != nullptr) { isPartialLog = false; @@ -247,12 +230,12 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, if (mMultiline.GetContinuePatternReg() == nullptr) { // case: start if (BoostRegexMatch(content.data(), content.size(), *mMultiline.GetStartPatternReg(), exception)) { - HandleSplittedLogs(StringView(multiStartIndex, content.data() - 1 - multiStartIndex), - sourceOffset, - sourceKey, - sourceEvent, - logGroup, - newEvents); + CreateNewEvent(StringView(multiStartIndex, content.data() - 1 - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); multiStartIndex = content.data(); } } else { @@ -270,15 +253,14 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, logGroup, newEvents, logPath); - multiStartIndex = content.data() + content.size() + 1; isPartialLog = false; } else { - HandleSplittedLogs(StringView(multiStartIndex, content.data() - 1 - multiStartIndex), - sourceOffset, - sourceKey, - sourceEvent, - logGroup, - newEvents); + CreateNewEvent(StringView(multiStartIndex, content.data() - 1 - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); multiStartIndex = content.data(); } } @@ -291,12 +273,12 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, if (isPartialLog && multiStartIndex - sourceVal.data() < sourceVal.size()) { int endIndex = sourceVal[sourceVal.size() - 1] == '\n' ? sourceVal.size() - 1 : sourceVal.size(); if (mMultiline.GetEndPatternReg() == nullptr) { - HandleSplittedLogs(StringView(multiStartIndex, sourceVal.data() + endIndex - multiStartIndex), - sourceOffset, - sourceKey, - sourceEvent, - logGroup, - newEvents); + CreateNewEvent(StringView(multiStartIndex, sourceVal.data() + endIndex - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); } else { HandleUnmatchLogs(StringView(multiStartIndex, sourceVal.data() + endIndex - multiStartIndex), sourceOffset, @@ -309,12 +291,12 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, } } -void ProcessorSplitRegexNative::HandleSplittedLogs(const StringView& content, - long sourceoffset, - StringBuffer& sourceKey, - const LogEvent& sourceEvent, - PipelineEventGroup& logGroup, - EventsContainer& newEvents) { +void ProcessorSplitRegexNative::CreateNewEvent(const StringView& content, + long sourceoffset, + StringBuffer& sourceKey, + const LogEvent& sourceEvent, + PipelineEventGroup& logGroup, + EventsContainer& newEvents) { StringView sourceVal = sourceEvent.GetContent(mSourceKey); std::unique_ptr targetEvent = logGroup.CreateLogEvent(); targetEvent->SetTimestamp( @@ -326,13 +308,6 @@ void ProcessorSplitRegexNative::HandleSplittedLogs(const StringView& content, StringBuffer offsetStr = logGroup.GetSourceBuffer()->CopyString(std::to_string(offset)); targetEvent->SetContentNoCopy(LOG_RESERVED_KEY_FILE_OFFSET, StringView(offsetStr.data, offsetStr.size)); } - if (sourceEvent.GetContents().size() > 1) { // copy other fields - for (auto& kv : sourceEvent.GetContents()) { - if (kv.first != mSourceKey && kv.first != LOG_RESERVED_KEY_FILE_OFFSET) { - targetEvent->SetContentNoCopy(kv.first, kv.second); - } - } - } newEvents.emplace_back(std::move(targetEvent)); } @@ -350,20 +325,21 @@ void ProcessorSplitRegexNative::HandleUnmatchLogs(const StringView& sourceVal, if (!mIgnoreUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING( GetContext().GetLogger(), - ("unmatched log line", "please check regex")("action", mMultiline.UnmatchedContentTreatmentToString())( + ("unmatched log line", "please check regex")( + "action", UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment))( "first 1KB", content.substr(0, 1024).to_string())("filepath", logPath.to_string())( "processor", sName)("config", GetContext().GetConfigName())("log bytes", content.size() + 1)); - GetContext().GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, - "unmatched log line, first 1KB:" + content.substr(0, 1024).to_string() - + "\taction: " + mMultiline.UnmatchedContentTreatmentToString() - + "\tfilepath: " + logPath.to_string() + "\tprocessor: " + sName - + "\tconfig: " + GetContext().GetConfigName(), - GetContext().GetProjectName(), - GetContext().GetLogstoreName(), - GetContext().GetRegion()); + GetContext().GetAlarm().SendAlarm( + SPLIT_LOG_FAIL_ALARM, + "unmatched log line, first 1KB:" + content.substr(0, 1024).to_string() + "\taction: " + + UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment) + "\tfilepath: " + + logPath.to_string() + "\tprocessor: " + sName + "\tconfig: " + GetContext().GetConfigName(), + GetContext().GetProjectName(), + GetContext().GetLogstoreName(), + GetContext().GetRegion()); } if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::SINGLE_LINE) { - HandleSplittedLogs(content, sourceoffset, sourceKey, sourceEvent, logGroup, newEvents); + CreateNewEvent(content, sourceoffset, sourceKey, sourceEvent, logGroup, newEvents); } begin += content.size() + 1; } @@ -375,7 +351,7 @@ StringView ProcessorSplitRegexNative::GetNextLine(StringView log, size_t begin) } for (size_t end = begin; end < log.size(); ++end) { - if (log[end] == mSplitChar) { + if (log[end] == '\n') { return StringView(log.data() + begin, end - begin); } } diff --git a/core/processor/ProcessorSplitRegexNative.h b/core/processor/ProcessorSplitRegexNative.h index 5af12a2191..fddb08597a 100644 --- a/core/processor/ProcessorSplitRegexNative.h +++ b/core/processor/ProcessorSplitRegexNative.h @@ -31,7 +31,6 @@ class ProcessorSplitRegexNative : public Processor { static const std::string sName; std::string mSourceKey = DEFAULT_CONTENT_KEY; - char mSplitChar = '\n'; MultilineOptions mMultiline; bool mAppendingLogPositionMeta = false; bool mIgnoreUnmatchWarning = false; @@ -49,12 +48,12 @@ class ProcessorSplitRegexNative : public Processor { PipelineEventPtr&& e, EventsContainer& newEvents); void SplitLogByRegex(PipelineEventGroup& logGroup); - void HandleSplittedLogs(const StringView& content, - long sourceoffset, - StringBuffer& sourceKey, - const LogEvent& sourceEvent, - PipelineEventGroup& logGroup, - EventsContainer& newEvents); + void CreateNewEvent(const StringView& content, + long sourceoffset, + StringBuffer& sourceKey, + const LogEvent& sourceEvent, + PipelineEventGroup& logGroup, + EventsContainer& newEvents); void HandleUnmatchLogs(const StringView& sourceVal, long sourceoffset, StringBuffer& sourceKey, diff --git a/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp b/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp index 1dab8c6f36..5cb4e5d952 100644 --- a/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp @@ -77,7 +77,8 @@ void ProcessorDesensitizeNativeUnittest::TestMultipleLines() { "contents" : { "content" : "asf@@@324 FS2$%pwd,pwd=saf543#$@,, -dbf@@@324 FS2$%pwd,pwd=saf543#$@,," +dbf@@@324 FS2$%pwd,pwd=saf543#$@,,", + "__file_offset__": 0 }, "timestampNanosecond" : 0, "timestamp" : 12345678901, diff --git a/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp b/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp index 137c3b268f..83f31de40c 100644 --- a/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp @@ -428,7 +428,8 @@ void ProcessorParseApsaraNativeUnittest::TestMultipleLines() { { "content" : "[2023-09-04 13:15:50.1]\t[ERROR]\t[1]\t/ilogtail/AppConfigBase.cpp:1\t\tAppConfigBase AppConfigBase:1 [2023-09-04 13:15:33.2]\t[INFO]\t[2]\t/ilogtail/AppConfigBase.cpp:2\t\tAppConfigBase AppConfigBase:2 -[2023-09-04 13:15:22.3]\t[WARNING]\t[3]\t/ilogtail/AppConfigBase.cpp:3\t\tAppConfigBase AppConfigBase:3" +[2023-09-04 13:15:22.3]\t[WARNING]\t[3]\t/ilogtail/AppConfigBase.cpp:3\t\tAppConfigBase AppConfigBase:3", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 @@ -438,7 +439,8 @@ void ProcessorParseApsaraNativeUnittest::TestMultipleLines() { { "content" : "[2023-09-04 13:15 :50.1]\t[ERROR]\t[1]\t/ilogtail/AppConfigBase.cpp:1\t\tAppConfigBase AppConfigBase:1 -[2023-09-04 13:15:22.3]\t[WARNING]\t[3]\t/ilogtail/AppConfigBase.cpp:3\t\tAppConfigBase AppConfigBase:3" +[2023-09-04 13:15:22.3]\t[WARNING]\t[3]\t/ilogtail/AppConfigBase.cpp:3\t\tAppConfigBase AppConfigBase:3", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 diff --git a/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp b/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp index 2672ac7cc3..c860a102e6 100644 --- a/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp @@ -142,7 +142,8 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { "contents" : { "content" : "123@@45 -012@@34" +012@@34", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 @@ -265,7 +266,8 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { "contents" : { "content" : "123@@45 -012@@34" +012@@34", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 @@ -411,7 +413,8 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { "contents" : { "content" : "123@@456@@1@@2@@3 -012@@345@@1@@2@@3" +012@@345@@1@@2@@3", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 @@ -538,7 +541,8 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { "contents" : { "content" : "123@@456@@1@@2@@3 -012@@345@@1@@2@@3" +012@@345@@1@@2@@3", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 @@ -668,7 +672,8 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { "contents" : { "content" : "123@@456 -012@@345" +012@@345", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 @@ -791,7 +796,8 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { "contents" : { "content" : "123@@456@@789 -012@@345@@678" +012@@345@@678", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 @@ -913,7 +919,8 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { "contents" : { "content" : "123@@456@@789 -012@@345@@678" +012@@345@@678", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 @@ -1123,6 +1130,7 @@ void ProcessorParseDelimiterNativeUnittest::TestProcessQuote() { 2023-12-25 1|zdfvzdfv zfdv|zfdvzdfv zfd|fzdvzdfvzdfvz|zfvzfdzv zfdb|zfdvzdfbvzb|zdfvzdfbvzdb|'advfawevaevb|dvzdfvzdbfazdb|zdfvbzdfb '|zdfbvzbszfbsfb 2023-12-25 1|zdfvzdfv zfdv|zfdvzdfv zfd|fzdvzdfvzdfvz|zfvzfdzv zfdb|zfdvzdfbvzb|zdfvzdfbvzdb|'advfawevaevb|dvzdfvzdbfazdb|zdfvbzdfb '|zdfbvzbszfbsfb 2023-12-25 1|zdfvzdfv zfdv|zfdvzdfv zfd|fzdvzdfvzdfvz|zfvzfdzv zfdb|zfdvzdfbvzb|zdfvzdfbvzdb|'advfawevaevb|dvzdfvzdbfazdb|zdfvbzdfb '|zdfbvzbszfbsfb", + "__file_offset__": 0 }, "timestamp" : 12345678901, "type" : 1 diff --git a/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp b/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp index 6dd59d12fc..4da7e5c30c 100644 --- a/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp @@ -92,7 +92,8 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultiline() { "content" : ")" << LOG_BEGIN_STRING << R"(first.\nmultiline1\nmultiline2\n)" << LOG_BEGIN_STRING << R"(second.\nmultiline1\nmultiline2)" - << R"(" + << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -163,7 +164,8 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineKeepUnmatch() { { "content" : ")" << R"(first.\nmultiline1\nmultiline2\n)" << LOG_BEGIN_STRING << R"(second.\nmultiline1\nmultiline2)" - << R"(" + << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -249,7 +251,8 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineDiscardUnmatch( "contents" : { "content" : ")" - << R"(first.\nmultiline1\nmultiline2\n)" << LOG_BEGIN_STRING << R"(second.\nmultiline1\nmultiline2" + << R"(first.\nmultiline1\nmultiline2\n)" << LOG_BEGIN_STRING << R"(second.\nmultiline1\nmultiline2", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -306,7 +309,8 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineAllNotMatchKeep { "contents" : { - "content" : "first.\nmultiline1\nsecond.\nmultiline1" + "content" : "first.\nmultiline1\nsecond.\nmultiline1", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -389,7 +393,8 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineAllNotMatchDisc { "contents" : { - "content" : "first.\nmultiline1\nsecond.\nmultiline1" + "content" : "first.\nmultiline1\nsecond.\nmultiline1", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -520,7 +525,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() { "content" : ")" << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" - << LOG_CONTINUE_STRING << R"(" + << LOG_CONTINUE_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -566,7 +572,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -622,7 +629,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -667,7 +675,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -714,7 +723,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { { "content" : ")" << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n)" << LOG_END_STRING - << R"(" + << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -760,7 +770,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -790,7 +801,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -834,7 +846,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -880,7 +893,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -926,7 +940,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { { "content" : ")" << LOG_UNMATCH << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" - << LOG_END_STRING << R"(" + << LOG_END_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -972,7 +987,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1018,7 +1034,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1062,7 +1079,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1108,7 +1126,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1173,7 +1192,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { { "content" : ")" << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" - << LOG_CONTINUE_STRING << R"(" + << LOG_CONTINUE_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1229,7 +1249,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1295,7 +1316,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1366,7 +1388,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1423,7 +1446,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { { "content" : ")" << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n)" << LOG_END_STRING - << R"(" + << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1479,7 +1503,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1545,7 +1570,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1615,7 +1641,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1671,7 +1698,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1733,7 +1761,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { { "content" : ")" << LOG_UNMATCH << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" - << LOG_END_STRING << R"(" + << LOG_END_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1789,7 +1818,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1845,7 +1875,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1905,7 +1936,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" + << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1951,7 +1983,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 }, "timestamp" : 12345678901, "timestampNanosecond" : 0, From c1ad25912a34a74bf1a18be46002e6657507d4f8 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Thu, 21 Mar 2024 08:44:47 +0000 Subject: [PATCH 07/15] rename plugin --- core/pipeline/Pipeline.cpp | 4 +- core/plugin/PluginRegistry.cpp | 4 +- ...rocessorSplitMultilineLogStringNative.cpp} | 47 ++++---- ... ProcessorSplitMultilineLogStringNative.h} | 4 +- core/unittest/pipeline/PipelineUnittest.cpp | 4 +- core/unittest/processor/CMakeLists.txt | 6 +- .../ProcessorDesensitizeNativeUnittest.cpp | 16 +-- .../ProcessorParseApsaraNativeUnittest.cpp | 16 +-- .../ProcessorParseDelimiterNativeUnittest.cpp | 66 ++++++------ .../ProcessorParseJsonNativeUnittest.cpp | 2 +- ...SplitMultilineLogStringNativeUnittest.cpp} | 100 +++++++++--------- 11 files changed, 134 insertions(+), 135 deletions(-) rename core/processor/{ProcessorSplitRegexNative.cpp => ProcessorSplitMultilineLogStringNative.cpp} (88%) rename core/processor/{ProcessorSplitRegexNative.h => ProcessorSplitMultilineLogStringNative.h} (95%) rename core/unittest/processor/{ProcessorSplitRegexNativeUnittest.cpp => ProcessorSplitMultilineLogStringNativeUnittest.cpp} (94%) diff --git a/core/pipeline/Pipeline.cpp b/core/pipeline/Pipeline.cpp index 6032e74b66..fc7ae19701 100644 --- a/core/pipeline/Pipeline.cpp +++ b/core/pipeline/Pipeline.cpp @@ -26,7 +26,7 @@ #include "plugin/PluginRegistry.h" #include "processor/ProcessorParseApsaraNative.h" #include "processor/ProcessorSplitLogStringNative.h" -#include "processor/ProcessorSplitRegexNative.h" +#include "processor/ProcessorSplitMultilineLogStringNative.h" #include "processor/ProcessorTagNative.h" #include "processor/daemon/LogProcess.h" @@ -113,7 +113,7 @@ bool Pipeline::Init(Config&& config) { detail["SplitChar"] = Json::Value('\0'); detail["AppendingLogPositionMeta"] = Json::Value(inputFile->mFileReader.mAppendingLogPositionMeta); } else if (inputFile->mMultiline.IsMultiline()) { - processor = PluginRegistry::GetInstance()->CreateProcessor(ProcessorSplitRegexNative::sName, + processor = PluginRegistry::GetInstance()->CreateProcessor(ProcessorSplitMultilineLogStringNative::sName, to_string(++pluginIndex)); detail["Mode"] = Json::Value("custom"); detail["StartPattern"] = Json::Value(inputFile->mMultiline.mStartPattern); diff --git a/core/plugin/PluginRegistry.cpp b/core/plugin/PluginRegistry.cpp index b2320df97b..60fd4560ba 100644 --- a/core/plugin/PluginRegistry.cpp +++ b/core/plugin/PluginRegistry.cpp @@ -46,7 +46,7 @@ #include "processor/ProcessorParseRegexNative.h" #include "processor/ProcessorParseTimestampNative.h" #include "processor/ProcessorSplitLogStringNative.h" -#include "processor/ProcessorSplitRegexNative.h" +#include "processor/ProcessorSplitMultilineLogStringNative.h" #include "processor/ProcessorTagNative.h" #if defined(__linux__) && !defined(__ANDROID__) #include "processor/ProcessorSPL.h" @@ -250,7 +250,7 @@ void PluginRegistry::LoadStaticPlugins() { #endif RegisterProcessorCreator(new StaticProcessorCreator()); - RegisterProcessorCreator(new StaticProcessorCreator()); + RegisterProcessorCreator(new StaticProcessorCreator()); RegisterProcessorCreator(new StaticProcessorCreator()); RegisterProcessorCreator(new StaticProcessorCreator()); RegisterProcessorCreator(new StaticProcessorCreator()); diff --git a/core/processor/ProcessorSplitRegexNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp similarity index 88% rename from core/processor/ProcessorSplitRegexNative.cpp rename to core/processor/ProcessorSplitMultilineLogStringNative.cpp index 5f3c91a638..56d554c831 100644 --- a/core/processor/ProcessorSplitRegexNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include "processor/ProcessorSplitRegexNative.h" - #include #include @@ -26,12 +24,13 @@ #include "models/LogEvent.h" #include "monitor/MetricConstants.h" #include "plugin/instance/ProcessorInstance.h" +#include "processor/ProcessorSplitMultilineLogStringNative.h" namespace logtail { -const std::string ProcessorSplitRegexNative::sName = "processor_split_regex_native"; +const std::string ProcessorSplitMultilineLogStringNative::sName = "processor_split_multiline_log_string_native"; -bool ProcessorSplitRegexNative::Init(const Json::Value& config) { +bool ProcessorSplitMultilineLogStringNative::Init(const Json::Value& config) { std::string errorMsg; // SourceKey @@ -86,7 +85,7 @@ bool ProcessorSplitRegexNative::Init(const Json::Value& config) { return true; } -void ProcessorSplitRegexNative::Process(PipelineEventGroup& logGroup) { +void ProcessorSplitMultilineLogStringNative::Process(PipelineEventGroup& logGroup) { if (logGroup.GetEvents().empty()) { return; } @@ -100,7 +99,7 @@ void ProcessorSplitRegexNative::Process(PipelineEventGroup& logGroup) { logGroup.SwapEvents(newEvents); } -bool ProcessorSplitRegexNative::IsSupportedEvent(const PipelineEventPtr& e) const { +bool ProcessorSplitMultilineLogStringNative::IsSupportedEvent(const PipelineEventPtr& e) const { if (e.Is()) { return true; } @@ -116,10 +115,10 @@ bool ProcessorSplitRegexNative::IsSupportedEvent(const PipelineEventPtr& e) cons return false; } -void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, - const StringView& logPath, - PipelineEventPtr&& e, - EventsContainer& newEvents) { +void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& logGroup, + const StringView& logPath, + PipelineEventPtr&& e, + EventsContainer& newEvents) { if (!IsSupportedEvent(e)) { newEvents.emplace_back(std::move(e)); return; @@ -291,12 +290,12 @@ void ProcessorSplitRegexNative::ProcessEvent(PipelineEventGroup& logGroup, } } -void ProcessorSplitRegexNative::CreateNewEvent(const StringView& content, - long sourceoffset, - StringBuffer& sourceKey, - const LogEvent& sourceEvent, - PipelineEventGroup& logGroup, - EventsContainer& newEvents) { +void ProcessorSplitMultilineLogStringNative::CreateNewEvent(const StringView& content, + long sourceoffset, + StringBuffer& sourceKey, + const LogEvent& sourceEvent, + PipelineEventGroup& logGroup, + EventsContainer& newEvents) { StringView sourceVal = sourceEvent.GetContent(mSourceKey); std::unique_ptr targetEvent = logGroup.CreateLogEvent(); targetEvent->SetTimestamp( @@ -311,13 +310,13 @@ void ProcessorSplitRegexNative::CreateNewEvent(const StringView& content, newEvents.emplace_back(std::move(targetEvent)); } -void ProcessorSplitRegexNative::HandleUnmatchLogs(const StringView& sourceVal, - long sourceoffset, - StringBuffer& sourceKey, - const LogEvent& sourceEvent, - PipelineEventGroup& logGroup, - EventsContainer& newEvents, - StringView logPath) { +void ProcessorSplitMultilineLogStringNative::HandleUnmatchLogs(const StringView& sourceVal, + long sourceoffset, + StringBuffer& sourceKey, + const LogEvent& sourceEvent, + PipelineEventGroup& logGroup, + EventsContainer& newEvents, + StringView logPath) { size_t begin = 0; while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); @@ -345,7 +344,7 @@ void ProcessorSplitRegexNative::HandleUnmatchLogs(const StringView& sourceVal, } } -StringView ProcessorSplitRegexNative::GetNextLine(StringView log, size_t begin) { +StringView ProcessorSplitMultilineLogStringNative::GetNextLine(StringView log, size_t begin) { if (begin >= log.size()) { return StringView(); } diff --git a/core/processor/ProcessorSplitRegexNative.h b/core/processor/ProcessorSplitMultilineLogStringNative.h similarity index 95% rename from core/processor/ProcessorSplitRegexNative.h rename to core/processor/ProcessorSplitMultilineLogStringNative.h index fddb08597a..f140d770ff 100644 --- a/core/processor/ProcessorSplitRegexNative.h +++ b/core/processor/ProcessorSplitMultilineLogStringNative.h @@ -26,7 +26,7 @@ namespace logtail { -class ProcessorSplitRegexNative : public Processor { +class ProcessorSplitMultilineLogStringNative : public Processor { public: static const std::string sName; @@ -70,7 +70,7 @@ class ProcessorSplitRegexNative : public Processor { CounterPtr mProcUnmatchedEventsCnt; #ifdef APSARA_UNIT_TEST_MAIN - friend class ProcessorSplitRegexNativeUnittest; + friend class ProcessorSplitMultilineLogStringNativeUnittest; friend class ProcessorSplitRegexDisacardUnmatchUnittest; friend class ProcessorSplitRegexKeepUnmatchUnittest; #endif diff --git a/core/unittest/pipeline/PipelineUnittest.cpp b/core/unittest/pipeline/PipelineUnittest.cpp index 9b5e2753a3..726f239040 100644 --- a/core/unittest/pipeline/PipelineUnittest.cpp +++ b/core/unittest/pipeline/PipelineUnittest.cpp @@ -24,7 +24,7 @@ #include "pipeline/Pipeline.h" #include "plugin/PluginRegistry.h" #include "processor/ProcessorSplitLogStringNative.h" -#include "processor/ProcessorSplitRegexNative.h" +#include "processor/ProcessorSplitMultilineLogStringNative.h" #include "unittest/Unittest.h" using namespace std; @@ -2340,7 +2340,7 @@ void PipelineUnittest::OnInputFileWithMultiline() const { APSARA_TEST_TRUE(config->Parse()); pipeline.reset(new Pipeline()); APSARA_TEST_TRUE(pipeline->Init(std::move(*config))); - APSARA_TEST_EQUAL(ProcessorSplitRegexNative::sName, pipeline->mProcessorLine[1]->Name()); + APSARA_TEST_EQUAL(ProcessorSplitMultilineLogStringNative::sName, pipeline->mProcessorLine[1]->Name()); // json multiline configStr = R"( diff --git a/core/unittest/processor/CMakeLists.txt b/core/unittest/processor/CMakeLists.txt index 0c8262a4d1..8c9a53f8d2 100644 --- a/core/unittest/processor/CMakeLists.txt +++ b/core/unittest/processor/CMakeLists.txt @@ -18,8 +18,8 @@ project(processor_unittest) add_executable(processor_split_log_string_native_unittest ProcessorSplitLogStringNativeUnittest.cpp) target_link_libraries(processor_split_log_string_native_unittest unittest_base) -add_executable(processor_split_regex_native_unittest ProcessorSplitRegexNativeUnittest.cpp) -target_link_libraries(processor_split_regex_native_unittest unittest_base) +add_executable(processor_split_multiline_log_string_native_unittest ProcessorSplitMultilineLogStringNativeUnittest.cpp) +target_link_libraries(processor_split_multiline_log_string_native_unittest unittest_base) add_executable(processor_parse_regex_native_unittest ProcessorParseRegexNativeUnittest.cpp) target_link_libraries(processor_parse_regex_native_unittest unittest_base) @@ -47,7 +47,7 @@ target_link_libraries(processor_desensitize_native_unittest unittest_base) include(GoogleTest) gtest_discover_tests(processor_split_log_string_native_unittest) -gtest_discover_tests(processor_split_regex_native_unittest) +gtest_discover_tests(processor_split_multiline_log_string_native_unittest) gtest_discover_tests(processor_parse_regex_native_unittest) gtest_discover_tests(processor_parse_json_native_unittest) gtest_discover_tests(processor_parse_timestamp_native_unittest) diff --git a/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp b/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp index 5cb4e5d952..522c7e8d99 100644 --- a/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorDesensitizeNativeUnittest.cpp @@ -16,7 +16,7 @@ #include "plugin/instance/ProcessorInstance.h" #include "processor/ProcessorDesensitizeNative.h" #include "processor/ProcessorSplitLogStringNative.h" -#include "processor/ProcessorSplitRegexNative.h" +#include "processor/ProcessorSplitMultilineLogStringNative.h" #include "unittest/Unittest.h" namespace logtail { @@ -141,7 +141,7 @@ dbf@@@324 FS2$%pwd,pwd=saf543#$@,,", std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); } - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -155,12 +155,12 @@ dbf@@@324 FS2$%pwd,pwd=saf543#$@,,", config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processorSplitRegexNative; - processorSplitRegexNative.SetContext(mContext); - processorSplitRegexNative.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); - APSARA_TEST_TRUE_FATAL(processorSplitRegexNative.Init(config)); - processorSplitRegexNative.Process(eventGroup); + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processorSplitMultilineLogStringNative; + processorSplitMultilineLogStringNative.SetContext(mContext); + processorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(processorSplitMultilineLogStringNative.Init(config)); + processorSplitMultilineLogStringNative.Process(eventGroup); // run function ProcessorDesensitizeNative ProcessorDesensitizeNative& processor = *(new ProcessorDesensitizeNative); diff --git a/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp b/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp index 83f31de40c..b0f83ab094 100644 --- a/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseApsaraNativeUnittest.cpp @@ -21,7 +21,7 @@ #include "plugin/instance/ProcessorInstance.h" #include "processor/ProcessorParseApsaraNative.h" #include "processor/ProcessorSplitLogStringNative.h" -#include "processor/ProcessorSplitRegexNative.h" +#include "processor/ProcessorSplitMultilineLogStringNative.h" #include "unittest/Unittest.h" namespace logtail { @@ -552,7 +552,7 @@ void ProcessorParseApsaraNativeUnittest::TestMultipleLines() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); } - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -573,12 +573,12 @@ void ProcessorParseApsaraNativeUnittest::TestMultipleLines() { std::string pluginId = "testID"; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processorSplitRegexNative; - processorSplitRegexNative.SetContext(mContext); - processorSplitRegexNative.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); - APSARA_TEST_TRUE_FATAL(processorSplitRegexNative.Init(config)); - processorSplitRegexNative.Process(eventGroup); + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processorSplitMultilineLogStringNative; + processorSplitMultilineLogStringNative.SetContext(mContext); + processorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(processorSplitMultilineLogStringNative.Init(config)); + processorSplitMultilineLogStringNative.Process(eventGroup); // run function ProcessorParseApsaraNative ProcessorParseApsaraNative& processor = *(new ProcessorParseApsaraNative); diff --git a/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp b/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp index c860a102e6..10463509aa 100644 --- a/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseDelimiterNativeUnittest.cpp @@ -20,7 +20,7 @@ #include "plugin/instance/ProcessorInstance.h" #include "processor/ProcessorParseDelimiterNative.h" #include "processor/ProcessorSplitLogStringNative.h" -#include "processor/ProcessorSplitRegexNative.h" +#include "processor/ProcessorSplitMultilineLogStringNative.h" #include "unittest/Unittest.h" namespace logtail { @@ -213,7 +213,7 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); } - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -239,10 +239,10 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { config["AppendingLogPositionMeta"] = false; std::string pluginId = "testID"; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processor; + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); @@ -337,7 +337,7 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); } - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -363,10 +363,10 @@ void ProcessorParseDelimiterNativeUnittest::TestAllowingShortenedFields() { config["AppendingLogPositionMeta"] = false; std::string pluginId = "testID"; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processor; + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -489,7 +489,7 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); } - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -516,10 +516,10 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { config["AppendingLogPositionMeta"] = false; std::string pluginId = "testID"; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processor; + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -618,7 +618,7 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); } - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -644,10 +644,10 @@ void ProcessorParseDelimiterNativeUnittest::TestExtend() { config["AppendingLogPositionMeta"] = false; std::string pluginId = "testID"; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processor; + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -744,7 +744,7 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); } - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -770,10 +770,10 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { config["AppendingLogPositionMeta"] = false; std::string pluginId = "testID"; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processor; + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -868,7 +868,7 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); } - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -893,10 +893,10 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { config["AppendingLogPositionMeta"] = false; std::string pluginId = "testID"; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processor; + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -995,7 +995,7 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ_FATAL(CompactJson(expectJson).c_str(), CompactJson(outJson).c_str()); } - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -1021,10 +1021,10 @@ void ProcessorParseDelimiterNativeUnittest::TestMultipleLines() { config["AppendingLogPositionMeta"] = false; std::string pluginId = "testID"; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processor; + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); // run function ProcessorParseDelimiterNative @@ -1206,7 +1206,7 @@ void ProcessorParseDelimiterNativeUnittest::TestProcessQuote() { } ] })"; - // ProcessorSplitRegexNative + // ProcessorSplitMultilineLogStringNative { // make events auto sourceBuffer = std::make_shared(); @@ -1240,10 +1240,10 @@ void ProcessorParseDelimiterNativeUnittest::TestProcessQuote() { config["AppendingLogPositionMeta"] = false; std::string pluginId = "testID"; - // run function ProcessorSplitRegexNative - ProcessorSplitRegexNative processor; + // run function ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(processor.Init(config)); processor.Process(eventGroup); diff --git a/core/unittest/processor/ProcessorParseJsonNativeUnittest.cpp b/core/unittest/processor/ProcessorParseJsonNativeUnittest.cpp index e4d391be2f..5041326639 100644 --- a/core/unittest/processor/ProcessorParseJsonNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseJsonNativeUnittest.cpp @@ -19,7 +19,7 @@ #include "plugin/instance/ProcessorInstance.h" #include "processor/ProcessorParseJsonNative.h" #include "processor/ProcessorSplitLogStringNative.h" -#include "processor/ProcessorSplitRegexNative.h" +#include "processor/ProcessorSplitMultilineLogStringNative.h" #include "unittest/Unittest.h" namespace logtail { diff --git a/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp b/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp similarity index 94% rename from core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp rename to core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp index 4da7e5c30c..1417e2c19b 100644 --- a/core/unittest/processor/ProcessorSplitRegexNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp @@ -18,7 +18,7 @@ #include "common/JsonUtil.h" #include "config/Config.h" #include "models/LogEvent.h" -#include "processor/ProcessorSplitRegexNative.h" +#include "processor/ProcessorSplitMultilineLogStringNative.h" #include "unittest/Unittest.h" namespace logtail { @@ -31,7 +31,7 @@ const std::string LOG_END_STRING = " ...23 more"; const std::string LOG_END_REGEX = R"(\s*\.\.\.\d+ more)"; const std::string LOG_UNMATCH = "unmatch log"; -class ProcessorSplitRegexNativeUnittest : public ::testing::Test { +class ProcessorSplitMultilineLogStringNativeUnittest : public ::testing::Test { public: void SetUp() override { mContext.SetConfigName("project##config_0"); } @@ -46,37 +46,37 @@ class ProcessorSplitRegexNativeUnittest : public ::testing::Test { PipelineContext mContext; }; -UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestInit); -UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcessEventMultiline); -UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcessEventMultilineKeepUnmatch); -UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcessEventMultilineDiscardUnmatch); -UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcessEventMultilineAllNotMatchKeepUnmatch); -UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcessEventMultilineAllNotMatchDiscardUnmatch); -UNIT_TEST_CASE(ProcessorSplitRegexNativeUnittest, TestProcess); +UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestInit); +UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultiline); +UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultilineKeepUnmatch); +UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultilineDiscardUnmatch); +UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultilineAllNotMatchKeepUnmatch); +UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultilineAllNotMatchDiscardUnmatch); +UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcess); -void ProcessorSplitRegexNativeUnittest::TestInit() { +void ProcessorSplitMultilineLogStringNativeUnittest::TestInit() { // make config Json::Value config; config["StartPattern"] = ".*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); } -void ProcessorSplitRegexNativeUnittest::TestProcessEventMultiline() { +void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultiline() { // make config Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -140,16 +140,16 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultiline() { APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } -void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineKeepUnmatch() { +void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultilineKeepUnmatch() { // make config Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -228,16 +228,16 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineKeepUnmatch() { APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } -void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineDiscardUnmatch() { +void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultilineDiscardUnmatch() { // make config Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; config["UnmatchedContentTreatment"] = "discard"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -287,16 +287,16 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineDiscardUnmatch( APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } -void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineAllNotMatchKeepUnmatch() { +void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultilineAllNotMatchKeepUnmatch() { // make config Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -371,16 +371,16 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineAllNotMatchKeep APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } -void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineAllNotMatchDiscardUnmatch() { +void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultilineAllNotMatchDiscardUnmatch() { // make config Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; config["UnmatchedContentTreatment"] = "discard"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -413,16 +413,16 @@ void ProcessorSplitRegexNativeUnittest::TestProcessEventMultilineAllNotMatchDisc APSARA_TEST_STREQ_FATAL("null", CompactJson(outJson).c_str()); } -void ProcessorSplitRegexNativeUnittest::TestProcess() { +void ProcessorSplitMultilineLogStringNativeUnittest::TestProcess() { // make config Json::Value config; config["StartPattern"] = "line.*"; config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = true; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); // make eventGroup @@ -507,9 +507,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() config["UnmatchedContentTreatment"] = "discard"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -658,9 +658,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { config["UnmatchedContentTreatment"] = "discard"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -829,9 +829,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { config["UnmatchedContentTreatment"] = "discard"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -922,9 +922,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { config["UnmatchedContentTreatment"] = "discard"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1062,9 +1062,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { config["UnmatchedContentTreatment"] = "discard"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1174,9 +1174,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1371,9 +1371,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1624,9 +1624,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1743,9 +1743,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log @@ -1919,9 +1919,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { config["UnmatchedContentTreatment"] = "split"; config["AppendingLogPositionMeta"] = false; // make processor - ProcessorSplitRegexNative processor; + ProcessorSplitMultilineLogStringNative processor; processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitRegexNative::sName, "1"); + processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); std::string pluginId = "testID"; APSARA_TEST_TRUE_FATAL(processor.Init(config)); { // case: complete log From 2c0f76ae89da0df33b582bf34a1a037c9197cdf2 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Thu, 21 Mar 2024 11:54:57 +0000 Subject: [PATCH 08/15] fix --- ...ProcessorSplitMultilineLogStringNative.cpp | 31 +- ...rSplitMultilineLogStringNativeUnittest.cpp | 2196 ++++++++++------- 2 files changed, 1322 insertions(+), 905 deletions(-) diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp index 56d554c831..e6f78ece4c 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "processor/ProcessorSplitMultilineLogStringNative.h" + #include #include @@ -24,7 +26,6 @@ #include "models/LogEvent.h" #include "monitor/MetricConstants.h" #include "plugin/instance/ProcessorInstance.h" -#include "processor/ProcessorSplitMultilineLogStringNative.h" namespace logtail { @@ -141,13 +142,14 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo StringView sourceVal = sourceEvent.GetContent(mSourceKey); StringBuffer sourceKey = logGroup.GetSourceBuffer()->CopyString(mSourceKey); - const char* multiStartIndex = sourceVal.data(); + const char* multiStartIndex = nullptr; std::string exception; bool isPartialLog = false; if (mMultiline.GetStartPatternReg() == nullptr && mMultiline.GetContinuePatternReg() == nullptr && mMultiline.GetEndPatternReg() != nullptr) { // if only end pattern is given, then it will stick to this state isPartialLog = true; + multiStartIndex = sourceVal.data(); } uint32_t sourceOffset = 0; @@ -174,6 +176,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo && BoostRegexMatch(content.data(), content.size(), *mMultiline.GetEndPatternReg(), exception)) { // case: continue + end CreateNewEvent(content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents); + multiStartIndex = content.data() + content.size() + 1; } else { HandleUnmatchLogs(content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents, logPath); } @@ -216,9 +219,10 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo sourceEvent, logGroup, newEvents); - multiStartIndex = content.data() + content.size() + 1; if (mMultiline.GetStartPatternReg() != nullptr) { isPartialLog = false; + } else { + multiStartIndex = content.data() + content.size() + 1; } // if only end pattern is given, start another log automatically } @@ -240,26 +244,19 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo } else { // case: start + continue // continue pattern is given, but current line is not matched against the continue pattern + CreateNewEvent(StringView(multiStartIndex, content.data() - 1 - multiStartIndex), + sourceOffset, + sourceKey, + sourceEvent, + logGroup, + newEvents); if (!BoostRegexMatch(content.data(), content.size(), *mMultiline.GetStartPatternReg(), exception)) { // when no end pattern is given, the only chance to enter unmatched state is when both // start and continue pattern are given, and the current line is not matched against the // start pattern - HandleUnmatchLogs( - StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), - sourceOffset, - sourceKey, - sourceEvent, - logGroup, - newEvents, - logPath); + HandleUnmatchLogs(content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents, logPath); isPartialLog = false; } else { - CreateNewEvent(StringView(multiStartIndex, content.data() - 1 - multiStartIndex), - sourceOffset, - sourceKey, - sourceEvent, - logGroup, - newEvents); multiStartIndex = content.data(); } } diff --git a/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp b/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp index 1417e2c19b..99c5e35069 100644 --- a/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp @@ -11,13 +11,13 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - #include #include "common/Constants.h" #include "common/JsonUtil.h" #include "config/Config.h" #include "models/LogEvent.h" +#include "processor/ProcessorSplitLogStringNative.h" #include "processor/ProcessorSplitMultilineLogStringNative.h" #include "unittest/Unittest.h" @@ -31,489 +31,39 @@ const std::string LOG_END_STRING = " ...23 more"; const std::string LOG_END_REGEX = R"(\s*\.\.\.\d+ more)"; const std::string LOG_UNMATCH = "unmatch log"; -class ProcessorSplitMultilineLogStringNativeUnittest : public ::testing::Test { -public: - void SetUp() override { mContext.SetConfigName("project##config_0"); } - - void TestInit(); - void TestProcessEventMultiline(); - void TestProcessEventMultilineKeepUnmatch(); - void TestProcessEventMultilineDiscardUnmatch(); - void TestProcessEventMultilineAllNotMatchKeepUnmatch(); - void TestProcessEventMultilineAllNotMatchDiscardUnmatch(); - void TestProcess(); - - PipelineContext mContext; -}; - -UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestInit); -UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultiline); -UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultilineKeepUnmatch); -UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultilineDiscardUnmatch); -UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultilineAllNotMatchKeepUnmatch); -UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcessEventMultilineAllNotMatchDiscardUnmatch); -UNIT_TEST_CASE(ProcessorSplitMultilineLogStringNativeUnittest, TestProcess); - -void ProcessorSplitMultilineLogStringNativeUnittest::TestInit() { - // make config - Json::Value config; - config["StartPattern"] = ".*"; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); -} - -void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultiline() { - // make config - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - // make eventGroup - auto sourceBuffer = std::make_shared(); - PipelineEventGroup eventGroup(sourceBuffer); - std::stringstream inJson; - inJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : ")" - << LOG_BEGIN_STRING << R"(first.\nmultiline1\nmultiline2\n)" << LOG_BEGIN_STRING - << R"(second.\nmultiline1\nmultiline2)" - << R"(", - "__file_offset__": 0 - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; - // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); - // judge result - std::stringstream expectJson; - expectJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : ")" - << LOG_BEGIN_STRING << R"(first.\nmultiline1\nmultiline2)" - << R"(" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "content" : ")" - << LOG_BEGIN_STRING << R"(second.\nmultiline1\nmultiline2)" - << R"(" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); -} - -void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultilineKeepUnmatch() { - // make config - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - // make eventGroup - auto sourceBuffer = std::make_shared(); - PipelineEventGroup eventGroup(sourceBuffer); - std::stringstream inJson; - inJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : ")" - << R"(first.\nmultiline1\nmultiline2\n)" << LOG_BEGIN_STRING << R"(second.\nmultiline1\nmultiline2)" - << R"(", - "__file_offset__": 0 - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; - // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); - // judge result - std::stringstream expectJson; - expectJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : "first." - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "content" : "multiline1" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "content" : "multiline2" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "content" : ")" - << LOG_BEGIN_STRING << R"(second.\nmultiline1\nmultiline2)" - << R"(" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); -} - -void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultilineDiscardUnmatch() { - // make config - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["UnmatchedContentTreatment"] = "discard"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - // make eventGroup - auto sourceBuffer = std::make_shared(); - PipelineEventGroup eventGroup(sourceBuffer); - std::stringstream inJson; - inJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : ")" - << R"(first.\nmultiline1\nmultiline2\n)" << LOG_BEGIN_STRING << R"(second.\nmultiline1\nmultiline2", - "__file_offset__": 0 - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; - // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); - // judge result - std::stringstream expectJson; - expectJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : ")" - << LOG_BEGIN_STRING << R"(second.\nmultiline1\nmultiline2" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); -} - -void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultilineAllNotMatchKeepUnmatch() { - // make config - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - // make eventGroup - auto sourceBuffer = std::make_shared(); - PipelineEventGroup eventGroup(sourceBuffer); - std::stringstream inJson; - inJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : "first.\nmultiline1\nsecond.\nmultiline1", - "__file_offset__": 0 - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; - // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); - // judge result - std::stringstream expectJson; - expectJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : "first." - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "content" : "multiline1" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "content" : "second." - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "content" : "multiline1" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); -} - -void ProcessorSplitMultilineLogStringNativeUnittest::TestProcessEventMultilineAllNotMatchDiscardUnmatch() { - // make config - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["UnmatchedContentTreatment"] = "discard"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - // make eventGroup - auto sourceBuffer = std::make_shared(); - PipelineEventGroup eventGroup(sourceBuffer); - std::stringstream inJson; - inJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : "first.\nmultiline1\nsecond.\nmultiline1", - "__file_offset__": 0 - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; - // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); - // judge result - std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL("null", CompactJson(outJson).c_str()); -} - -void ProcessorSplitMultilineLogStringNativeUnittest::TestProcess() { - // make config - Json::Value config; - config["StartPattern"] = "line.*"; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = true; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - // make eventGroup - auto sourceBuffer = std::make_shared(); - PipelineEventGroup eventGroup(sourceBuffer); - std::string inJson = R"({ - "events" : - [ - { - "contents" : - { - "__file_offset__": "0", - "content" : "line1\ncontinue\nline2\ncontinue" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - eventGroup.FromJsonString(inJson); - std::string logPath("/var/log/message"); - // run test function - processor.Process(eventGroup); - std::stringstream expectJson; - expectJson << R"({ - "events" : - [ - { - "contents" : - { - "__file_offset__": "0", - "content" : "line1\ncontinue" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "__file_offset__": ")" - << strlen(R"(line1ncontinuen)") << R"(", - "content" : "line2\ncontinue" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; - std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); - // check observability - APSARA_TEST_EQUAL_FATAL(2, processor.GetContext().GetProcessProfile().splitLines); -} -class ProcessorSplitRegexDisacardUnmatchUnittest : public ::testing::Test { +class ProcessorSplitMultilineLogDisacardUnmatchUnittest : public ::testing::Test { public: void SetUp() override { mContext.SetConfigName("project##config_0"); } - void TestLogSplitWithBeginContinue(); void TestLogSplitWithBeginEnd(); void TestLogSplitWithBegin(); void TestLogSplitWithContinueEnd(); void TestLogSplitWithEnd(); - PipelineContext mContext; }; -UNIT_TEST_CASE(ProcessorSplitRegexDisacardUnmatchUnittest, TestLogSplitWithBeginContinue); -UNIT_TEST_CASE(ProcessorSplitRegexDisacardUnmatchUnittest, TestLogSplitWithBeginEnd); -UNIT_TEST_CASE(ProcessorSplitRegexDisacardUnmatchUnittest, TestLogSplitWithBegin); -UNIT_TEST_CASE(ProcessorSplitRegexDisacardUnmatchUnittest, TestLogSplitWithContinueEnd); -UNIT_TEST_CASE(ProcessorSplitRegexDisacardUnmatchUnittest, TestLogSplitWithEnd); +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithBeginContinue); +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithBeginEnd); +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithBegin); +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithContinueEnd); +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithEnd); -void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() { +void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() { // make config Json::Value config; config["StartPattern"] = LOG_BEGIN_REGEX; + config["SplitType"] = "regex"; config["ContinuePattern"] = LOG_CONTINUE_REGEX; config["UnmatchedContentTreatment"] = "discard"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup + + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: unmatch + unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -524,8 +74,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" - << LOG_CONTINUE_STRING << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -535,33 +84,15 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result - std::stringstream expectJson; - expectJson << R"({ - "events" : - [ - { - "contents" : - { - "content" : ")" - << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - } - ] - })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } - { // case: complete log (only begin) - // make eventGroup + // case: start + unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -572,7 +103,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -582,26 +113,14 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ "events" : [ - { - "contents" : - { - "content" : ")" - << LOG_BEGIN_STRING << R"(" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, { "contents" : { @@ -615,10 +134,10 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup + // case: unmatch + start + continue + continue + unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -629,7 +148,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" + << LOG_CONTINUE_STRING << R"(\n)" << LOG_UNMATCH << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -639,32 +159,31 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL("null", CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } -} - -void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { - // make config - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["EndPattern"] = LOG_END_REGEX; - config["UnmatchedContentTreatment"] = "discard"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup + // case: unmatch + start + start + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -675,7 +194,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -685,11 +204,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -699,7 +216,17 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(" + << LOG_BEGIN_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -708,10 +235,10 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: complete log - // make eventGroup + // case: unmatch + start + continue + continue + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -722,8 +249,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n)" << LOG_END_STRING - << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" + << LOG_CONTINUE_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -733,11 +260,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -747,7 +272,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" + << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -756,10 +281,10 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: incomplete log (begin) - // make eventGroup + // case: continue + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -770,7 +295,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", + << LOG_CONTINUE_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -780,17 +305,29 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); - // judge result + ProcessorSplitMultilineLogStringNative.Process(eventGroup); std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL("null", CompactJson(outJson).c_str()); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup +} + +void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { + // make config + Json::Value config; + config["StartPattern"] = LOG_BEGIN_REGEX; + config["SplitType"] = "regex"; + config["EndPattern"] = LOG_END_REGEX; + config["UnmatchedContentTreatment"] = "discard"; + + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: unmatch + unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -811,31 +348,44 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL("null", CompactJson(outJson).c_str()); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } -} + // case: unmatch+start+unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); -void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { - // make config - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["UnmatchedContentTreatment"] = "discard"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); + } + // case: unmatch+start+End+unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -846,7 +396,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(\n)" << LOG_UNMATCH + << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -856,11 +407,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -870,7 +419,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { "contents" : { "content" : ")" - << LOG_BEGIN_STRING << R"(" + << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -879,10 +428,11 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup + + // case: start+start + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -893,7 +443,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(", + << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -903,32 +453,15 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithBegin() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL("null", CompactJson(outJson).c_str()); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } -} - -void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { - // make config - Json::Value config; - config["ContinuePattern"] = LOG_CONTINUE_REGEX; - config["EndPattern"] = LOG_END_REGEX; - config["UnmatchedContentTreatment"] = "discard"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup + // case: unmatch+start+End + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -939,8 +472,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" - << LOG_END_STRING << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -950,11 +482,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -964,7 +494,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_END_STRING << R"(" + << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -973,10 +503,10 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: complete log (only end) - // make eventGroup + // case: unmatch+start+unmatch+End+unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -987,7 +517,8 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n)" << LOG_END_STRING + << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -997,11 +528,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1011,7 +540,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_END_STRING << R"(" + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1020,10 +549,25 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup +} + +void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBegin() { + // make config + Json::Value config; + config["StartPattern"] = LOG_BEGIN_REGEX; + config["SplitType"] = "regex"; + config["UnmatchedContentTreatment"] = "discard"; + + + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: unmatch + start + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1034,7 +578,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1044,32 +588,62 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result - std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL("null", CompactJson(outJson).c_str()); - } -} - -void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { - // make config - Json::Value config; - config["EndPattern"] = LOG_END_REGEX; - config["UnmatchedContentTreatment"] = "discard"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup - auto sourceBuffer = std::make_shared(); + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // case: unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); + } + // case: start + start + { + auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; inJson << R"({ @@ -1079,7 +653,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(", + << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1089,11 +663,9 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1103,7 +675,17 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" + << LOG_BEGIN_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1112,10 +694,10 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup + // case: start + unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1126,7 +708,7 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(", + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1136,51 +718,803 @@ void ProcessorSplitRegexDisacardUnmatchUnittest::TestLogSplitWithEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL("null", CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } } -class ProcessorSplitRegexKeepUnmatchUnittest : public ::testing::Test { -public: - void SetUp() override { mContext.SetConfigName("project##config_0"); } +void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { + // make config + Json::Value config; + config["ContinuePattern"] = LOG_CONTINUE_REGEX; + config["EndPattern"] = LOG_END_REGEX; + config["UnmatchedContentTreatment"] = "discard"; + config["SplitType"] = "regex"; - void TestLogSplitWithBeginContinue(); - void TestLogSplitWithBeginEnd(); - void TestLogSplitWithBegin(); - void TestLogSplitWithContinueEnd(); - void TestLogSplitWithEnd(); + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); - PipelineContext mContext; -}; + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); + } + // case: Continue + unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_CONTINUE_STRING << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); + } + // case: Continue + Continue + end + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_END_STRING << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_END_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // case: continue + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_CONTINUE_STRING << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); + } + // case: end + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_END_STRING << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); -UNIT_TEST_CASE(ProcessorSplitRegexKeepUnmatchUnittest, TestLogSplitWithBeginContinue); -UNIT_TEST_CASE(ProcessorSplitRegexKeepUnmatchUnittest, TestLogSplitWithBeginEnd); -UNIT_TEST_CASE(ProcessorSplitRegexKeepUnmatchUnittest, TestLogSplitWithBegin); -UNIT_TEST_CASE(ProcessorSplitRegexKeepUnmatchUnittest, TestLogSplitWithContinueEnd); -UNIT_TEST_CASE(ProcessorSplitRegexKeepUnmatchUnittest, TestLogSplitWithEnd); + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_END_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } +} -void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { +void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithEnd() { // make config Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["ContinuePattern"] = LOG_CONTINUE_REGEX; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup + config["EndPattern"] = LOG_END_REGEX; + config["UnmatchedContentTreatment"] = "discard"; + config["SplitType"] = "regex"; + + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: end + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_END_STRING << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_END_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // case: unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); + } + // case: unmatch + end + unmatch + { + // make eventGroup + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } +} + +class ProcessorSplitMultilineLogKeepUnmatchUnittest : public ::testing::Test { +public: + void SetUp() override { mContext.SetConfigName("project##config_0"); } + void TestLogSplitWithBeginContinue(); + void TestLogSplitWithBeginEnd(); + void TestLogSplitWithBegin(); + void TestLogSplitWithContinueEnd(); + void TestLogSplitWithEnd(); + PipelineContext mContext; +}; + +UNIT_TEST_CASE(ProcessorSplitMultilineLogKeepUnmatchUnittest, TestLogSplitWithBeginContinue); +UNIT_TEST_CASE(ProcessorSplitMultilineLogKeepUnmatchUnittest, TestLogSplitWithBeginEnd); +UNIT_TEST_CASE(ProcessorSplitMultilineLogKeepUnmatchUnittest, TestLogSplitWithBegin); +UNIT_TEST_CASE(ProcessorSplitMultilineLogKeepUnmatchUnittest, TestLogSplitWithContinueEnd); +UNIT_TEST_CASE(ProcessorSplitMultilineLogKeepUnmatchUnittest, TestLogSplitWithEnd); + +void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { + // make config + Json::Value config; + config["StartPattern"] = LOG_BEGIN_REGEX; + config["SplitType"] = "regex"; + config["ContinuePattern"] = LOG_CONTINUE_REGEX; + config["UnmatchedContentTreatment"] = "single_line"; + + + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: unmatch + unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // start + unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // unmatch + start + continue + continue + unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" + << LOG_CONTINUE_STRING << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // case: unmatch + start + start + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // case: unmatch + start + continue + continue + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" + << LOG_CONTINUE_STRING << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // case: continue + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_CONTINUE_STRING << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_CONTINUE_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } +} + +void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { + // make config + Json::Value config; + config["StartPattern"] = LOG_BEGIN_REGEX; + config["SplitType"] = "regex"; + config["EndPattern"] = LOG_END_REGEX; + config["UnmatchedContentTreatment"] = "single_line"; + + + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: unmatch + unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // case: unmatch+start+unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1191,8 +1525,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" - << LOG_CONTINUE_STRING << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1202,11 +1535,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1226,7 +1557,17 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_BEGIN_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(" + << LOG_BEGIN_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1235,10 +1576,10 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: complete log (only begin) - // make eventGroup + // case: unmatch+start+End+unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1249,7 +1590,8 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(\n)" << LOG_UNMATCH + << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1259,11 +1601,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1283,7 +1623,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_BEGIN_STRING << R"(" + << LOG_BEGIN_STRING << R"(\n)" << LOG_END_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1293,7 +1633,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_BEGIN_STRING << R"(" + << LOG_UNMATCH << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1302,10 +1642,11 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup + + // case: start+start + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1316,7 +1657,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", + << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1326,11 +1667,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1340,7 +1679,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_BEGIN_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1350,7 +1689,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_BEGIN_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1359,25 +1698,10 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginContinue() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } -} - -void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { - // make config - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["EndPattern"] = LOG_END_REGEX; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup + // case: unmatch+start+End + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1398,11 +1722,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1431,10 +1753,10 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: complete log - // make eventGroup + // case: unmatch+start+unmatch+End + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1456,11 +1778,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1489,10 +1809,24 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: incomplete log (begin) - // make eventGroup +} + +void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBegin() { + // make config + Json::Value config; + config["StartPattern"] = LOG_BEGIN_REGEX; + config["UnmatchedContentTreatment"] = "single_line"; + config["SplitType"] = "regex"; + + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: unmatch + start + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1503,7 +1837,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1513,11 +1847,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1542,7 +1874,42 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { "timestamp" : 12345678901, "timestampNanosecond" : 0, "type" : 1 - }, + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // case: unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ { "contents" : { @@ -1556,10 +1923,10 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup + // case: start + start + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1570,7 +1937,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(", + << LOG_BEGIN_STRING << R"(\n)" << LOG_BEGIN_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1580,11 +1947,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1594,7 +1959,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_BEGIN_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1604,7 +1969,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_BEGIN_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1613,24 +1978,10 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } -} - -void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { - // make config - Json::Value config; - config["StartPattern"] = LOG_BEGIN_REGEX; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup + // case: start + unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1641,7 +1992,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(", + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1651,11 +2002,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1665,17 +2014,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" - }, - "timestamp" : 12345678901, - "timestampNanosecond" : 0, - "type" : 1 - }, - { - "contents" : - { - "content" : ")" - << LOG_BEGIN_STRING << R"(" + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1684,10 +2023,25 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup +} + +void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { + // make config + Json::Value config; + config["ContinuePattern"] = LOG_CONTINUE_REGEX; + config["EndPattern"] = LOG_END_REGEX; + config["UnmatchedContentTreatment"] = "single_line"; + config["SplitType"] = "regex"; + + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1708,11 +2062,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1731,25 +2083,10 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithBegin() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } -} - -void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { - // make config - Json::Value config; - config["ContinuePattern"] = LOG_CONTINUE_REGEX; - config["EndPattern"] = LOG_END_REGEX; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; - // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup + // case: Continue + unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1760,8 +2097,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" - << LOG_END_STRING << R"(", + << LOG_CONTINUE_STRING << R"(\n)" << LOG_UNMATCH << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1771,11 +2107,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1785,17 +2119,17 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_CONTINUE_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, "type" : 1 }, - { + { "contents" : { "content" : ")" - << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_END_STRING << R"(" + << LOG_UNMATCH << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1804,10 +2138,10 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: complete log (only end) - // make eventGroup + // case: Continue + Continue + end + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1818,7 +2152,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(", + << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_END_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1828,11 +2162,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1842,17 +2174,52 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_CONTINUE_STRING << R"(\n)" << LOG_CONTINUE_STRING << R"(\n)" << LOG_END_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, "type" : 1 - }, + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + // case: continue + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ { "contents" : { "content" : ")" - << LOG_END_STRING << R"(" + << LOG_CONTINUE_STRING << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_CONTINUE_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1861,10 +2228,10 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup + // case: end + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1875,7 +2242,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(", + << LOG_END_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1885,11 +2252,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1899,7 +2264,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(" + << LOG_END_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1908,24 +2273,29 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } } -void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { +void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithEnd() { // make config Json::Value config; config["EndPattern"] = LOG_END_REGEX; - config["UnmatchedContentTreatment"] = "split"; - config["AppendingLogPositionMeta"] = false; + config["UnmatchedContentTreatment"] = "single_line"; + config["SplitType"] = "regex"; // make processor - ProcessorSplitMultilineLogStringNative processor; - processor.SetContext(mContext); - processor.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); - std::string pluginId = "testID"; - APSARA_TEST_TRUE_FATAL(processor.Init(config)); - { // case: complete log - // make eventGroup + + // ProcessorSplitLogStringNative + ProcessorSplitLogStringNative processorSplitLogStringNative; + processorSplitLogStringNative.SetContext(mContext); + APSARA_TEST_TRUE_FATAL(processorSplitLogStringNative.Init(config)); + // ProcessorSplitMultilineLogStringNative + ProcessorSplitMultilineLogStringNative ProcessorSplitMultilineLogStringNative; + ProcessorSplitMultilineLogStringNative.SetContext(mContext); + ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); + APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); + // case: end + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1936,7 +2306,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(", + << LOG_END_STRING << R"(", "__file_offset__": 0 }, "timestamp" : 12345678901, @@ -1946,11 +2316,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -1960,7 +2328,7 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { "contents" : { "content" : ")" - << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" + << LOG_END_STRING << R"(" }, "timestamp" : 12345678901, "timestampNanosecond" : 0, @@ -1969,10 +2337,10 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - { // case: no match log - // make eventGroup + // case: unmatch + { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::stringstream inJson; @@ -1993,11 +2361,9 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { ] })"; eventGroup.FromJsonString(inJson.str()); - std::string logPath("/var/log/message"); - EventsContainer newEvents; + // run test function - processor.ProcessEvent(eventGroup, logPath, std::move(eventGroup.MutableEvents()[0]), newEvents); - eventGroup.SwapEvents(newEvents); + ProcessorSplitMultilineLogStringNative.Process(eventGroup); // judge result std::stringstream expectJson; expectJson << R"({ @@ -2016,10 +2382,64 @@ void ProcessorSplitRegexKeepUnmatchUnittest::TestLogSplitWithEnd() { ] })"; std::string outJson = eventGroup.ToJsonString(); - APSARA_TEST_STREQ_FATAL(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } -} + // case: unmatch + end + unmatch + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(\n)" << LOG_UNMATCH << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_END_STRING << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + }, + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } +} } // namespace logtail UNIT_TEST_MAIN \ No newline at end of file From bf9b90659232b528f97ff47e64513be6e2b91815 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Fri, 22 Mar 2024 05:37:22 +0000 Subject: [PATCH 09/15] fix --- core/file_server/MultilineOptions.cpp | 13 ++++ core/file_server/MultilineOptions.h | 4 +- ...ProcessorSplitMultilineLogStringNative.cpp | 31 ++++----- .../ProcessorSplitMultilineLogStringNative.h | 7 +-- ...rSplitMultilineLogStringNativeUnittest.cpp | 63 ++++++++++++++++--- 5 files changed, 86 insertions(+), 32 deletions(-) diff --git a/core/file_server/MultilineOptions.cpp b/core/file_server/MultilineOptions.cpp index a98c19e3c7..7b97858ee3 100644 --- a/core/file_server/MultilineOptions.cpp +++ b/core/file_server/MultilineOptions.cpp @@ -166,6 +166,19 @@ bool MultilineOptions::Init(const Json::Value& config, const PipelineContext& ct ctx.GetRegion()); } + // Ignore Warning + if (!GetOptionalBoolParam(config, "IgnoringUnmatchWarning", mIgnoringUnmatchWarning, errorMsg)) { + PARAM_WARNING_DEFAULT(ctx.GetLogger(), + ctx.GetAlarm(), + errorMsg, + mIgnoringUnmatchWarning, + pluginName, + ctx.GetConfigName(), + ctx.GetProjectName(), + ctx.GetLogstoreName(), + ctx.GetRegion()); + } + return true; } diff --git a/core/file_server/MultilineOptions.h b/core/file_server/MultilineOptions.h index 9b72d92d11..f9a49b4f1d 100644 --- a/core/file_server/MultilineOptions.h +++ b/core/file_server/MultilineOptions.h @@ -42,6 +42,7 @@ class MultilineOptions { std::string mContinuePattern; std::string mEndPattern; UnmatchedContentTreatment mUnmatchedContentTreatment = UnmatchedContentTreatment::SINGLE_LINE; + bool mIgnoringUnmatchWarning = false; private: bool ParseRegex(const std::string& pattern, std::shared_ptr& reg); @@ -52,7 +53,8 @@ class MultilineOptions { bool mIsMultiline = false; }; -const std::string& UnmatchedContentTreatmentToString(MultilineOptions::UnmatchedContentTreatment unmatchedContentTreatment); +const std::string& +UnmatchedContentTreatmentToString(MultilineOptions::UnmatchedContentTreatment unmatchedContentTreatment); using MultilineConfig = std::pair; diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp index e6f78ece4c..991792f6c7 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -47,19 +47,6 @@ bool ProcessorSplitMultilineLogStringNative::Init(const Json::Value& config) { mContext->GetRegion()); } - // Ignore Warning - if (!GetOptionalBoolParam(config, "IgnoreUnmatchWarning", mIgnoreUnmatchWarning, errorMsg)) { - PARAM_WARNING_DEFAULT(mContext->GetLogger(), - mContext->GetAlarm(), - errorMsg, - mIgnoreUnmatchWarning, - sName, - mContext->GetConfigName(), - mContext->GetProjectName(), - mContext->GetLogstoreName(), - mContext->GetRegion()); - } - if (!mMultiline.Init(config, *mContext, sName)) { return false; } @@ -86,12 +73,18 @@ bool ProcessorSplitMultilineLogStringNative::Init(const Json::Value& config) { return true; } +/* + Presume: + 1. Events must be LogEvent + 2. This is an inner plugin, so the size of log content must equal to 2 (sourceKey, __file_offset__) + 3. The last character of each event must be \0 (set in LogFileReader) +*/ void ProcessorSplitMultilineLogStringNative::Process(PipelineEventGroup& logGroup) { if (logGroup.GetEvents().empty()) { return; } EventsContainer newEvents; - const StringView& logPath = logGroup.GetMetadata(EventGroupMetaKey::LOG_FILE_PATH_RESOLVED); + StringView logPath = logGroup.GetMetadata(EventGroupMetaKey::LOG_FILE_PATH_RESOLVED); for (PipelineEventPtr& e : logGroup.MutableEvents()) { ProcessEvent(logGroup, logPath, std::move(e), newEvents); } @@ -117,7 +110,7 @@ bool ProcessorSplitMultilineLogStringNative::IsSupportedEvent(const PipelineEven } void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& logGroup, - const StringView& logPath, + StringView logPath, PipelineEventPtr&& e, EventsContainer& newEvents) { if (!IsSupportedEvent(e)) { @@ -125,6 +118,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo return; } const LogEvent& sourceEvent = e.Cast(); + // This is an inner plugin, so the size of log content must equal to 2 (sourceKey, __file_offset__) if (sourceEvent.GetContents().size() != 2 || !sourceEvent.HasContent(mSourceKey)) { newEvents.emplace_back(std::move(e)); LOG_ERROR(mContext->GetLogger(), @@ -267,16 +261,15 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo // when in unmatched state, the unmatched log is handled one by one, so there is no need for additional handle // here if (isPartialLog && multiStartIndex - sourceVal.data() < sourceVal.size()) { - int endIndex = sourceVal[sourceVal.size() - 1] == '\n' ? sourceVal.size() - 1 : sourceVal.size(); if (mMultiline.GetEndPatternReg() == nullptr) { - CreateNewEvent(StringView(multiStartIndex, sourceVal.data() + endIndex - multiStartIndex), + CreateNewEvent(StringView(multiStartIndex, sourceVal.data() + sourceVal.size() - multiStartIndex), sourceOffset, sourceKey, sourceEvent, logGroup, newEvents); } else { - HandleUnmatchLogs(StringView(multiStartIndex, sourceVal.data() + endIndex - multiStartIndex), + HandleUnmatchLogs(StringView(multiStartIndex, sourceVal.data() + sourceVal.size() - multiStartIndex), sourceOffset, sourceKey, sourceEvent, @@ -318,7 +311,7 @@ void ProcessorSplitMultilineLogStringNative::HandleUnmatchLogs(const StringView& while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); mProcUnmatchedEventsCnt->Add(1); - if (!mIgnoreUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + if (!mMultiline.mIgnoringUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING( GetContext().GetLogger(), ("unmatched log line", "please check regex")( diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.h b/core/processor/ProcessorSplitMultilineLogStringNative.h index f140d770ff..d771e299e4 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.h +++ b/core/processor/ProcessorSplitMultilineLogStringNative.h @@ -33,7 +33,6 @@ class ProcessorSplitMultilineLogStringNative : public Processor { std::string mSourceKey = DEFAULT_CONTENT_KEY; MultilineOptions mMultiline; bool mAppendingLogPositionMeta = false; - bool mIgnoreUnmatchWarning = false; const std::string& Name() const override { return sName; } bool Init(const Json::Value& config) override; @@ -43,10 +42,8 @@ class ProcessorSplitMultilineLogStringNative : public Processor { bool IsSupportedEvent(const PipelineEventPtr& e) const override; private: - void ProcessEvent(PipelineEventGroup& logGroup, - const StringView& logPath, - PipelineEventPtr&& e, - EventsContainer& newEvents); + void + ProcessEvent(PipelineEventGroup& logGroup, StringView logPath, PipelineEventPtr&& e, EventsContainer& newEvents); void SplitLogByRegex(PipelineEventGroup& logGroup); void CreateNewEvent(const StringView& content, long sourceoffset, diff --git a/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp b/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp index 99c5e35069..d03de04843 100644 --- a/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp @@ -34,20 +34,24 @@ const std::string LOG_UNMATCH = "unmatch log"; class ProcessorSplitMultilineLogDisacardUnmatchUnittest : public ::testing::Test { public: - void SetUp() override { mContext.SetConfigName("project##config_0"); } void TestLogSplitWithBeginContinue(); void TestLogSplitWithBeginEnd(); void TestLogSplitWithBegin(); void TestLogSplitWithContinueEnd(); void TestLogSplitWithEnd(); + +protected: + void SetUp() override { mContext.SetConfigName("project##config_0"); } + +private: PipelineContext mContext; }; -UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithBeginContinue); -UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithBeginEnd); -UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithBegin); -UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithContinueEnd); -UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithEnd); +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithBeginContinue) +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithBeginEnd) +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithBegin) +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithContinueEnd) +UNIT_TEST_CASE(ProcessorSplitMultilineLogDisacardUnmatchUnittest, TestLogSplitWithEnd) void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginContinue() { // make config @@ -2025,6 +2029,51 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBegin() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + // case: start + unmatch + \n + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } } void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { @@ -2442,4 +2491,4 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithEnd() { } } // namespace logtail -UNIT_TEST_MAIN \ No newline at end of file +UNIT_TEST_MAIN From fd070b61ecf0743dccda558fdcf98c487286e323 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Fri, 22 Mar 2024 06:41:50 +0000 Subject: [PATCH 10/15] fix --- ...ProcessorSplitMultilineLogStringNative.cpp | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp index 991792f6c7..b6fe41f81b 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -118,8 +118,23 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo return; } const LogEvent& sourceEvent = e.Cast(); + auto sourceIterator = sourceEvent.FindContent(mSourceKey); // This is an inner plugin, so the size of log content must equal to 2 (sourceKey, __file_offset__) - if (sourceEvent.GetContents().size() != 2 || !sourceEvent.HasContent(mSourceKey)) { + if (sourceEvent.Size() != 2) { + newEvents.emplace_back(std::move(e)); + LOG_ERROR(mContext->GetLogger(), + ("unexpected error", "size of event content doesn't equal to 2")("processor", sName)( + "config", mContext->GetConfigName())); + mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, + "unexpected error: size of event content doesn't equal to 2.\tSourceKey: " + + mSourceKey + "\tprocessor: " + sName + + "\tconfig: " + mContext->GetConfigName(), + mContext->GetProjectName(), + mContext->GetLogstoreName(), + mContext->GetRegion()); + return; + } + if (sourceIterator == sourceEvent.end()) { newEvents.emplace_back(std::move(e)); LOG_ERROR(mContext->GetLogger(), ("unexpected error", "some events do not have the SourceKey")("SourceKey", mSourceKey)( @@ -134,7 +149,12 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo return; } - StringView sourceVal = sourceEvent.GetContent(mSourceKey); + uint32_t sourceOffset = 0; + if (sourceEvent.FindContent(LOG_RESERVED_KEY_FILE_OFFSET) != sourceEvent.end()) { + sourceOffset = atol(sourceEvent.GetContent(LOG_RESERVED_KEY_FILE_OFFSET).data()); // use safer method + } + + StringView sourceVal = sourceIterator->second; StringBuffer sourceKey = logGroup.GetSourceBuffer()->CopyString(mSourceKey); const char* multiStartIndex = nullptr; std::string exception; @@ -146,10 +166,6 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo multiStartIndex = sourceVal.data(); } - uint32_t sourceOffset = 0; - if (sourceEvent.HasContent(LOG_RESERVED_KEY_FILE_OFFSET)) { - sourceOffset = atol(sourceEvent.GetContent(LOG_RESERVED_KEY_FILE_OFFSET).data()); // use safer method - } size_t begin = 0; while (begin < sourceVal.size()) { From 8d6f172e08fdb9f6a7af6bffa1ef9eb2460d150a Mon Sep 17 00:00:00 2001 From: Bingchang Chen Date: Thu, 21 Mar 2024 23:59:48 -0700 Subject: [PATCH 11/15] Update core/processor/ProcessorSplitMultilineLogStringNative.cpp Co-authored-by: henryzhx8 --- ...ProcessorSplitMultilineLogStringNative.cpp | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp index b6fe41f81b..63d557a162 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -118,7 +118,6 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo return; } const LogEvent& sourceEvent = e.Cast(); - auto sourceIterator = sourceEvent.FindContent(mSourceKey); // This is an inner plugin, so the size of log content must equal to 2 (sourceKey, __file_offset__) if (sourceEvent.Size() != 2) { newEvents.emplace_back(std::move(e)); @@ -126,35 +125,47 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo ("unexpected error", "size of event content doesn't equal to 2")("processor", sName)( "config", mContext->GetConfigName())); mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, - "unexpected error: size of event content doesn't equal to 2.\tSourceKey: " - + mSourceKey + "\tprocessor: " + sName + "unexpected error: size of event content doesn't equal to 2.\tprocessor: " + sName + "\tconfig: " + mContext->GetConfigName(), mContext->GetProjectName(), mContext->GetLogstoreName(), mContext->GetRegion()); return; } + + auto sourceIterator = sourceEvent.FindContent(mSourceKey); if (sourceIterator == sourceEvent.end()) { newEvents.emplace_back(std::move(e)); LOG_ERROR(mContext->GetLogger(), - ("unexpected error", "some events do not have the SourceKey")("SourceKey", mSourceKey)( + ("unexpected error", "some events do not have the SourceKey")( "processor", sName)("config", mContext->GetConfigName())); mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, - "unexpected error: some events do not have the sourceKey.\tSourceKey: " - + mSourceKey + "\tprocessor: " + sName + "unexpected error: some events do not have the sourceKey.\tprocessor: " + sName + "\tconfig: " + mContext->GetConfigName(), mContext->GetProjectName(), mContext->GetLogstoreName(), mContext->GetRegion()); return; } + StringView sourceVal = sourceIterator->second; - uint32_t sourceOffset = 0; - if (sourceEvent.FindContent(LOG_RESERVED_KEY_FILE_OFFSET) != sourceEvent.end()) { - sourceOffset = atol(sourceEvent.GetContent(LOG_RESERVED_KEY_FILE_OFFSET).data()); // use safer method + auto offsetIterator = sourceEvent.FindContent(LOG_RESERVED_KEY_FILE_OFFSET); + if (offsetIterator == sourceEvent.end()) { + newEvents.emplace_back(std::move(e)); + LOG_ERROR(mContext->GetLogger(), + ("unexpected error", "event do not have key __file_ofset__")("processor", sName)( + "config", mContext->GetConfigName())); + mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, + "unexpected error: event do not have key __file_ofset__.\tprocessor" + + sName + + "\tconfig: " + mContext->GetConfigName(), + mContext->GetProjectName(), + mContext->GetLogstoreName(), + mContext->GetRegion()); + return; } - - StringView sourceVal = sourceIterator->second; + uint32_t sourceOffset = atol(offsetIterator->second.data()); + StringBuffer sourceKey = logGroup.GetSourceBuffer()->CopyString(mSourceKey); const char* multiStartIndex = nullptr; std::string exception; From d79c670f83f080c01153f021efa3d475ee4aa7d6 Mon Sep 17 00:00:00 2001 From: henryzhx8 Date: Mon, 25 Mar 2024 08:52:37 +0000 Subject: [PATCH 12/15] fix metric --- core/monitor/MetricConstants.cpp | 11 +-- ...ProcessorSplitMultilineLogStringNative.cpp | 73 ++++++++++--------- .../ProcessorSplitMultilineLogStringNative.h | 5 +- 3 files changed, 47 insertions(+), 42 deletions(-) diff --git a/core/monitor/MetricConstants.cpp b/core/monitor/MetricConstants.cpp index f343fef1f3..345a547e84 100644 --- a/core/monitor/MetricConstants.cpp +++ b/core/monitor/MetricConstants.cpp @@ -43,10 +43,11 @@ const std::string METRIC_PROC_PARSE_ERROR_TOTAL = "proc_parse_error_total"; const std::string METRIC_PROC_KEY_COUNT_NOT_MATCH_ERROR_TOTAL = "proc_key_count_not_match_error_total"; const std::string METRIC_PROC_HISTORY_FAILURE_TOTAL = "proc_history_failure_total"; -const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_SPLITTED_RECORDS_TOTAL - = "proc_split_multiline_log_splitted_records_total"; -const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_RECORDS_TOTAL - = "proc_split_multiline_log_unmatched_records_total"; +const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_MATCHED_RECORDS_TOTAL + = "proc_split_multiline_log_matched_records_total"; +const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_MATCHED_LINES_TOTAL = "proc_split_multiline_log_matched_lines_total"; +const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_LINES_TOTAL + = "proc_split_multiline_log_unmatched_lines_total"; // processor filter metrics const std::string METRIC_PROC_FILTER_IN_SIZE_BYTES = "proc_filter_in_size_bytes"; @@ -61,4 +62,4 @@ const std::string PLUGIN_PROCESSOR_PARSE_REGEX_NATIVE = "processor_parse_regex_n // processor desensitize metrics const std::string METRIC_PROC_DESENSITIZE_RECORDS_TOTAL = "proc_desensitize_records_total"; -} \ No newline at end of file +} // namespace logtail \ No newline at end of file diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp index 63d557a162..9902939caf 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -64,20 +64,21 @@ bool ProcessorSplitMultilineLogStringNative::Init(const Json::Value& config) { mContext->GetRegion()); } - mSplitLines = &(GetContext().GetProcessProfile().splitLines); + mProcMatchedEventsCnt = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_MATCHED_RECORDS_TOTAL); + mProcMatchedLinesCnt = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_MATCHED_LINES_TOTAL); + mProcUnmatchedLinesCnt + = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_LINES_TOTAL); + + mSplitLines = &(mContext->GetProcessProfile().splitLines); - mProcSplittedEventsCnt - = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_SPLITTED_RECORDS_TOTAL); - mProcUnmatchedEventsCnt - = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_RECORDS_TOTAL); return true; } /* - Presume: - 1. Events must be LogEvent - 2. This is an inner plugin, so the size of log content must equal to 2 (sourceKey, __file_offset__) - 3. The last character of each event must be \0 (set in LogFileReader) + Presumption: + 1. Event must be LogEvent + 2. Log content must have exactly 2 elements (sourceKey, __file_offset__) + 3. The last \n of each log string is discarded in LogFileReader */ void ProcessorSplitMultilineLogStringNative::Process(PipelineEventGroup& logGroup) { if (logGroup.GetEvents().empty()) { @@ -89,7 +90,6 @@ void ProcessorSplitMultilineLogStringNative::Process(PipelineEventGroup& logGrou ProcessEvent(logGroup, logPath, std::move(e), newEvents); } *mSplitLines = newEvents.size(); - mProcSplittedEventsCnt->Add(newEvents.size()); logGroup.SwapEvents(newEvents); } @@ -125,22 +125,22 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo ("unexpected error", "size of event content doesn't equal to 2")("processor", sName)( "config", mContext->GetConfigName())); mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, - "unexpected error: size of event content doesn't equal to 2.\tprocessor: " + sName - + "\tconfig: " + mContext->GetConfigName(), + "unexpected error: size of event content doesn't equal to 2.\tprocessor: " + + sName + "\tconfig: " + mContext->GetConfigName(), mContext->GetProjectName(), mContext->GetLogstoreName(), mContext->GetRegion()); return; } - + auto sourceIterator = sourceEvent.FindContent(mSourceKey); if (sourceIterator == sourceEvent.end()) { newEvents.emplace_back(std::move(e)); LOG_ERROR(mContext->GetLogger(), - ("unexpected error", "some events do not have the SourceKey")( - "processor", sName)("config", mContext->GetConfigName())); + ("unexpected error", "event does not have SourceKey")("processor", sName)("config", + mContext->GetConfigName())); mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, - "unexpected error: some events do not have the sourceKey.\tprocessor: " + sName + "unexpected error: event does not have SourceKey.\tprocessor: " + sName + "\tconfig: " + mContext->GetConfigName(), mContext->GetProjectName(), mContext->GetLogstoreName(), @@ -153,11 +153,10 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo if (offsetIterator == sourceEvent.end()) { newEvents.emplace_back(std::move(e)); LOG_ERROR(mContext->GetLogger(), - ("unexpected error", "event do not have key __file_ofset__")("processor", sName)( - "config", mContext->GetConfigName())); + ("unexpected error", + "event does not have key __file_ofset__")("processor", sName)("config", mContext->GetConfigName())); mContext->GetAlarm().SendAlarm(SPLIT_LOG_FAIL_ALARM, - "unexpected error: event do not have key __file_ofset__.\tprocessor" - + sName + "unexpected error: event does not have key __file_ofset__.\tprocessor" + sName + "\tconfig: " + mContext->GetConfigName(), mContext->GetProjectName(), mContext->GetLogstoreName(), @@ -165,10 +164,10 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo return; } uint32_t sourceOffset = atol(offsetIterator->second.data()); - + StringBuffer sourceKey = logGroup.GetSourceBuffer()->CopyString(mSourceKey); - const char* multiStartIndex = nullptr; std::string exception; + const char* multiStartIndex = nullptr; bool isPartialLog = false; if (mMultiline.GetStartPatternReg() == nullptr && mMultiline.GetContinuePatternReg() == nullptr && mMultiline.GetEndPatternReg() != nullptr) { @@ -177,7 +176,6 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo multiStartIndex = sourceVal.data(); } - size_t begin = 0; while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); @@ -198,6 +196,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo // case: continue + end CreateNewEvent(content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents); multiStartIndex = content.data() + content.size() + 1; + mProcMatchedEventsCnt->Add(1); } else { HandleUnmatchLogs(content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents, logPath); } @@ -220,6 +219,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo sourceEvent, logGroup, newEvents); + mProcMatchedEventsCnt->Add(1); } else { HandleUnmatchLogs( StringView(multiStartIndex, content.data() + content.size() - multiStartIndex), @@ -245,6 +245,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo } else { multiStartIndex = content.data() + content.size() + 1; } + mProcMatchedEventsCnt->Add(1); // if only end pattern is given, start another log automatically } // no continue pattern given, and the current line in not matched against the end pattern, @@ -261,6 +262,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo logGroup, newEvents); multiStartIndex = content.data(); + mProcMatchedEventsCnt->Add(1); } } else { // case: start + continue @@ -271,6 +273,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo sourceEvent, logGroup, newEvents); + mProcMatchedEventsCnt->Add(1); if (!BoostRegexMatch(content.data(), content.size(), *mMultiline.GetStartPatternReg(), exception)) { // when no end pattern is given, the only chance to enter unmatched state is when both // start and continue pattern are given, and the current line is not matched against the @@ -295,6 +298,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo sourceEvent, logGroup, newEvents); + mProcMatchedEventsCnt->Add(1); } else { HandleUnmatchLogs(StringView(multiStartIndex, sourceVal.data() + sourceVal.size() - multiStartIndex), sourceOffset, @@ -337,22 +341,21 @@ void ProcessorSplitMultilineLogStringNative::HandleUnmatchLogs(const StringView& size_t begin = 0; while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); - mProcUnmatchedEventsCnt->Add(1); + mProcUnmatchedLinesCnt->Add(1); if (!mMultiline.mIgnoringUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { - LOG_WARNING( - GetContext().GetLogger(), - ("unmatched log line", "please check regex")( - "action", UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment))( - "first 1KB", content.substr(0, 1024).to_string())("filepath", logPath.to_string())( - "processor", sName)("config", GetContext().GetConfigName())("log bytes", content.size() + 1)); - GetContext().GetAlarm().SendAlarm( + LOG_WARNING(mContext->GetLogger(), + ("unmatched log line", "please check regex")( + "action", UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment))( + "first 1KB", content.substr(0, 1024).to_string())("filepath", logPath.to_string())( + "processor", sName)("config", mContext->GetConfigName())("log bytes", content.size() + 1)); + mContext->GetAlarm().SendAlarm( SPLIT_LOG_FAIL_ALARM, "unmatched log line, first 1KB:" + content.substr(0, 1024).to_string() + "\taction: " + UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment) + "\tfilepath: " - + logPath.to_string() + "\tprocessor: " + sName + "\tconfig: " + GetContext().GetConfigName(), - GetContext().GetProjectName(), - GetContext().GetLogstoreName(), - GetContext().GetRegion()); + + logPath.to_string() + "\tprocessor: " + sName + "\tconfig: " + mContext->GetConfigName(), + mContext->GetProjectName(), + mContext->GetLogstoreName(), + mContext->GetRegion()); } if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::SINGLE_LINE) { CreateNewEvent(content, sourceoffset, sourceKey, sourceEvent, logGroup, newEvents); diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.h b/core/processor/ProcessorSplitMultilineLogStringNative.h index d771e299e4..aad292f5b6 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.h +++ b/core/processor/ProcessorSplitMultilineLogStringNative.h @@ -63,8 +63,9 @@ class ProcessorSplitMultilineLogStringNative : public Processor { int* mSplitLines = nullptr; - CounterPtr mProcSplittedEventsCnt; - CounterPtr mProcUnmatchedEventsCnt; + CounterPtr mProcMatchedEventsCnt; + CounterPtr mProcMatchedLinesCnt; + CounterPtr mProcUnmatchedLinesCnt; #ifdef APSARA_UNIT_TEST_MAIN friend class ProcessorSplitMultilineLogStringNativeUnittest; From 786b81d1644fb14ecd12d5731bc779ba4446e320 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Tue, 26 Mar 2024 05:55:05 +0000 Subject: [PATCH 13/15] metrics --- core/monitor/MetricConstants.h | 5 +- ...ProcessorSplitMultilineLogStringNative.cpp | 42 ++-- .../ProcessorSplitMultilineLogStringNative.h | 6 +- ...rSplitMultilineLogStringNativeUnittest.cpp | 218 +++++++++++++++++- 4 files changed, 246 insertions(+), 25 deletions(-) diff --git a/core/monitor/MetricConstants.h b/core/monitor/MetricConstants.h index bbed4b560c..a047aa611a 100644 --- a/core/monitor/MetricConstants.h +++ b/core/monitor/MetricConstants.h @@ -42,8 +42,9 @@ extern const std::string METRIC_PROC_PARSE_OUT_SIZE_BYTES; extern const std::string METRIC_PROC_PARSE_ERROR_TOTAL; extern const std::string METRIC_PROC_KEY_COUNT_NOT_MATCH_ERROR_TOTAL; extern const std::string METRIC_PROC_HISTORY_FAILURE_TOTAL; -extern const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_SPLITTED_RECORDS_TOTAL; -extern const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_RECORDS_TOTAL; +extern const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_MATCHED_RECORDS_TOTAL; +extern const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_MATCHED_LINES_TOTAL; +extern const std::string METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_LINES_TOTAL; // processor filter metrics extern const std::string METRIC_PROC_FILTER_IN_SIZE_BYTES; diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp index 9902939caf..74ed1800f0 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -66,8 +66,7 @@ bool ProcessorSplitMultilineLogStringNative::Init(const Json::Value& config) { mProcMatchedEventsCnt = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_MATCHED_RECORDS_TOTAL); mProcMatchedLinesCnt = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_MATCHED_LINES_TOTAL); - mProcUnmatchedLinesCnt - = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_LINES_TOTAL); + mProcUnmatchedLinesCnt = GetMetricsRecordRef().CreateCounter(METRIC_PROC_SPLIT_MULTILINE_LOG_UNMATCHED_LINES_TOTAL); mSplitLines = &(mContext->GetProcessProfile().splitLines); @@ -84,12 +83,15 @@ void ProcessorSplitMultilineLogStringNative::Process(PipelineEventGroup& logGrou if (logGroup.GetEvents().empty()) { return; } + mInputLinesOneProcess = 0; + mUnmatchLinesOneProcess = 0; EventsContainer newEvents; StringView logPath = logGroup.GetMetadata(EventGroupMetaKey::LOG_FILE_PATH_RESOLVED); for (PipelineEventPtr& e : logGroup.MutableEvents()) { ProcessEvent(logGroup, logPath, std::move(e), newEvents); } - *mSplitLines = newEvents.size(); + mProcMatchedLinesCnt->Add(mInputLinesOneProcess - mUnmatchLinesOneProcess); + mProcUnmatchedLinesCnt->Add(mUnmatchLinesOneProcess); logGroup.SwapEvents(newEvents); } @@ -179,6 +181,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo size_t begin = 0; while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); + ++mInputLinesOneProcess; if (!isPartialLog) { // it is impossible to enter this state if only end pattern is given boost::regex regex; @@ -341,27 +344,28 @@ void ProcessorSplitMultilineLogStringNative::HandleUnmatchLogs(const StringView& size_t begin = 0; while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); - mProcUnmatchedLinesCnt->Add(1); - if (!mMultiline.mIgnoringUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { - LOG_WARNING(mContext->GetLogger(), - ("unmatched log line", "please check regex")( - "action", UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment))( - "first 1KB", content.substr(0, 1024).to_string())("filepath", logPath.to_string())( - "processor", sName)("config", mContext->GetConfigName())("log bytes", content.size() + 1)); - mContext->GetAlarm().SendAlarm( - SPLIT_LOG_FAIL_ALARM, - "unmatched log line, first 1KB:" + content.substr(0, 1024).to_string() + "\taction: " - + UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment) + "\tfilepath: " - + logPath.to_string() + "\tprocessor: " + sName + "\tconfig: " + mContext->GetConfigName(), - mContext->GetProjectName(), - mContext->GetLogstoreName(), - mContext->GetRegion()); - } + ++mUnmatchLinesOneProcess; if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::SINGLE_LINE) { CreateNewEvent(content, sourceoffset, sourceKey, sourceEvent, logGroup, newEvents); } begin += content.size() + 1; } + if (!mMultiline.mIgnoringUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { + size_t warningLogSize = sourceVal.size() < 1024 ? sourceVal.size() : 1024; + LOG_WARNING(mContext->GetLogger(), + ("unmatched log line", "please check regex")( + "action", UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment))( + "first 1KB:", sourceVal.substr(0, warningLogSize).to_string())("filepath", logPath.to_string())( + "processor", sName)("config", mContext->GetConfigName())("log bytes", sourceVal.size() + 1)); + mContext->GetAlarm().SendAlarm( + SPLIT_LOG_FAIL_ALARM, + "unmatched log line, first 1KB:" + sourceVal.substr(0, warningLogSize).to_string() + "\taction: " + + UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment) + "\tfilepath: " + + logPath.to_string() + "\tprocessor: " + sName + "\tconfig: " + mContext->GetConfigName(), + mContext->GetProjectName(), + mContext->GetLogstoreName(), + mContext->GetRegion()); + } } StringView ProcessorSplitMultilineLogStringNative::GetNextLine(StringView log, size_t begin) { diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.h b/core/processor/ProcessorSplitMultilineLogStringNative.h index aad292f5b6..58160b4ccd 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.h +++ b/core/processor/ProcessorSplitMultilineLogStringNative.h @@ -62,6 +62,8 @@ class ProcessorSplitMultilineLogStringNative : public Processor { StringView GetNextLine(StringView log, size_t begin); int* mSplitLines = nullptr; + int mInputLinesOneProcess = 0; + int mUnmatchLinesOneProcess = 0; CounterPtr mProcMatchedEventsCnt; CounterPtr mProcMatchedLinesCnt; @@ -69,8 +71,8 @@ class ProcessorSplitMultilineLogStringNative : public Processor { #ifdef APSARA_UNIT_TEST_MAIN friend class ProcessorSplitMultilineLogStringNativeUnittest; - friend class ProcessorSplitRegexDisacardUnmatchUnittest; - friend class ProcessorSplitRegexKeepUnmatchUnittest; + friend class ProcessorSplitMultilineLogDisacardUnmatchUnittest; + friend class ProcessorSplitMultilineLogKeepUnmatchUnittest; #endif }; diff --git a/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp b/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp index d03de04843..319a3af99b 100644 --- a/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorSplitMultilineLogStringNativeUnittest.cpp @@ -67,6 +67,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginCon ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: unmatch + unmatch + // input: 1 event, 2 lines + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -96,6 +98,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginCon APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } // case: start + unmatch + // input: 1 event, 2 lines + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -141,6 +145,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginCon APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + start + continue + continue + unmatch + // input: 1 event, 5 lines + // output: 1 event, 3 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -187,6 +193,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginCon APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + start + start + // input: 1 event, 3 lines + // output: 2 events, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -242,6 +250,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginCon APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + start + continue + continue + // input: 1 event, 4 lines + // output: 1 event, 3 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -288,6 +298,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginCon APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: continue + // input: 1 event, 1 line + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -315,6 +327,14 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginCon std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } + + // metric + APSARA_TEST_EQUAL_FATAL(0 + 1 + 1 + 2 + 1 + 0, + ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(0 + 1 + 3 + 2 + 3 + 0, + ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(2 + 1 + 2 + 1 + 1 + 1, + ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginEnd() { @@ -331,6 +351,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginEnd ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: unmatch + unmatch + // input: 1 event, 2 lines + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -360,6 +382,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginEnd APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } // case: unmatch+start+unmatch + // input: 1 event, 3 lines + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -389,6 +413,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginEnd APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } // case: unmatch+start+End+unmatch + // input: 1 event, 4 lines + // output: 1 event, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -434,8 +460,9 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginEnd std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - // case: start+start + // input: 1 event, 2 lines + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -465,6 +492,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginEnd APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } // case: unmatch+start+End + // input: 1 event, 3 lines + // output: 1 event, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -510,6 +539,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginEnd APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch+start+unmatch+End+unmatch + // input: 1 event, 4 lines + // output: 1 event, 3 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -555,6 +586,14 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBeginEnd std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + + // metric + APSARA_TEST_EQUAL_FATAL(0 + 0 + 1 + 0 + 1 + 1, + ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(0 + 0 + 2 + 0 + 2 + 3, + ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(2 + 3 + 2 + 2 + 1 + 1, + ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBegin() { @@ -571,6 +610,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBegin() ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: unmatch + start + // input: 1 event, 2 lines + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -616,6 +657,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBegin() APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + // input: 1 event, 1 line + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -646,6 +689,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBegin() APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } // case: start + start + // input: 1 event, 2 lines + // output: 2 events, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -701,6 +746,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBegin() APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: start + unmatch + // input: 1 event, 2 lines + // output: 1 event, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -745,6 +792,61 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithBegin() std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + // case: unmatch + start + unmatch + unmatch + // input: 1 event, 4 lines + // output: 1 event, 3 lines + { + auto sourceBuffer = std::make_shared(); + PipelineEventGroup eventGroup(sourceBuffer); + std::stringstream inJson; + inJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_UNMATCH << R"(\n)" << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH + << R"(", + "__file_offset__": 0 + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + eventGroup.FromJsonString(inJson.str()); + + // run test function + ProcessorSplitMultilineLogStringNative.Process(eventGroup); + // judge result + std::stringstream expectJson; + expectJson << R"({ + "events" : + [ + { + "contents" : + { + "content" : ")" + << LOG_BEGIN_STRING << R"(\n)" << LOG_UNMATCH << R"(\n)" << LOG_UNMATCH << R"(" + }, + "timestamp" : 12345678901, + "timestampNanosecond" : 0, + "type" : 1 + } + ] + })"; + std::string outJson = eventGroup.ToJsonString(); + APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); + } + + // metric + APSARA_TEST_EQUAL_FATAL(1 + 0 + 2 + 1 + 1, + ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(1 + 0 + 2 + 2 + 3, ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(1 + 1 + 0 + 0 + 1, + ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithContinueEnd() { @@ -761,6 +863,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithContinue ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: unmatch + // input: 1 event, 1 line + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -790,6 +894,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithContinue APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } // case: Continue + unmatch + // input: 1 event, 2 lines + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -819,6 +925,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithContinue APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } // case: Continue + Continue + end + // input: 1 event, 3 lines + // output: 1 event, 3 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -864,6 +972,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithContinue APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: continue + // input: 1 event, 1 line + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -893,6 +1003,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithContinue APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } // case: end + // input: 1 event, 1 line + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -937,6 +1049,13 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithContinue std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + + // metric + APSARA_TEST_EQUAL_FATAL(0 + 0 + 1 + 0 + 1, + ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(0 + 0 + 3 + 0 + 1, ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(1 + 2 + 0 + 1 + 0, + ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithEnd() { @@ -952,6 +1071,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithEnd() { ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: end + // input: 1 event, 1 line + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -997,6 +1118,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithEnd() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + // input: 1 event, 1 line + // output: 0 event, 0 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1026,6 +1149,8 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithEnd() { APSARA_TEST_STREQ("null", CompactJson(outJson).c_str()); } // case: unmatch + end + unmatch + // input: 1 event, 3 lines + // output: 1 event, 2 lines { // make eventGroup auto sourceBuffer = std::make_shared(); @@ -1071,6 +1196,11 @@ void ProcessorSplitMultilineLogDisacardUnmatchUnittest::TestLogSplitWithEnd() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + + // metric + APSARA_TEST_EQUAL_FATAL(1 + 0 + 1, ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(1 + 0 + 2, ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(0 + 1 + 1, ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } class ProcessorSplitMultilineLogKeepUnmatchUnittest : public ::testing::Test { @@ -1105,6 +1235,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginContinu ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: unmatch + unmatch + // input: 1 event, 2 lines + // output: 2 events, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1160,6 +1292,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginContinu APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // start + unmatch + // input: 1 event, 2 lines + // output: 2 events, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1215,6 +1349,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginContinu APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // unmatch + start + continue + continue + unmatch + // input: 1 event, 5 lines + // output: 3 events, 5 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1281,6 +1417,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginContinu APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + start + start + // input: 1 event, 3 lines + // output: 3 events, 3 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1346,6 +1484,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginContinu APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + start + continue + continue + // input: 1 event, 4 lines + // output: 2 event, 4 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1402,6 +1542,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginContinu APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: continue + // input: 1 event, 1 line + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1446,6 +1588,14 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginContinu std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + + // metric + APSARA_TEST_EQUAL_FATAL(0 + 1 + 1 + 2 + 1 + 0, + ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(0 + 1 + 3 + 2 + 3 + 0, + ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(2 + 1 + 2 + 1 + 1 + 1, + ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { @@ -1463,6 +1613,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: unmatch + unmatch + // input: 1 event, 2 lines + // output: 2 events, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1518,6 +1670,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch+start+unmatch + // input: 1 event, 3 lines + // output: 3 events, 3 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1583,6 +1737,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch+start+End+unmatch + // input: 1 event, 4 lines + // output: 3 events, 4 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1648,8 +1804,9 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } - // case: start+start + // input: 1 event, 2 lines + // output: 2 event, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1705,6 +1862,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch+start+End + // input: 1 event, 3 lines + // output: 2 events, 3 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1760,6 +1919,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch+start+unmatch+End + // input: 1 event, 4 lines + // output: 2 events, 4 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1815,6 +1976,14 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBeginEnd() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + + // metric + APSARA_TEST_EQUAL_FATAL(0 + 0 + 1 + 0 + 1 + 1, + ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(0 + 0 + 2 + 0 + 2 + 3, + ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(2 + 3 + 2 + 2 + 1 + 1, + ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBegin() { @@ -1830,6 +1999,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBegin() { ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: unmatch + start + // input: 1 event, 2 lines + // output: 2 events, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1885,6 +2056,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBegin() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + // input: 1 event, 1 line + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1930,6 +2103,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBegin() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: start + start + // input: 1 event, 2 lines + // output: 2 events, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -1985,6 +2160,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBegin() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: start + unmatch + // input: 1 event, 2 lines + // output: 1 event, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2030,6 +2207,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBegin() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: start + unmatch + \n + // input: 1 event, 2 lines + // output: 1 event, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2074,6 +2253,13 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithBegin() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + + // metric + APSARA_TEST_EQUAL_FATAL(1 + 0 + 2 + 1 + 1, + ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(1 + 0 + 2 + 2 + 2, ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(1 + 1 + 0 + 0 + 0, + ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithContinueEnd() { @@ -2090,6 +2276,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithContinueEnd( ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: unmatch + // input: 1 event, 1 line + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2135,6 +2323,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithContinueEnd( APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: Continue + unmatch + // input: 1 event, 2 lines + // output: 1 event, 2 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2190,6 +2380,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithContinueEnd( APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: Continue + Continue + end + // input: 1 event, 3 lines + // output: 1 event, 3 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2235,6 +2427,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithContinueEnd( APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: continue + // input: 1 event, 1 line + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2280,6 +2474,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithContinueEnd( APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: end + // input: 1 event, 1 line + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2324,6 +2520,13 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithContinueEnd( std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + + // metric + APSARA_TEST_EQUAL_FATAL(0 + 0 + 1 + 0 + 1, + ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(0 + 0 + 3 + 0 + 1, ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(1 + 2 + 0 + 1 + 0, + ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithEnd() { @@ -2344,6 +2547,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithEnd() { ProcessorSplitMultilineLogStringNative.SetMetricsRecordRef(ProcessorSplitMultilineLogStringNative::sName, "1"); APSARA_TEST_TRUE_FATAL(ProcessorSplitMultilineLogStringNative.Init(config)); // case: end + // input: 1 event, 1 line + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2389,6 +2594,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithEnd() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + // input: 1 event, 1 line + // output: 1 event, 1 line { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2434,6 +2641,8 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithEnd() { APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } // case: unmatch + end + unmatch + // input: 1 event, 3 lines + // output: 2 events, 3 lines { auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); @@ -2488,6 +2697,11 @@ void ProcessorSplitMultilineLogKeepUnmatchUnittest::TestLogSplitWithEnd() { std::string outJson = eventGroup.ToJsonString(); APSARA_TEST_STREQ(CompactJson(expectJson.str()).c_str(), CompactJson(outJson).c_str()); } + + // metric + APSARA_TEST_EQUAL_FATAL(1 + 0 + 1, ProcessorSplitMultilineLogStringNative.mProcMatchedEventsCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(1 + 0 + 2, ProcessorSplitMultilineLogStringNative.mProcMatchedLinesCnt->GetValue()); + APSARA_TEST_EQUAL_FATAL(0 + 1 + 1, ProcessorSplitMultilineLogStringNative.mProcUnmatchedLinesCnt->GetValue()); } } // namespace logtail From a924ecfcbc27b1ae2818ba23a1cfec2c571ed0b7 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Tue, 26 Mar 2024 06:09:33 +0000 Subject: [PATCH 14/15] only log the first line when unmatch --- .../ProcessorSplitMultilineLogStringNative.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp index 74ed1800f0..27bf7578ec 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -341,7 +341,7 @@ void ProcessorSplitMultilineLogStringNative::HandleUnmatchLogs(const StringView& PipelineEventGroup& logGroup, EventsContainer& newEvents, StringView logPath) { - size_t begin = 0; + size_t begin, fisrtLogSize, totalLines = 0; while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); ++mUnmatchLinesOneProcess; @@ -349,17 +349,21 @@ void ProcessorSplitMultilineLogStringNative::HandleUnmatchLogs(const StringView& CreateNewEvent(content, sourceoffset, sourceKey, sourceEvent, logGroup, newEvents); } begin += content.size() + 1; + ++totalLines; + if (fisrtLogSize <= 0) { + fisrtLogSize = content.size(); + } } if (!mMultiline.mIgnoringUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { - size_t warningLogSize = sourceVal.size() < 1024 ? sourceVal.size() : 1024; LOG_WARNING(mContext->GetLogger(), ("unmatched log line", "please check regex")( "action", UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment))( - "first 1KB:", sourceVal.substr(0, warningLogSize).to_string())("filepath", logPath.to_string())( - "processor", sName)("config", mContext->GetConfigName())("log bytes", sourceVal.size() + 1)); + "first log:", sourceVal.substr(0, fisrtLogSize).to_string())("filepath", logPath.to_string())( + "processor", sName)("config", mContext->GetConfigName())("total lines", totalLines)( + "log bytes", sourceVal.size() + 1)); mContext->GetAlarm().SendAlarm( SPLIT_LOG_FAIL_ALARM, - "unmatched log line, first 1KB:" + sourceVal.substr(0, warningLogSize).to_string() + "\taction: " + "unmatched log line, first log:" + sourceVal.substr(0, fisrtLogSize).to_string() + "\taction: " + UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment) + "\tfilepath: " + logPath.to_string() + "\tprocessor: " + sName + "\tconfig: " + mContext->GetConfigName(), mContext->GetProjectName(), From 0711cbbfef39efde5bedfb8fdf29f5b2fae30863 Mon Sep 17 00:00:00 2001 From: abingcbc Date: Tue, 26 Mar 2024 09:23:51 +0000 Subject: [PATCH 15/15] fix --- ...ProcessorSplitMultilineLogStringNative.cpp | 41 +++++++++++-------- .../ProcessorSplitMultilineLogStringNative.h | 13 +++--- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.cpp b/core/processor/ProcessorSplitMultilineLogStringNative.cpp index 27bf7578ec..dbd45f30f7 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.cpp +++ b/core/processor/ProcessorSplitMultilineLogStringNative.cpp @@ -83,15 +83,15 @@ void ProcessorSplitMultilineLogStringNative::Process(PipelineEventGroup& logGrou if (logGroup.GetEvents().empty()) { return; } - mInputLinesOneProcess = 0; - mUnmatchLinesOneProcess = 0; + int inputLines = 0; + int unmatchLines = 0; EventsContainer newEvents; StringView logPath = logGroup.GetMetadata(EventGroupMetaKey::LOG_FILE_PATH_RESOLVED); for (PipelineEventPtr& e : logGroup.MutableEvents()) { - ProcessEvent(logGroup, logPath, std::move(e), newEvents); + ProcessEvent(logGroup, logPath, std::move(e), newEvents, &inputLines, &unmatchLines); } - mProcMatchedLinesCnt->Add(mInputLinesOneProcess - mUnmatchLinesOneProcess); - mProcUnmatchedLinesCnt->Add(mUnmatchLinesOneProcess); + mProcMatchedLinesCnt->Add(inputLines - unmatchLines); + mProcUnmatchedLinesCnt->Add(unmatchLines); logGroup.SwapEvents(newEvents); } @@ -114,7 +114,9 @@ bool ProcessorSplitMultilineLogStringNative::IsSupportedEvent(const PipelineEven void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& logGroup, StringView logPath, PipelineEventPtr&& e, - EventsContainer& newEvents) { + EventsContainer& newEvents, + int* inputLines, + int* unmatchLines) { if (!IsSupportedEvent(e)) { newEvents.emplace_back(std::move(e)); return; @@ -181,7 +183,7 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo size_t begin = 0; while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); - ++mInputLinesOneProcess; + ++(*inputLines); if (!isPartialLog) { // it is impossible to enter this state if only end pattern is given boost::regex regex; @@ -201,7 +203,8 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo multiStartIndex = content.data() + content.size() + 1; mProcMatchedEventsCnt->Add(1); } else { - HandleUnmatchLogs(content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents, logPath); + HandleUnmatchLogs( + content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents, logPath, unmatchLines); } } else { // case: start + continue or continue + end @@ -231,7 +234,8 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo sourceEvent, logGroup, newEvents, - logPath); + logPath, + unmatchLines); } isPartialLog = false; } else { @@ -281,7 +285,8 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo // when no end pattern is given, the only chance to enter unmatched state is when both // start and continue pattern are given, and the current line is not matched against the // start pattern - HandleUnmatchLogs(content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents, logPath); + HandleUnmatchLogs( + content, sourceOffset, sourceKey, sourceEvent, logGroup, newEvents, logPath, unmatchLines); isPartialLog = false; } else { multiStartIndex = content.data(); @@ -309,7 +314,8 @@ void ProcessorSplitMultilineLogStringNative::ProcessEvent(PipelineEventGroup& lo sourceEvent, logGroup, newEvents, - logPath); + logPath, + unmatchLines); } } } @@ -340,30 +346,31 @@ void ProcessorSplitMultilineLogStringNative::HandleUnmatchLogs(const StringView& const LogEvent& sourceEvent, PipelineEventGroup& logGroup, EventsContainer& newEvents, - StringView logPath) { + StringView logPath, + int* unmatchLines) { size_t begin, fisrtLogSize, totalLines = 0; while (begin < sourceVal.size()) { StringView content = GetNextLine(sourceVal, begin); - ++mUnmatchLinesOneProcess; + ++(*unmatchLines); if (mMultiline.mUnmatchedContentTreatment == MultilineOptions::UnmatchedContentTreatment::SINGLE_LINE) { CreateNewEvent(content, sourceoffset, sourceKey, sourceEvent, logGroup, newEvents); } begin += content.size() + 1; ++totalLines; - if (fisrtLogSize <= 0) { + if (fisrtLogSize == 0) { fisrtLogSize = content.size(); } } if (!mMultiline.mIgnoringUnmatchWarning && LogtailAlarm::GetInstance()->IsLowLevelAlarmValid()) { LOG_WARNING(mContext->GetLogger(), - ("unmatched log line", "please check regex")( + ("unmatched log string", "please check regex")( "action", UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment))( - "first log:", sourceVal.substr(0, fisrtLogSize).to_string())("filepath", logPath.to_string())( + "first line:", sourceVal.substr(0, fisrtLogSize).to_string())("filepath", logPath.to_string())( "processor", sName)("config", mContext->GetConfigName())("total lines", totalLines)( "log bytes", sourceVal.size() + 1)); mContext->GetAlarm().SendAlarm( SPLIT_LOG_FAIL_ALARM, - "unmatched log line, first log:" + sourceVal.substr(0, fisrtLogSize).to_string() + "\taction: " + "unmatched log string, first line:" + sourceVal.substr(0, fisrtLogSize).to_string() + "\taction: " + UnmatchedContentTreatmentToString(mMultiline.mUnmatchedContentTreatment) + "\tfilepath: " + logPath.to_string() + "\tprocessor: " + sName + "\tconfig: " + mContext->GetConfigName(), mContext->GetProjectName(), diff --git a/core/processor/ProcessorSplitMultilineLogStringNative.h b/core/processor/ProcessorSplitMultilineLogStringNative.h index 58160b4ccd..ad35f7ba30 100644 --- a/core/processor/ProcessorSplitMultilineLogStringNative.h +++ b/core/processor/ProcessorSplitMultilineLogStringNative.h @@ -42,8 +42,12 @@ class ProcessorSplitMultilineLogStringNative : public Processor { bool IsSupportedEvent(const PipelineEventPtr& e) const override; private: - void - ProcessEvent(PipelineEventGroup& logGroup, StringView logPath, PipelineEventPtr&& e, EventsContainer& newEvents); + void ProcessEvent(PipelineEventGroup& logGroup, + StringView logPath, + PipelineEventPtr&& e, + EventsContainer& newEvents, + int* inputLines, + int* unmatchLines); void SplitLogByRegex(PipelineEventGroup& logGroup); void CreateNewEvent(const StringView& content, long sourceoffset, @@ -57,13 +61,12 @@ class ProcessorSplitMultilineLogStringNative : public Processor { const LogEvent& sourceEvent, PipelineEventGroup& logGroup, EventsContainer& newEvents, - StringView logPath); + StringView logPath, + int* unmatchLines); StringView GetNextLine(StringView log, size_t begin); int* mSplitLines = nullptr; - int mInputLinesOneProcess = 0; - int mUnmatchLinesOneProcess = 0; CounterPtr mProcMatchedEventsCnt; CounterPtr mProcMatchedLinesCnt;