diff --git a/.chloggen/clickhouseexporter_update_default_logs_table.yaml b/.chloggen/clickhouseexporter_update_default_logs_table.yaml
new file mode 100644
index 000000000000..17b1c4eac345
--- /dev/null
+++ b/.chloggen/clickhouseexporter_update_default_logs_table.yaml
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: clickhouseexporter
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Updated the default logs table to a more optimized schema
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [33611]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext: Simplified data types, improved partitioning and time range queries.
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: []
diff --git a/exporter/clickhouseexporter/README.md b/exporter/clickhouseexporter/README.md
index c6d4928be6ab..af68563bac8c 100644
--- a/exporter/clickhouseexporter/README.md
+++ b/exporter/clickhouseexporter/README.md
@@ -43,7 +43,7 @@ as [ClickHouse document says:](https://clickhouse.com/docs/en/introduction/performance/)
 - Get log severity count time series.
 ```clickhouse
-SELECT toDateTime(toStartOfInterval(Timestamp, INTERVAL 60 second)) as time, SeverityText, count() as count
+SELECT toDateTime(toStartOfInterval(TimestampTime, INTERVAL 60 second)) as time, SeverityText, count() as count
 FROM otel_logs
 WHERE time >= NOW() - INTERVAL 1 HOUR
 GROUP BY SeverityText, time
@@ -55,7 +55,7 @@ ORDER BY time;
 ```
 ```clickhouse
 SELECT Timestamp as log_time, Body
 FROM otel_logs
-WHERE Timestamp >= NOW() - INTERVAL 1 HOUR
+WHERE TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
@@ -65,7 +65,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE ServiceName = 'clickhouse-exporter'
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
@@ -75,7 +75,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE LogAttributes['container_name'] = '/example_flog_1'
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
@@ -85,7 +85,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE hasToken(Body, 'http')
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
@@ -95,7 +95,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE Body like '%http%'
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
@@ -105,7 +105,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE match(Body, 'http')
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
@@ -115,7 +115,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE JSONExtractFloat(Body, 'bytes') > 1000
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
diff --git a/exporter/clickhouseexporter/example/default_ddl/logs.sql b/exporter/clickhouseexporter/example/default_ddl/logs.sql
index e98d65defd73..c177f1495d3b 100644
--- a/exporter/clickhouseexporter/example/default_ddl/logs.sql
+++ b/exporter/clickhouseexporter/example/default_ddl/logs.sql
@@ -1,21 +1,24 @@
 -- Default Logs table DDL
 CREATE TABLE IF NOT EXISTS otel_logs (
-    Timestamp DateTime64(9) CODEC(Delta, ZSTD(1)),
+    Timestamp DateTime64(9) CODEC(Delta(8), ZSTD(1)),
+    TimestampDate Date DEFAULT toDate(Timestamp),
+    TimestampTime DateTime DEFAULT toDateTime(Timestamp),
     TraceId String CODEC(ZSTD(1)),
     SpanId String CODEC(ZSTD(1)),
-    TraceFlags UInt32 CODEC(ZSTD(1)),
+    TraceFlags UInt8,
     SeverityText LowCardinality(String) CODEC(ZSTD(1)),
-    SeverityNumber Int32 CODEC(ZSTD(1)),
+    SeverityNumber UInt8,
     ServiceName LowCardinality(String) CODEC(ZSTD(1)),
     Body String CODEC(ZSTD(1)),
-    ResourceSchemaUrl String CODEC(ZSTD(1)),
+    ResourceSchemaUrl LowCardinality(String) CODEC(ZSTD(1)),
     ResourceAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
-    ScopeSchemaUrl String CODEC(ZSTD(1)),
+    ScopeSchemaUrl LowCardinality(String) CODEC(ZSTD(1)),
     ScopeName String CODEC(ZSTD(1)),
-    ScopeVersion String CODEC(ZSTD(1)),
+    ScopeVersion LowCardinality(String) CODEC(ZSTD(1)),
     ScopeAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
     LogAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
+
     INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
     INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
     INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
@@ -25,7 +28,7 @@ CREATE TABLE IF NOT EXISTS otel_logs (
     INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
     INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1
 ) ENGINE = MergeTree()
-TTL toDateTime("Timestamp") + toIntervalDay(180)
-PARTITION BY toDate(Timestamp)
-ORDER BY (ServiceName, SeverityText, toUnixTimestamp(Timestamp), TraceId)
-SETTINGS index_granularity=8192, ttl_only_drop_parts = 1;
+PARTITION BY toYYYYMM(TimestampDate)
+ORDER BY (ServiceName, TimestampDate, TimestampTime)
+TTL TimestampTime + toIntervalDay(180)
+SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
diff --git a/exporter/clickhouseexporter/exporter_logs.go b/exporter/clickhouseexporter/exporter_logs.go
index b466931a20a0..69707ca80fe2 100644
--- a/exporter/clickhouseexporter/exporter_logs.go
+++ b/exporter/clickhouseexporter/exporter_logs.go
@@ -133,34 +133,37 @@ const (
 	// language=ClickHouse SQL
 	createLogsTableSQL = `
 CREATE TABLE IF NOT EXISTS %s %s (
-	Timestamp DateTime64(9) CODEC(Delta, ZSTD(1)),
-	TraceId String CODEC(ZSTD(1)),
-	SpanId String CODEC(ZSTD(1)),
-	TraceFlags UInt32 CODEC(ZSTD(1)),
-	SeverityText LowCardinality(String) CODEC(ZSTD(1)),
-	SeverityNumber Int32 CODEC(ZSTD(1)),
-	ServiceName LowCardinality(String) CODEC(ZSTD(1)),
-	Body String CODEC(ZSTD(1)),
-	ResourceSchemaUrl String CODEC(ZSTD(1)),
-	ResourceAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
-	ScopeSchemaUrl String CODEC(ZSTD(1)),
-	ScopeName String CODEC(ZSTD(1)),
-	ScopeVersion String CODEC(ZSTD(1)),
-	ScopeAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
-	LogAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
-	INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
-	INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-	INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-	INDEX idx_scope_attr_key mapKeys(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-	INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-	INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-	INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-	INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1
+	Timestamp DateTime64(9) CODEC(Delta(8), ZSTD(1)),
+	TimestampDate Date DEFAULT toDate(Timestamp),
+	TimestampTime DateTime DEFAULT toDateTime(Timestamp),
+	TraceId String CODEC(ZSTD(1)),
+	SpanId String CODEC(ZSTD(1)),
+	TraceFlags UInt8,
+	SeverityText LowCardinality(String) CODEC(ZSTD(1)),
+	SeverityNumber UInt8,
+	ServiceName LowCardinality(String) CODEC(ZSTD(1)),
+	Body String CODEC(ZSTD(1)),
+	ResourceSchemaUrl LowCardinality(String) CODEC(ZSTD(1)),
+	ResourceAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
+	ScopeSchemaUrl LowCardinality(String) CODEC(ZSTD(1)),
+	ScopeName String CODEC(ZSTD(1)),
+	ScopeVersion LowCardinality(String) CODEC(ZSTD(1)),
+	ScopeAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
+	LogAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
+
+	INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
+	INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_scope_attr_key mapKeys(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1
 ) ENGINE = %s
+PARTITION BY toYYYYMM(TimestampDate)
+ORDER BY (ServiceName, TimestampDate, TimestampTime)
 %s
-PARTITION BY toDate(Timestamp)
-ORDER BY (ServiceName, SeverityText, toUnixTimestamp(Timestamp), TraceId)
-SETTINGS index_granularity=8192, ttl_only_drop_parts = 1;
+SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
 `
 	// language=ClickHouse SQL
 	insertLogsSQLTemplate = `INSERT INTO %s (
@@ -238,7 +241,7 @@ func createLogsTable(ctx context.Context, cfg *Config, db *sql.DB) error {
 }
 
 func renderCreateLogsTableSQL(cfg *Config) string {
-	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "Timestamp")
+	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "TimestampTime")
 	return fmt.Sprintf(createLogsTableSQL, cfg.LogsTableName, cfg.ClusterString(), cfg.TableEngineString(), ttlExpr)
 }
diff --git a/exporter/clickhouseexporter/exporter_metrics.go b/exporter/clickhouseexporter/exporter_metrics.go
index 0f0dbc02537f..b4825565100f 100644
--- a/exporter/clickhouseexporter/exporter_metrics.go
+++ b/exporter/clickhouseexporter/exporter_metrics.go
@@ -47,7 +47,7 @@ func (e *metricsExporter) start(ctx context.Context, _ component.Host) error {
 		return err
 	}
 
-	ttlExpr := generateTTLExpr(e.cfg.TTLDays, e.cfg.TTL, "TimeUnix")
+	ttlExpr := generateTTLExpr(e.cfg.TTLDays, e.cfg.TTL, "toDateTime(TimeUnix)")
 	return internal.NewMetricsTable(ctx, e.cfg.MetricsTableName, e.cfg.ClusterString(), e.cfg.TableEngineString(), ttlExpr, e.client)
 }
diff --git a/exporter/clickhouseexporter/exporter_traces.go b/exporter/clickhouseexporter/exporter_traces.go
index acbb7b8fa1c1..72e8e6219ca5 100644
--- a/exporter/clickhouseexporter/exporter_traces.go
+++ b/exporter/clickhouseexporter/exporter_traces.go
@@ -295,12 +295,12 @@ func renderInsertTracesSQL(cfg *Config) string {
 }
 
 func renderCreateTracesTableSQL(cfg *Config) string {
-	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "Timestamp")
+	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "toDateTime(Timestamp)")
 	return fmt.Sprintf(createTracesTableSQL, cfg.TracesTableName, cfg.ClusterString(), cfg.TableEngineString(), ttlExpr)
 }
 
 func renderCreateTraceIDTsTableSQL(cfg *Config) string {
-	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "Start")
+	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "toDateTime(Start)")
 	return fmt.Sprintf(createTraceIDTsTableSQL, cfg.TracesTableName, cfg.ClusterString(), cfg.TableEngineString(), ttlExpr)
 }
diff --git a/exporter/clickhouseexporter/factory.go b/exporter/clickhouseexporter/factory.go
index d9d283247bf0..44b9d6ffdc10 100644
--- a/exporter/clickhouseexporter/factory.go
+++ b/exporter/clickhouseexporter/factory.go
@@ -126,19 +126,19 @@ func createMetricExporter(
 
 func generateTTLExpr(ttlDays uint, ttl time.Duration, timeField string) string {
 	if ttlDays > 0 {
-		return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalDay(%d)`, timeField, ttlDays)
+		return fmt.Sprintf(`TTL %s + toIntervalDay(%d)`, timeField, ttlDays)
 	}
 
 	if ttl > 0 {
 		switch {
 		case ttl%(24*time.Hour) == 0:
-			return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalDay(%d)`, timeField, ttl/(24*time.Hour))
+			return fmt.Sprintf(`TTL %s + toIntervalDay(%d)`, timeField, ttl/(24*time.Hour))
 		case ttl%(time.Hour) == 0:
-			return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalHour(%d)`, timeField, ttl/time.Hour)
+			return fmt.Sprintf(`TTL %s + toIntervalHour(%d)`, timeField, ttl/time.Hour)
 		case ttl%(time.Minute) == 0:
-			return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalMinute(%d)`, timeField, ttl/time.Minute)
+			return fmt.Sprintf(`TTL %s + toIntervalMinute(%d)`, timeField, ttl/time.Minute)
 		default:
-			return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalSecond(%d)`, timeField, ttl/time.Second)
+			return fmt.Sprintf(`TTL %s + toIntervalSecond(%d)`, timeField, ttl/time.Second)
 		}
 	}
 	return ""
diff --git a/exporter/clickhouseexporter/integration_test.go b/exporter/clickhouseexporter/integration_test.go
index 258465395c71..2d15cb6a780c 100644
--- a/exporter/clickhouseexporter/integration_test.go
+++ b/exporter/clickhouseexporter/integration_test.go
@@ -95,6 +95,8 @@ func verifyExportLog(t *testing.T, logExporter *logsExporter) {
 	type log struct {
 		Timestamp     string `db:"Timestamp"`
+		TimestampDate string `db:"TimestampDate"`
+		TimestampTime string `db:"TimestampTime"`
 		TraceID       string `db:"TraceId"`
 		SpanID        string `db:"SpanId"`
 		TraceFlags    uint32 `db:"TraceFlags"`
@@ -115,6 +117,8 @@ func verifyExportLog(t *testing.T, logExporter *logsExporter) {
 	expectLog := log{
 		Timestamp:     "2023-12-25T09:53:49Z",
+		TimestampDate: "2023-12-25T00:00:00Z",
+		TimestampTime: "2023-12-25T09:53:49Z",
 		TraceID:       "01020300000000000000000000000000",
 		SpanID:        "0102030000000000",
 		SeverityText:  "error",
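
---

The core of this change is the new sort key and partitioning in the default logs DDL: `PARTITION BY toYYYYMM(TimestampDate)` and `ORDER BY (ServiceName, TimestampDate, TimestampTime)`. A sketch of the query shape this layout is designed for — the table name is the default, while the service name and time range are illustrative:

```clickhouse
-- Count recent errors for one service. The TimestampDate predicate can
-- prune whole monthly partitions (toYYYYMM(TimestampDate)), and the
-- (ServiceName, TimestampDate, TimestampTime) sort key lets the primary
-- index skip granules outside the service and time range.
SELECT count() AS recent_errors
FROM otel_logs
WHERE ServiceName = 'clickhouse-exporter'
  AND TimestampDate = today()
  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
  AND SeverityText = 'error';
```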
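`TimestampDate` and `TimestampTime` are derived from the nanosecond `Timestamp` via `DEFAULT` expressions, so inserts that only supply `Timestamp` still populate both columns. A minimal sanity check, assuming the default `otel_logs` table exists — the service name and body are placeholders:

```clickhouse
-- Insert only Timestamp/ServiceName/Body; the DEFAULT expressions
-- fill in TimestampDate (toDate) and TimestampTime (toDateTime).
INSERT INTO otel_logs (Timestamp, ServiceName, Body)
VALUES (now64(9), 'demo-service', 'hello world');

SELECT Timestamp, TimestampDate, TimestampTime
FROM otel_logs
WHERE ServiceName = 'demo-service'
LIMIT 1;
```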
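`generateTTLExpr` no longer wraps the time field in `toDateTime(...)` itself; callers now pass an expression that is already `DateTime`-typed (`TimestampTime` for logs, `toDateTime(TimeUnix)` for metrics, `toDateTime(Timestamp)` and `toDateTime(Start)` for traces). With `ttl: 72h` in the exporter config, for example, the rendered logs DDL should end in `TTL TimestampTime + toIntervalHour(72)`. The same expression shape can be applied to an already-created table; the statement below is illustrative and not part of the exporter:

```clickhouse
-- Apply the equivalent 72-hour retention to an existing logs table.
ALTER TABLE otel_logs
    MODIFY TTL TimestampTime + toIntervalHour(72);
```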