config/datanode.example.toml

## The running mode of the datanode. It can be `standalone` or `distributed`.
mode = "standalone"

## The datanode identifier and should be unique in the cluster.
## @toml2docs:none-default
node_id = 42

## Start services after regions have obtained leases.
## It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
require_lease_before_startup = false

## Initialize all regions in the background during the startup.
## By default, it provides services after all regions have been initialized.
init_regions_in_background = false

## Parallelism of initializing regions.
init_regions_parallelism = 16

## The maximum current queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0

## Deprecated, use `grpc.addr` instead.
## @toml2docs:none-default
rpc_addr = "127.0.0.1:3001"

## Deprecated, use `grpc.hostname` instead.
## @toml2docs:none-default
rpc_hostname = "127.0.0.1"

## Deprecated, use `grpc.runtime_size` instead.
## @toml2docs:none-default
rpc_runtime_size = 8

## Deprecated, use `grpc.rpc_max_recv_message_size` instead.
## @toml2docs:none-default
rpc_max_recv_message_size = "512MB"

## Deprecated, use `grpc.rpc_max_send_message_size` instead.
## @toml2docs:none-default
rpc_max_send_message_size = "512MB"

## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true

## The HTTP server options.
[http]
## The address to bind the HTTP server.
addr = "127.0.0.1:4000"
## HTTP request timeout. Set to 0 to disable timeout.
timeout = "30s"
## HTTP request body limit.
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
body_limit = "64MB"

## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
addr = "127.0.0.1:3001"
## The hostname advertised to the metasrv,
## and used for connections from outside the host
hostname = "127.0.0.1:3001"
## The number of server worker threads.
runtime_size = 8
## The maximum receive message size for gRPC server.
max_recv_message_size = "512MB"
## The maximum send message size for gRPC server.
max_send_message_size = "512MB"

## gRPC server TLS options, see `mysql.tls` section.
[grpc.tls]
## TLS mode.
mode = "disable"

## Certificate file path.
## @toml2docs:none-default
cert_path = ""

## Private key file path.
## @toml2docs:none-default
key_path = ""

## Watch for Certificate and key file change and auto reload.
## For now, gRPC tls config does not support auto reload.
watch = false

## The runtime options.
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
#+ global_rt_size = 8
## The number of threads to execute the runtime for global write operations.
#+ compact_rt_size = 4

## The heartbeat options.
[heartbeat]
## Interval for sending heartbeat messages to the metasrv.
interval = "3s"

## Interval for retrying to send heartbeat messages to the metasrv.
retry_interval = "3s"

## The metasrv client options.
[meta_client]
## The addresses of the metasrv.
metasrv_addrs = ["127.0.0.1:3002"]

## Operation timeout.
timeout = "3s"

## Heartbeat timeout.
heartbeat_timeout = "500ms"

## DDL timeout.
ddl_timeout = "10s"

## Connect server timeout.
connect_timeout = "1s"

## `TCP_NODELAY` option for accepted connections.
tcp_nodelay = true

## The configuration about the cache of the metadata.
metadata_cache_max_capacity = 100000

## TTL of the metadata cache.
metadata_cache_ttl = "10m"

# TTI of the metadata cache.
metadata_cache_tti = "5m"

## The WAL options.
[wal]
## The provider of the WAL.
## - `raft_engine`: the wal is stored in the local file system by raft-engine.
## - `kafka`: it's remote wal that data is stored in Kafka.
provider = "raft_engine"

## The directory to store the WAL files.
## **It's only used when the provider is `raft_engine`**.
## @toml2docs:none-default
dir = "/tmp/greptimedb/wal"

## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
file_size = "128MB"

## The threshold of the WAL size to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_threshold = "1GB"

## The interval to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_interval = "1m"

## The read batch size.
## **It's only used when the provider is `raft_engine`**.
read_batch_size = 128

## Whether to use sync write.
## **It's only used when the provider is `raft_engine`**.
sync_write = false

## Whether to reuse logically truncated log files.
## **It's only used when the provider is `raft_engine`**.
enable_log_recycle = true

## Whether to pre-create log files on start up.
## **It's only used when the provider is `raft_engine`**.
prefill_log_files = false

## Duration for fsyncing log files.
## **It's only used when the provider is `raft_engine`**.
sync_period = "10s"

## Parallelism during WAL recovery.
recovery_parallelism = 2

## The Kafka broker endpoints.
## **It's only used when the provider is `kafka`**.
broker_endpoints = ["127.0.0.1:9092"]

## The max size of a single producer batch.
## Warning: Kafka has a default limit of 1MB per message in a topic.
## **It's only used when the provider is `kafka`**.
max_batch_bytes = "1MB"

## The consumer wait timeout.
## **It's only used when the provider is `kafka`**.
consumer_wait_timeout = "100ms"

## The initial backoff delay.
## **It's only used when the provider is `kafka`**.
backoff_init = "500ms"

## The maximum backoff delay.
## **It's only used when the provider is `kafka`**.
backoff_max = "10s"

## The exponential backoff rate, i.e. next backoff = base * current backoff.
## **It's only used when the provider is `kafka`**.
backoff_base = 2

## The deadline of retries.
## **It's only used when the provider is `kafka`**.
backoff_deadline = "5mins"

## Whether to enable WAL index creation.
## **It's only used when the provider is `kafka`**.
create_index = true

## The interval for dumping WAL indexes.
## **It's only used when the provider is `kafka`**.
dump_index_interval = "60s"

## Ignore missing entries during read WAL.
## **It's only used when the provider is `kafka`**.
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
## them as critical errors.
overwrite_entry_start_id = false

# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
# - `PLAIN`
# - `SCRAM-SHA-256`
# - `SCRAM-SHA-512`
# [wal.sasl]
# type = "SCRAM-SHA-512"
# username = "user_kafka"
# password = "secret"

# The Kafka TLS configuration.
# **It's only used when the provider is `kafka`**.
# [wal.tls]
# server_ca_cert_path = "/path/to/server_cert"
# client_cert_path = "/path/to/client_cert"
# client_key_path = "/path/to/key"

# Example of using S3 as the storage.
# [storage]
# type = "S3"
# bucket = "greptimedb"
# root = "data"
# access_key_id = "test"
# secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"

# Example of using Oss as the storage.
# [storage]
# type = "Oss"
# bucket = "greptimedb"
# root = "data"
# access_key_id = "test"
# access_key_secret = "123456"
# endpoint = "https://oss-cn-hangzhou.aliyuncs.com"

# Example of using Azblob as the storage.
# [storage]
# type = "Azblob"
# container = "greptimedb"
# root = "data"
# account_name = "test"
# account_key = "123456"
# endpoint = "https://greptimedb.blob.core.windows.net"
# sas_token = ""

# Example of using Gcs as the storage.
# [storage]
# type = "Gcs"
# bucket = "greptimedb"
# root = "data"
# scope = "test"
# credential_path = "123456"
# credential = "base64-credential"
# endpoint = "https://storage.googleapis.com"

## The data storage options.
[storage]
## The working home directory.
data_home = "/tmp/greptimedb/"

## The storage type used to store the data.
## - `File`: the data is stored in the local file system.
## - `S3`: the data is stored in the S3 object storage.
## - `Gcs`: the data is stored in the Google Cloud Storage.
## - `Azblob`: the data is stored in the Azure Blob Storage.
## - `Oss`: the data is stored in the Aliyun OSS.
type = "File"

## Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.
## A local file directory, defaults to `{data_home}`. An empty string means disabling.
## @toml2docs:none-default
#+ cache_path = ""

## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
## @toml2docs:none-default
cache_capacity = "5GiB"

## The S3 bucket name.
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
## @toml2docs:none-default
bucket = "greptimedb"

## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
## @toml2docs:none-default
root = "greptimedb"

## The access key id of the aws account.
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
## **It's only used when the storage type is `S3` and `Oss`**.
## @toml2docs:none-default
access_key_id = "test"

## The secret access key of the aws account.
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
## **It's only used when the storage type is `S3`**.
## @toml2docs:none-default
secret_access_key = "test"

## The secret access key of the aliyun account.
## **It's only used when the storage type is `Oss`**.
## @toml2docs:none-default
access_key_secret = "test"

## The account key of the azure account.
## **It's only used when the storage type is `Azblob`**.
## @toml2docs:none-default
account_name = "test"

## The account key of the azure account.
## **It's only used when the storage type is `Azblob`**.
## @toml2docs:none-default
account_key = "test"

## The scope of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## @toml2docs:none-default
scope = "test"

## The credential path of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## @toml2docs:none-default
credential_path = "test"

## The credential of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## @toml2docs:none-default
credential = "base64-credential"

## The container of the azure account.
## **It's only used when the storage type is `Azblob`**.
## @toml2docs:none-default
container = "greptimedb"

## The sas token of the azure account.
## **It's only used when the storage type is `Azblob`**.
## @toml2docs:none-default
sas_token = ""

## The endpoint of the S3 service.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
## @toml2docs:none-default
endpoint = "https://s3.amazonaws.com"

## The region of the S3 service.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
## @toml2docs:none-default
region = "us-west-2"

## The http client options to the storage.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
[storage.http_client]

## The maximum idle connection per host allowed in the pool.
pool_max_idle_per_host = 1024

## The timeout for only the connect phase of a http client.
connect_timeout = "30s"

## The total request timeout, applied from when the request starts connecting until the response body has finished.
## Also considered a total deadline.
timeout = "30s"

## The timeout for idle sockets being kept-alive.
pool_idle_timeout = "90s"

# Custom storage options
# [[storage.providers]]
# name = "S3"
# type = "S3"
# bucket = "greptimedb"
# root = "data"
# access_key_id = "test"
# secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"
# [[storage.providers]]
# name = "Gcs"
# type = "Gcs"
# bucket = "greptimedb"
# root = "data"
# scope = "test"
# credential_path = "123456"
# credential = "base64-credential"
# endpoint = "https://storage.googleapis.com"

## The region engine options. You can configure multiple region engines.
[[region_engine]]

## The Mito engine options.
[region_engine.mito]

## Number of region workers.
#+ num_workers = 8

## Request channel size of each worker.
worker_channel_size = 128

## Max batch size for a worker to handle requests.
worker_request_batch_size = 64

## Number of meta action updated to trigger a new checkpoint for the manifest.
manifest_checkpoint_distance = 10

## Whether to compress manifest and checkpoint file by gzip (default false).
compress_manifest = false

## Max number of running background flush jobs (default: 1/2 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_flushes = 4

## Max number of running background compaction jobs (default: 1/4 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_compactions = 2

## Max number of running background purge jobs (default: number of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_purges = 8

## Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"

## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
## @toml2docs:none-default="Auto"
#+ global_write_buffer_size = "1GB"

## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
## @toml2docs:none-default="Auto"
#+ global_write_buffer_reject_size = "2GB"

## Cache size for SST metadata. Setting it to 0 to disable the cache.
## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
## @toml2docs:none-default="Auto"
#+ sst_meta_cache_size = "128MB"

## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
## @toml2docs:none-default="Auto"
#+ vector_cache_size = "512MB"

## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
## If not set, it's default to 1/8 of OS memory.
## @toml2docs:none-default="Auto"
#+ page_cache_size = "512MB"

## Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
## @toml2docs:none-default="Auto"
#+ selector_result_cache_size = "512MB"

## Whether to enable the write cache, it's enabled by default when using object storage. It is recommended to enable it when using object storage for better performance.
enable_write_cache = false

## File system path for write cache, defaults to `{data_home}`.
write_cache_path = ""

## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
write_cache_size = "5GiB"

## TTL for write cache.
## @toml2docs:none-default
write_cache_ttl = "8h"

## Buffer size for SST writing.
sst_write_buffer_size = "8MB"

## Capacity of the channel to send data from parallel scan tasks to the main task.
parallel_scan_channel_size = 32

## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false

## Minimum time interval between two compactions.
## To align with the old behavior, the default value is 0 (no restrictions).
min_compaction_interval = "0m"

## The options for index in Mito engine.
[region_engine.mito.index]

## Auxiliary directory path for the index in filesystem, used to store intermediate files for
## creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.
## The default name for this directory is `index_intermediate` for backward compatibility.
##
## This path contains two subdirectories:
## - `__intm`: for storing intermediate files used during creating index.
## - `staging`: for storing staging files used during searching index.
aux_path = ""

## The max capacity of the staging directory.
staging_size = "2GB"

## Cache size for inverted index metadata.
metadata_cache_size = "64MiB"

## Cache size for inverted index content.
content_cache_size = "128MiB"

## Page size for inverted index content cache.
content_cache_page_size = "64KiB"

## The options for inverted index in Mito engine.
[region_engine.mito.inverted_index]

## Whether to create the index on flush.
## - `auto`: automatically (default)
## - `disable`: never
create_on_flush = "auto"

## Whether to create the index on compaction.
## - `auto`: automatically (default)
## - `disable`: never
create_on_compaction = "auto"

## Whether to apply the index on query
## - `auto`: automatically (default)
## - `disable`: never
apply_on_query = "auto"

## Memory threshold for performing an external sort during index creation.
## - `auto`: automatically determine the threshold based on the system memory size (default)
## - `unlimited`: no memory limit
## - `[size]` e.g. `64MB`: fixed memory threshold
mem_threshold_on_create = "auto"

## Deprecated, use `region_engine.mito.index.aux_path` instead.
intermediate_path = ""

## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]

## Whether to create the index on flush.
## - `auto`: automatically (default)
## - `disable`: never
create_on_flush = "auto"

## Whether to create the index on compaction.
## - `auto`: automatically (default)
## - `disable`: never
create_on_compaction = "auto"

## Whether to apply the index on query
## - `auto`: automatically (default)
## - `disable`: never
apply_on_query = "auto"

## Memory threshold for index creation.
## - `auto`: automatically determine the threshold based on the system memory size (default)
## - `unlimited`: no memory limit
## - `[size]` e.g. `64MB`: fixed memory threshold
mem_threshold_on_create = "auto"

## The options for bloom filter index in Mito engine.
[region_engine.mito.bloom_filter_index]

## Whether to create the index on flush.
## - `auto`: automatically (default)
## - `disable`: never
create_on_flush = "auto"

## Whether to create the index on compaction.
## - `auto`: automatically (default)
## - `disable`: never
create_on_compaction = "auto"

## Whether to apply the index on query
## - `auto`: automatically (default)
## - `disable`: never
apply_on_query = "auto"

## Memory threshold for the index creation.
## - `auto`: automatically determine the threshold based on the system memory size (default)
## - `unlimited`: no memory limit
## - `[size]` e.g. `64MB`: fixed memory threshold
mem_threshold_on_create = "auto"

[region_engine.mito.memtable]
## Memtable type.
## - `time_series`: time-series memtable
## - `partition_tree`: partition tree memtable (experimental)
type = "time_series"

## The max number of keys in one shard.
## Only available for `partition_tree` memtable.
index_max_keys_per_shard = 8192

## The max rows of data inside the actively writing buffer in one shard.
## Only available for `partition_tree` memtable.
data_freeze_threshold = 32768

## Max dictionary bytes.
## Only available for `partition_tree` memtable.
fork_dictionary_bytes = "1GiB"

[[region_engine]]
## Enable the file engine.
[region_engine.file]

[[region_engine]]
## Metric engine options.
[region_engine.metric]
## Whether to enable the experimental sparse primary key encoding.
experimental_sparse_primary_key_encoding = false

## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "/tmp/greptimedb/logs"

## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default
level = "info"

## Enable OTLP tracing.
enable_otlp_tracing = false

## The OTLP tracing endpoint.
otlp_endpoint = "http://localhost:4317"

## Whether to append logs to stdout.
append_stdout = true

## The log format. Can be `text`/`json`.
log_format = "text"

## The maximum amount of log files.
max_log_files = 720

## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0

## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false

## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"

## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0

## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]

## whether enable export metrics.
enable = false

## The interval of export metrics.
write_interval = "30s"

## For `standalone` mode, `self_import` is recommend to collect metrics generated by itself
## You must create the database before enabling it.
[export_metrics.self_import]
## @toml2docs:none-default
db = "greptime_metrics"

[export_metrics.remote_write]
## The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`.
url = ""

## HTTP headers of Prometheus remote-write carry.
headers = { }

## The tracing options. Only effect when compiled with `tokio-console` feature.
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
#+ tokio_console_addr = "127.0.0.1"