Enable setting default chunk encoding by config file (#2077)

ChinYing-Li · ChinYing-Li · commit 66da47553cb7 · 2021-04-20T17:14:44.000+08:00
Signed-off-by: ChinYing-Li <chinying.li@mail.utoronto.ca>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -43,6 +43,7 @@
 * [ENHANCEMENT] Block Storage Ingester: `/flush` now accepts two new parameters: `tenant` to specify tenant to flush and `wait=true` to make call synchronous. Multiple tenants can be specified by repeating `tenant` parameter. If no `tenant` is specified, all tenants are flushed, as before. #4073
 * [ENHANCEMENT] Alertmanager: validate configured `-alertmanager.web.external-url` and fail if ends with `/`. #4081
 * [ENHANCEMENT] Allow configuration of Cassandra's host selection policy. #4069
+* [ENHANCEMENT] Ingester: enable setting default chunk encoding by config file. #4086
 * [BUGFIX] Ruler-API: fix bug where `/api/v1/rules/<namespace>/<group_name>` endpoint return `400` instead of `404`. #4013
 * [BUGFIX] Distributor: reverted changes done to rate limiting in #3825. #3948
 * [BUGFIX] Ingester: Fix race condition when opening and closing tsdb concurrently. #3959
diff --git a/development/tsdb-blocks-storage-s3-gossip/config/cortex.yaml b/development/tsdb-blocks-storage-s3-gossip/config/cortex.yaml
@@ -116,6 +116,9 @@ alertmanager_storage:
 storage:
   engine: blocks
 
+encoding:
+  chunk_encoding: big-chunk
+
 compactor:
   compaction_interval: 30s
   data_dir:            /tmp/cortex-compactor
diff --git a/development/tsdb-blocks-storage-s3-single-binary/config/cortex.yaml b/development/tsdb-blocks-storage-s3-single-binary/config/cortex.yaml
@@ -60,6 +60,9 @@ blocks_storage:
 storage:
   engine: blocks
 
+encoding:
+  chunk_encoding: big-chunk
+
 ruler:
   enable_api: true
   enable_sharding: true
diff --git a/development/tsdb-blocks-storage-s3/config/cortex.yaml b/development/tsdb-blocks-storage-s3/config/cortex.yaml
@@ -114,6 +114,9 @@ alertmanager_storage:
 storage:
   engine: blocks
 
+encoding:
+  chunk_encoding: big-chunk
+
 compactor:
   compaction_interval: 30s
   data_dir:            /tmp/cortex-compactor
diff --git a/development/tsdb-blocks-storage-swift-single-binary/config/cortex.yaml b/development/tsdb-blocks-storage-swift-single-binary/config/cortex.yaml
@@ -61,6 +61,9 @@ blocks_storage:
 storage:
   engine: blocks
 
+encoding:
+  chunk_encoding: big-chunk
+
 ruler:
   enable_api: true
   enable_sharding: true
diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
@@ -130,6 +130,11 @@ api:
 # The table_manager_config configures the Cortex table-manager.
 [table_manager: <table_manager_config>]
 
+encoding:
+  # Encoding version to use for chunks.
+  # CLI flag: -encoding.chunk-encoding
+  [chunk_encoding: <string> | default = "big-chunk"]
+
 # The blocks_storage_config configures the blocks storage.
 [blocks_storage: <blocks_storage_config>]
 
diff --git a/docs/guides/capacity-planning.md b/docs/guides/capacity-planning.md
@@ -51,8 +51,12 @@ Now, some rules of thumb:
  2. Each million series (including churn) consumes 15GB of chunk
  storage and 4GB of index, per day (so multiply by the retention
  period).
- 3. Each 100,000 samples/sec arriving takes 1 CPU in distributors.
- Distributors don't need much RAM.
+ 3. The distributors' CPU utilization depends on the specific Cortex cluster
+    setup, while they don't need much RAM. Typically, distributors are capable
+    of processing between 20,000 and 100,000 samples/sec with 1 CPU core. It's also
+    highly recommended to configure Prometheus `max_samples_per_send` to 1,000
+    samples, in order to reduce the distributors' CPU utilization given the same
+    total samples/sec throughput.
 
 If you turn on compression between distributors and ingesters (for
 example to save on inter-zone bandwidth charges at AWS/GCP) they will use
diff --git a/pkg/chunk/encoding/factory.go b/pkg/chunk/encoding/factory.go
@@ -11,7 +11,9 @@ import (
 type Encoding byte
 
 // Config configures the behaviour of chunk encoding
-type Config struct{}
+type Config struct {
+	EncodingName string `yaml:"chunk_encoding"`
+}
 
 var (
 	// DefaultEncoding exported for use in unit tests elsewhere
@@ -20,8 +22,8 @@ var (
 )
 
 // RegisterFlags registers configuration settings.
-func (Config) RegisterFlags(f *flag.FlagSet) {
-	f.Var(&DefaultEncoding, "ingester.chunk-encoding", "Encoding version to use for chunks.")
+func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
+	f.StringVar(&cfg.EncodingName, "encoding.chunk-encoding", "big-chunk", "Encoding version to use for chunks.")
 	f.IntVar(&bigchunkSizeCapBytes, "store.bigchunk-size-cap-bytes", bigchunkSizeCapBytes, "When using bigchunk encoding, start a new bigchunk if over this size (0 = unlimited)")
 }
 
@@ -63,25 +65,25 @@ type encoding struct {
 
 var encodings = map[Encoding]encoding{
 	DoubleDelta: {
-		Name: "DoubleDelta",
+		Name: "double-delta",
 		New: func() Chunk {
 			return newDoubleDeltaEncodedChunk(d1, d0, true, ChunkLen)
 		},
 	},
 	Varbit: {
-		Name: "Varbit",
+		Name: "varbit",
 		New: func() Chunk {
 			return newVarbitChunk(varbitZeroEncoding)
 		},
 	},
 	Bigchunk: {
-		Name: "Bigchunk",
+		Name: "big-chunk",
 		New: func() Chunk {
 			return newBigchunk()
 		},
 	},
 	PrometheusXorChunk: {
-		Name: "PrometheusXorChunk",
+		Name: "prometheus-xor-chunk",
 		New: func() Chunk {
 			return newPrometheusXorChunk()
 		},
@@ -90,7 +92,12 @@ var encodings = map[Encoding]encoding{
 
 // Set implements flag.Value.
 func (e *Encoding) Set(s string) error {
-	// First see if the name was given
+	// If nothing is provided, keep the original value
+	if s == "" {
+		return nil
+	}
+
+	// Then see if the name was given
 	for k, v := range encodings {
 		if s == v.Name {
 			*e = k
diff --git a/pkg/cortex/cortex.go b/pkg/cortex/cortex.go
@@ -108,7 +108,7 @@ type Config struct {
 	Frontend         frontend.CombinedFrontendConfig `yaml:"frontend"`
 	QueryRange       queryrange.Config               `yaml:"query_range"`
 	TableManager     chunk.TableManagerConfig        `yaml:"table_manager"`
-	Encoding         encoding.Config                 `yaml:"-"` // No yaml for this, it only works with flags.
+	Encoding         encoding.Config                 `yaml:"encoding"`
 	BlocksStorage    tsdb.BlocksStorageConfig        `yaml:"blocks_storage"`
 	Compactor        compactor.Config                `yaml:"compactor"`
 	StoreGateway     storegateway.Config             `yaml:"store_gateway"`
@@ -355,6 +355,11 @@ func New(cfg Config) (*Cortex, error) {
 		Cfg: cfg,
 	}
 
+	// Set default chunk encoding
+	if err := encoding.DefaultEncoding.Set(cfg.Encoding.EncodingName); err != nil {
+		return nil, err
+	}
+
 	cortex.setupThanosTracing()
 
 	if err := cortex.setupModuleManager(); err != nil {