Skip to content

Commit

Permalink
feat: fuzzy table data disk cache key reload (#15566)
Browse files Browse the repository at this point in the history
* feat: fuzzy table data disk cache key reload

During query node restart, if the config item
`data_cache_key_reload_policy` is set to "fuzzy", disk cache keys are
reloaded from the cache directory and the previous cache data is kept,
rather than being removed. In other words, cache data that existed
before the restart is not deleted.

Note that during the reloading of cache keys, cache capacity will NOT
be checked. Therefore, if `cache.disk.max_bytes` is decreased between
restarts, no cached items on disk will be removed immediately. Instead,
items will be removed when the first new item is put into the cache.

New config item introduced:

~~~
[cache]

Policy of data cache key reloading:

- Available options: [reset|fuzzy]
  - "reset": remove previous data cache during restart
  - "fuzzy": reload cache keys from cache dir, retaining the cache data
             that existed before the restart

data_cache_key_reload_policy = "reset"
~~~

* Update src/query/storages/common/cache/src/providers/disk_cache.rs

Co-authored-by: Bohu <[email protected]>

* cargo fmt

* parallel deletion

* cleanup

---------

Co-authored-by: Bohu <[email protected]>
  • Loading branch information
dantengsky and BohuTANG authored May 19, 2024
1 parent ef4fe8e commit 6a89aa1
Show file tree
Hide file tree
Showing 10 changed files with 290 additions and 26 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 46 additions & 2 deletions src/query/config/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,6 @@ use super::inner::QueryConfig as InnerQueryConfig;
use crate::background_config::BackgroundConfig;
use crate::DATABEND_COMMIT_VERSION;

// FIXME: too much boilerplate here

const CATALOG_HIVE: &str = "hive";

/// Config for `query`.
Expand Down Expand Up @@ -2806,6 +2804,15 @@ pub struct CacheConfig {
)]
pub data_cache_storage: CacheStorageTypeConfig,

/// Policy of disk cache restart
#[clap(
long = "cache-data-cache-key-reload-policy",
value_name = "VALUE",
value_enum,
default_value_t
)]
pub data_cache_key_reload_policy: DiskCacheKeyReloadPolicy,

/// Max size of external cache population queue length
///
/// the items being queued reference table column raw data, which are
Expand Down Expand Up @@ -2890,6 +2897,22 @@ impl Default for CacheStorageTypeConfig {
}
}

// Policy applied to the data disk cache keys when the node restarts.
//
// Serialized in lowercase ("reset" / "fuzzy"). `Reset` is the default.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, ValueEnum)]
#[serde(rename_all = "lowercase")]
pub enum DiskCacheKeyReloadPolicy {
    // Remove all the disk cache during restart.
    #[default]
    Reset,
    // Recover the cache keys during restart,
    // but cache capacity will not be checked.
    Fuzzy,
}

#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args, Default)]
#[serde(default, deny_unknown_fields)]
pub struct DiskCacheConfig {
Expand Down Expand Up @@ -2991,6 +3014,7 @@ mod cache_config_converters {
table_data_cache_population_queue_size: value
.table_data_cache_population_queue_size,
disk_cache_config: value.disk_cache_config.try_into()?,
data_cache_key_reload_policy: value.data_cache_key_reload_policy.try_into()?,
table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes,
table_data_deserialized_memory_ratio: value.table_data_deserialized_memory_ratio,
})
Expand All @@ -3013,6 +3037,7 @@ mod cache_config_converters {
inverted_index_filter_memory_ratio: value.inverted_index_filter_memory_ratio,
table_prune_partitions_count: value.table_prune_partitions_count,
data_cache_storage: value.data_cache_storage.into(),
data_cache_key_reload_policy: value.data_cache_key_reload_policy.into(),
table_data_cache_population_queue_size: value
.table_data_cache_population_queue_size,
disk_cache_config: value.disk_cache_config.into(),
Expand Down Expand Up @@ -3060,4 +3085,23 @@ mod cache_config_converters {
}
}
}

/// Converts the outer (serde/clap-facing) reload policy into the inner one.
///
/// Every variant has a direct counterpart, so this conversion always
/// returns `Ok`.
impl TryFrom<DiskCacheKeyReloadPolicy> for inner::DiskCacheKeyReloadPolicy {
    type Error = ErrorCode;

    fn try_from(value: DiskCacheKeyReloadPolicy) -> std::result::Result<Self, Self::Error> {
        let converted = match value {
            DiskCacheKeyReloadPolicy::Reset => Self::Reset,
            DiskCacheKeyReloadPolicy::Fuzzy => Self::Fuzzy,
        };
        Ok(converted)
    }
}

/// Converts the inner reload policy back into the outer (serde/clap-facing)
/// one, variant for variant.
impl From<inner::DiskCacheKeyReloadPolicy> for DiskCacheKeyReloadPolicy {
    fn from(value: inner::DiskCacheKeyReloadPolicy) -> Self {
        match value {
            inner::DiskCacheKeyReloadPolicy::Reset => Self::Reset,
            inner::DiskCacheKeyReloadPolicy::Fuzzy => Self::Fuzzy,
        }
    }
}
}
28 changes: 27 additions & 1 deletion src/query/config/src/inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,9 @@ pub struct CacheConfig {
/// Storage that hold the raw data caches
pub disk_cache_config: DiskCacheConfig,

/// Policy of reloading disk cache keys
pub data_cache_key_reload_policy: DiskCacheKeyReloadPolicy,

/// Max size of in memory table column object cache. By default it is 0 (disabled)
///
/// CAUTION: The cache items are deserialized table column objects, may take a lot of memory.
Expand All @@ -589,7 +592,6 @@ pub struct CacheConfig {
pub enum CacheStorageTypeConfig {
None,
Disk,
// Redis,
}

impl Default for CacheStorageTypeConfig {
Expand All @@ -598,6 +600,20 @@ impl Default for CacheStorageTypeConfig {
}
}

/// Policy applied to the data disk cache keys when the node restarts.
///
/// `Reset` is the default.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum DiskCacheKeyReloadPolicy {
    /// Remove all the disk cache during restart.
    #[default]
    Reset,
    /// Recover the cache keys during restart,
    /// but cache capacity will not be checked.
    Fuzzy,
}

impl ToString for CacheStorageTypeConfig {
fn to_string(&self) -> String {
match self {
Expand All @@ -607,6 +623,15 @@ impl ToString for CacheStorageTypeConfig {
}
}

/// Renders the policy as its lowercase name: `"reset"` or `"fuzzy"`.
///
/// `Display` is implemented instead of `ToString` directly; the standard
/// library's blanket impl still provides `.to_string()` with the same output,
/// and the type additionally becomes usable with `{}` formatting.
impl std::fmt::Display for DiskCacheKeyReloadPolicy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            DiskCacheKeyReloadPolicy::Reset => "reset",
            DiskCacheKeyReloadPolicy::Fuzzy => "fuzzy",
        };
        f.write_str(name)
    }
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DiskCacheConfig {
/// Max bytes of cached raw table data. Default 20GB, set it to 0 to disable it.
Expand Down Expand Up @@ -643,6 +668,7 @@ impl Default for CacheConfig {
data_cache_storage: Default::default(),
table_data_cache_population_queue_size: 0,
disk_cache_config: Default::default(),
data_cache_key_reload_policy: Default::default(),
table_data_deserialized_data_bytes: 0,
table_data_deserialized_memory_ratio: 0,
}
Expand Down
1 change: 1 addition & 0 deletions src/query/config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub use inner::CacheConfig;
pub use inner::CacheStorageTypeConfig as CacheStorageTypeInnerConfig;
pub use inner::CatalogConfig;
pub use inner::CatalogHiveConfig;
pub use inner::DiskCacheKeyReloadPolicy;
pub use inner::InnerConfig;
pub use inner::ThriftProtocol;
pub use version::DATABEND_COMMIT_VERSION;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo
+-----------+--------------------------------------------+----------------------------------------------------------------+----------+
| Column 0 | Column 1 | Column 2 | Column 3 |
+-----------+--------------------------------------------+----------------------------------------------------------------+----------+
| 'cache' | 'data_cache_key_reload_policy' | 'reset' | '' |
| 'cache' | 'data_cache_storage' | 'none' | '' |
| 'cache' | 'disk.max_bytes' | '21474836480' | '' |
| 'cache' | 'disk.path' | './.databend/_cache' | '' |
Expand Down
1 change: 1 addition & 0 deletions src/query/storages/common/cache/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ crossbeam-channel = "0.5.6"
hex = "0.4.3"
log = { workspace = true }
parking_lot = { workspace = true }
rayon = "1.9.0"
siphasher = "0.3.10"

[dev-dependencies]
Expand Down
Loading

0 comments on commit 6a89aa1

Please sign in to comment.