Skip to content

Commit 21e4b55

Browse files
Make quantity field human readable (TraceMachina#891)
Add support for human readable quantity field in the different json config, i.e, 10KB -> 10000. This implementation still allows for the field being pure unsigned integers, so, it can be seen as offering an alternative configuration. Co-authored-by: Marcus Eagan <[email protected]>
1 parent 77b2c33 commit 21e4b55

File tree

12 files changed

+464
-23
lines changed

12 files changed

+464
-23
lines changed

Cargo.lock

+260-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nativelink-config/BUILD.bazel

+19
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ load(
33
"rust_doc",
44
"rust_doc_test",
55
"rust_library",
6+
"rust_test_suite",
67
)
78

89
rust_library(
@@ -16,11 +17,29 @@ rust_library(
1617
],
1718
visibility = ["//visibility:public"],
1819
deps = [
20+
"@crates//:byte-unit",
21+
"@crates//:humantime",
1922
"@crates//:serde",
2023
"@crates//:shellexpand",
2124
],
2225
)
2326

27+
rust_test_suite(
28+
name = "integration",
29+
timeout = "short",
30+
srcs = [
31+
"tests/deserialization_test.rs",
32+
],
33+
deps = [
34+
"//nativelink-config",
35+
"//nativelink-error",
36+
"@crates//:byte-unit",
37+
"@crates//:humantime",
38+
"@crates//:serde",
39+
"@crates//:serde_json5",
40+
],
41+
)
42+
2443
rust_doc(
2544
name = "docs",
2645
crate = ":nativelink-config",

nativelink-config/Cargo.toml

+4-1
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,8 @@ version = "0.4.0"
44
edition = "2021"
55

66
[dependencies]
7-
serde = { version = "1.0.201", features = ["derive"] }
7+
byte-unit = "5.1.4"
8+
humantime = "2.1.0"
9+
serde = { version = "1.0.198", features = ["derive"] }
10+
serde_json5 = "0.1.0"
811
shellexpand = "3.1.0"

nativelink-config/src/cas_server.rs

+6-5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use serde::Deserialize;
1818

1919
use crate::schedulers::SchedulerConfig;
2020
use crate::serde_utils::{
21+
convert_data_size_with_shellexpand, convert_duration_with_shellexpand,
2122
convert_numeric_with_shellexpand, convert_optional_numeric_with_shellexpand,
2223
convert_optional_string_with_shellexpand, convert_string_with_shellexpand,
2324
convert_vec_string_with_shellexpand,
@@ -139,7 +140,7 @@ pub struct ByteStreamConfig {
139140
/// 16KiB - 64KiB is optimal.
140141
///
141142
/// Defaults: 64KiB
142-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
143+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
143144
pub max_bytes_per_stream: usize,
144145

145146
/// In the event a client disconnects while uploading a blob, we will hold
@@ -148,7 +149,7 @@ pub struct ByteStreamConfig {
148149
/// the same blob.
149150
///
150151
/// Defaults: 10 (seconds)
151-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
152+
#[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
152153
pub persist_stream_on_disconnect_timeout: usize,
153154
}
154155

@@ -557,7 +558,7 @@ pub struct LocalWorkerConfig {
557558
/// longer than this time limit, the task will be rejected. Value in seconds.
558559
///
559560
/// Default: 1200 (seconds / 20 mins)
560-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
561+
#[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
561562
pub max_action_timeout: usize,
562563

563564
/// If timeout is handled in `entrypoint` or another wrapper script.
@@ -667,7 +668,7 @@ pub struct GlobalConfig {
667668
/// a new file descriptor because the limit has been reached.
668669
///
669670
/// Default: 1000 (1 second)
670-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
671+
#[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
671672
pub idle_file_descriptor_timeout_millis: u64,
672673

673674
/// This flag can be used to prevent metrics from being collected at runtime.
@@ -695,7 +696,7 @@ pub struct GlobalConfig {
695696
/// digest.
696697
///
697698
/// Default: 1024*1024 (1MiB)
698-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
699+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
699700
pub default_digest_size_health_check: usize,
700701
}
701702

nativelink-config/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@
1414

1515
pub mod cas_server;
1616
pub mod schedulers;
17-
mod serde_utils;
17+
pub mod serde_utils;
1818
pub mod stores;

nativelink-config/src/schedulers.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use std::collections::HashMap;
1616

1717
use serde::Deserialize;
1818

19-
use crate::serde_utils::convert_numeric_with_shellexpand;
19+
use crate::serde_utils::{convert_duration_with_shellexpand, convert_numeric_with_shellexpand};
2020
use crate::stores::{GrpcEndpoint, Retry, StoreRefName};
2121

2222
#[allow(non_camel_case_types)]
@@ -97,13 +97,13 @@ pub struct SimpleScheduler {
9797
/// The amount of time to retain completed actions in memory for in case
9898
/// a WaitExecution is called after the action has completed.
9999
/// Default: 60 (seconds)
100-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
100+
#[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
101101
pub retain_completed_for_s: u64,
102102

103103
/// Remove workers from pool once the worker has not responded in this
104104
/// amount of time in seconds.
105105
/// Default: 5 (seconds)
106-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
106+
#[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
107107
pub worker_timeout_s: u64,
108108

109109
/// If a job returns an internal error or times out this many times when

nativelink-config/src/serde_utils.rs

+80
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ use std::fmt;
1616
use std::marker::PhantomData;
1717
use std::str::FromStr;
1818

19+
use byte_unit::Byte;
20+
use humantime::parse_duration;
1921
use serde::{de, Deserialize, Deserializer};
2022

2123
/// Helper for serde macro so you can use shellexpand variables in the json configuration
@@ -138,3 +140,81 @@ pub fn convert_optional_string_with_shellexpand<'de, D: Deserializer<'de>>(
138140
Ok(None)
139141
}
140142
}
143+
144+
pub fn convert_data_size_with_shellexpand<'de, D, T, E>(deserializer: D) -> Result<T, D::Error>
145+
where
146+
D: Deserializer<'de>,
147+
E: fmt::Display,
148+
T: TryFrom<i64> + FromStr<Err = E>,
149+
<T as TryFrom<i64>>::Error: fmt::Display,
150+
{
151+
// define a visitor that deserializes
152+
// `ActualData` encoded as json within a string
153+
struct USizeVisitor<T: TryFrom<i64>>(PhantomData<T>);
154+
155+
impl<'de, T, FromStrErr> de::Visitor<'de> for USizeVisitor<T>
156+
where
157+
FromStrErr: fmt::Display,
158+
T: TryFrom<i64> + FromStr<Err = FromStrErr>,
159+
<T as TryFrom<i64>>::Error: fmt::Display,
160+
{
161+
type Value = T;
162+
163+
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
164+
formatter.write_str("a string containing json data")
165+
}
166+
167+
fn visit_i64<E: de::Error>(self, v: i64) -> Result<Self::Value, E> {
168+
v.try_into().map_err(de::Error::custom)
169+
}
170+
171+
fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
172+
let expanded = (*shellexpand::env(v).map_err(de::Error::custom)?).to_string();
173+
let byte_size = Byte::parse_str(expanded, true).map_err(de::Error::custom)?;
174+
let byte_size_u128 = byte_size.as_u128();
175+
T::try_from(byte_size_u128.try_into().map_err(de::Error::custom)?)
176+
.map_err(de::Error::custom)
177+
}
178+
}
179+
180+
deserializer.deserialize_any(USizeVisitor::<T>(PhantomData::<T> {}))
181+
}
182+
183+
pub fn convert_duration_with_shellexpand<'de, D, T, E>(deserializer: D) -> Result<T, D::Error>
184+
where
185+
D: Deserializer<'de>,
186+
E: fmt::Display,
187+
T: TryFrom<i64> + FromStr<Err = E>,
188+
<T as TryFrom<i64>>::Error: fmt::Display,
189+
{
190+
// define a visitor that deserializes
191+
// `ActualData` encoded as json within a string
192+
struct USizeVisitor<T: TryFrom<i64>>(PhantomData<T>);
193+
194+
impl<'de, T, FromStrErr> de::Visitor<'de> for USizeVisitor<T>
195+
where
196+
FromStrErr: fmt::Display,
197+
T: TryFrom<i64> + FromStr<Err = FromStrErr>,
198+
<T as TryFrom<i64>>::Error: fmt::Display,
199+
{
200+
type Value = T;
201+
202+
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
203+
formatter.write_str("a string containing json data")
204+
}
205+
206+
fn visit_i64<E: de::Error>(self, v: i64) -> Result<Self::Value, E> {
207+
v.try_into().map_err(de::Error::custom)
208+
}
209+
210+
fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
211+
let expanded = (*shellexpand::env(v).map_err(de::Error::custom)?).to_string();
212+
let duration = parse_duration(&expanded).map_err(de::Error::custom)?;
213+
let duration_secs = duration.as_secs();
214+
T::try_from(duration_secs.try_into().map_err(de::Error::custom)?)
215+
.map_err(de::Error::custom)
216+
}
217+
}
218+
219+
deserializer.deserialize_any(USizeVisitor::<T>(PhantomData::<T> {}))
220+
}

nativelink-config/src/stores.rs

+12-11
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
use serde::{Deserialize, Serialize};
1616

1717
use crate::serde_utils::{
18+
convert_data_size_with_shellexpand, convert_duration_with_shellexpand,
1819
convert_numeric_with_shellexpand, convert_optional_string_with_shellexpand,
1920
convert_string_with_shellexpand, convert_vec_string_with_shellexpand,
2021
};
@@ -203,7 +204,7 @@ pub struct ShardStore {
203204
#[serde(deny_unknown_fields)]
204205
pub struct SizePartitioningStore {
205206
/// Size to partition the data on.
206-
#[serde(deserialize_with = "convert_numeric_with_shellexpand")]
207+
#[serde(deserialize_with = "convert_data_size_with_shellexpand")]
207208
pub size: u64,
208209

209210
/// Store to send data when object is < (less than) size.
@@ -243,7 +244,7 @@ pub struct FilesystemStore {
243244
/// Buffer size to use when reading files. Generally this should be left
244245
/// to the default value except for testing.
245246
/// Default: 32k.
246-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
247+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
247248
pub read_buffer_size: u32,
248249

249250
/// Policy used to evict items out of the store. Failure to set this
@@ -255,7 +256,7 @@ pub struct FilesystemStore {
255256
/// value is used to determine an entry's actual size on disk consumed
256257
/// For a 4KB block size filesystem, a 1B file actually consumes 4KB
257258
/// Default: 4096
258-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
259+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
259260
pub block_size: u64,
260261
}
261262

@@ -297,7 +298,7 @@ pub struct DedupStore {
297298
/// deciding where to partition the data.
298299
///
299300
/// Default: 65536 (64k)
300-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
301+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
301302
pub min_size: u32,
302303

303304
/// A best-effort attempt will be made to keep the average size
@@ -311,13 +312,13 @@ pub struct DedupStore {
311312
/// details.
312313
///
313314
/// Default: 262144 (256k)
314-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
315+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
315316
pub normal_size: u32,
316317

317318
/// Maximum size a chunk is allowed to be.
318319
///
319320
/// Default: 524288 (512k)
320-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
321+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
321322
pub max_size: u32,
322323

323324
/// Due to implementation detail, we want to prefer to download
@@ -396,7 +397,7 @@ pub struct Lz4Config {
396397
/// compression ratios.
397398
///
398399
/// Default: 65536 (64k).
399-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
400+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
400401
pub block_size: u32,
401402

402403
/// Maximum size allowed to attempt to deserialize data into.
@@ -407,7 +408,7 @@ pub struct Lz4Config {
407408
/// allow you to specify the maximum that we'll attempt deserialize.
408409
///
409410
/// Default: value in `block_size`.
410-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
411+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
411412
pub max_decode_block_size: u32,
412413
}
413414

@@ -447,19 +448,19 @@ pub struct CompressionStore {
447448
pub struct EvictionPolicy {
448449
/// Maximum number of bytes before eviction takes place.
449450
/// Default: 0. Zero means never evict based on size.
450-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
451+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
451452
pub max_bytes: usize,
452453

453454
/// When eviction starts based on hitting max_bytes, continue until
454455
/// max_bytes - evict_bytes is met to create a low watermark. This stops
455456
/// operations from thrashing when the store is close to the limit.
456457
/// Default: 0
457-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
458+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
458459
pub evict_bytes: usize,
459460

460461
/// Maximum number of seconds for an entry to live before an eviction.
461462
/// Default: 0. Zero means never evict based on time.
462-
#[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
463+
#[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
463464
pub max_seconds: u32,
464465

465466
/// Maximum size of the store before an eviction takes place.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Copyright 2023 The NativeLink Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use nativelink_config::serde_utils::{
16+
convert_data_size_with_shellexpand, convert_duration_with_shellexpand,
17+
};
18+
use serde::Deserialize;
19+
20+
#[derive(Deserialize)]
21+
struct DurationEntity {
22+
#[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
23+
duration: usize,
24+
}
25+
26+
#[derive(Deserialize)]
27+
struct DataSizeEntity {
28+
#[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
29+
data_size: usize,
30+
}
31+
32+
#[cfg(test)]
33+
mod tests {
34+
use super::*;
35+
36+
#[test]
37+
fn test_duration_human_readable_deserialize() {
38+
let example = r#"
39+
{"duration": "1m 10s"}
40+
"#;
41+
let deserialized: DurationEntity = serde_json5::from_str(example).unwrap();
42+
assert_eq!(deserialized.duration, 70);
43+
}
44+
45+
#[test]
46+
fn test_duration_usize_deserialize() {
47+
let example = r#"
48+
{"duration": 10}
49+
"#;
50+
let deserialized: DurationEntity = serde_json5::from_str(example).unwrap();
51+
assert_eq!(deserialized.duration, 10);
52+
}
53+
54+
#[test]
55+
fn test_data_size_unit_deserialize() {
56+
let example = r#"
57+
{"data_size": "1KiB"}
58+
"#;
59+
let deserialized: DataSizeEntity = serde_json5::from_str(example).unwrap();
60+
assert_eq!(deserialized.data_size, 1024);
61+
}
62+
63+
#[test]
64+
fn test_data_size_usize_deserialize() {
65+
let example = r#"
66+
{"data_size": 10}
67+
"#;
68+
let deserialized: DataSizeEntity = serde_json5::from_str(example).unwrap();
69+
assert_eq!(deserialized.data_size, 10);
70+
}
71+
}

0 commit comments

Comments
 (0)